Skip to contents

Set connections to hosts.

This example workflow uses the public GitHub and GitLab hosts, but you may well be working with internal git platforms, in which case you need to define the host parameter. See the vignette("set_hosts") article for details.

library(GitStats)

git_stats <- create_gitstats() %>%
  set_github_host(
    orgs = c("r-world-devs", "openpharma"),
    token = Sys.getenv("GITHUB_PAT")
  ) %>%
  set_gitlab_host(
    orgs = c("mbtests"),
    token = Sys.getenv("GITLAB_PAT_PUBLIC")
  )
#>  Searching scope set to [org].
#>  Checking host data...
#>  Set connection to GitHub.
#>  Searching scope set to [org].
#>  Checking host data...
#>  Set connection to GitLab.

As the scanning scope was set to organizations (the orgs parameter in set_*_host()), GitStats will pull all repositories from these organizations.

repos <- get_repos(git_stats)
#>  [Host:GitHub][Engine:GraphQl][Scope:r-world-devs] Pulling repositories...
#>  [Host:GitHub][Engine:GraphQl][Scope:openpharma] Pulling repositories...
#>  [Host:GitHub][Engine:REST] Pulling contributors...
#>  [Host:GitLab][Engine:GraphQl][Scope:mbtests] Pulling repositories...
#>  [Host:GitLab][Engine:REST] Pulling contributors...
dplyr::glimpse(repos)
#> Rows: 78
#> Columns: 18
#> $ repo_id          <chr> "R_kgDOHNMr2w", "R_kgDOHYNOFQ", "R_kgDOHYNrJw", "R_kg…
#> $ repo_name        <chr> "shinyGizmo", "cohortBuilder", "shinyCohortBuilder", …
#> $ organization     <chr> "r-world-devs", "r-world-devs", "r-world-devs", "r-wo…
#> $ fullname         <chr> "r-world-devs/shinyGizmo", "r-world-devs/cohortBuilde…
#> $ platform         <chr> "github", "github", "github", "github", "github", "gi…
#> $ repo_url         <chr> "https://github.com/r-world-devs/shinyGizmo", "https:…
#> $ api_url          <chr> "https://api.github.com/repos/r-world-devs/shinyGizmo…
#> $ created_at       <dttm> 2022-04-20 10:04:32, 2022-05-22 18:31:55, 2022-05-22…
#> $ last_activity_at <dttm> 2024-07-12, 2024-06-13, 2024-07-08, 2024-06-13, 2024…
#> $ last_activity    <drtn> 7.31 days, 36.31 days, 11.31 days, 36.31 days, 0.31 …
#> $ default_branch   <chr> "dev", "dev", "dev", "master", "master", "master", "m…
#> $ stars            <int> 19, 4, 6, 0, 2, 3, 0, 2, 1, 0, 0, 0, 0, 8, 19, 179, 4…
#> $ forks            <int> 0, 2, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 5, 9, 31, 1, 1…
#> $ languages        <chr> "R, CSS, JavaScript", "R", "R, CSS, JavaScript, SCSS"…
#> $ issues_open      <int> 6, 23, 35, 3, 86, 0, 0, 3, 0, 0, 0, 0, 0, 6, 96, 18, …
#> $ issues_closed    <int> 12, 1, 5, 0, 220, 0, 0, 0, 0, 0, 0, 0, 0, 1, 374, 170…
#> $ contributors     <chr> "krystian8207, stla, galachad, stlagsk", "krystian820…
#> $ contributors_n   <int> 4, 2, 3, 1, 4, 1, 6, 2, 1, 142, 2, 3, 1, 5, 19, 20, 1…

You can always go for the lighter version of get_repos(), i.e. get_repos_urls(), which returns a character vector of URLs instead of the whole table.

repos_urls <- get_repos_urls(git_stats)
#>  [Host:GitHub][Engine:REST][Scope:r-world-devs] Pulling repositories (URLS)...
#>  [Host:GitHub][Engine:REST][Scope:openpharma] Pulling repositories (URLS)...
#>  [Host:GitLab][Engine:REST][Scope:mbtests] Pulling repositories (URLS)...
dplyr::glimpse(repos_urls)
#>  'repos_urls' chr [1:52] "https://github.com/r-world-devs/shinyGizmo" ...
#>  - attr(*, "type")= chr "web"

Verbose mode

If the messages overwhelm you, you can switch them off for a single function call:

release_logs <- get_release_logs(
  gitstats_object = git_stats, 
  since = "2024-01-01", 
  verbose = FALSE
)

Or globally:

verbose_off(git_stats)

Storage

After pulling, the data is saved in the GitStats object.

commits <- get_commits(
  gitstats_object = git_stats, 
  since = "2024-06-01", 
  until = "2024-06-30"
)
#>  [Host:GitHub][Engine:GraphQl][Scope:r-world-devs] Pulling commits...
#>  ■■■■■■                            16% |  ETA: 19s
#>  ■■■■■■■■■■■                       32% |  ETA: 14s
#>  ■■■■■■■■■■■■■■■■                  51% |  ETA:  9s
#>  ■■■■■■■■■■■■■■■■■■■■■             68% |  ETA:  6s
#>  ■■■■■■■■■■■■■■■■■■■■■■■■■■■       87% |  ETA:  2s
#>  [Host:GitHub][Engine:GraphQl][Scope:openpharma] Pulling commits...
#>  ■■■■■■■                           20% |  ETA: 13s
#>  ■■■■■■■■■■■                       32% |  ETA: 14s
#>  ■■■■■■■■■■■■■■                    42% |  ETA: 13s
#>  ■■■■■■■■■■■■■■■■■■                55% |  ETA: 10s
#>  ■■■■■■■■■■■■■■■■■■■■■             68% |  ETA:  7s
#>  ■■■■■■■■■■■■■■■■■■■■■■■■■         80% |  ETA:  5s
#>  ■■■■■■■■■■■■■■■■■■■■■■■■■■■■■     93% |  ETA:  2s
#>  [Host:GitLab][Engine:REST][Scope:mbtests] Pulling commits...
dplyr::glimpse(commits)
#> Rows: 91
#> Columns: 10
#> $ id             <chr> "C_kwDOIvtxstoAKDcyZDIyMjFjNDkyYzczN2I3MmZjNWNjMmU0ZmVm…
#> $ committed_date <dttm> 2024-06-28 09:05:20, 2024-06-27 11:23:20, 2024-06-27 1…
#> $ author         <chr> "Maciej Banaś", "banasm", "banasm", "banasm", "banasm",…
#> $ author_login   <chr> "maciekbanas", "maciekbanas", "maciekbanas", "maciekban…
#> $ author_name    <chr> "Maciej Banaś", "Maciej Banaś", "Maciej Banaś", "Maciej…
#> $ additions      <int> 459, 1, 100, 32, 27, 108, 52, 29, 1, 57, 172, 3, 9, 3, …
#> $ deletions      <int> 54, 1, 28, 3, 5, 16, 42, 22, 1, 26, 30, 0, 1, 0, 11, 1,…
#> $ repository     <chr> "GitStats", "GitStats", "GitStats", "GitStats", "GitSta…
#> $ organization   <chr> "r-world-devs", "r-world-devs", "r-world-devs", "r-worl…
#> $ api_url        <chr> "https://api.github.com", "https://api.github.com", "ht…

The caching feature is turned on by default. If you run the get_*() function once more, the data will be retrieved from the GitStats object.

commits <- get_commits(
  gitstats_object = git_stats, 
  since = "2024-06-01", 
  until = "2024-06-30"
)
#> ! Retrieving commits from the GitStats storage.
#>  If you wish to pull the data from API once more, set `cache` parameter to `FALSE`.
dplyr::glimpse(commits)
#> Rows: 91
#> Columns: 10
#> $ id             <chr> "C_kwDOIvtxstoAKDcyZDIyMjFjNDkyYzczN2I3MmZjNWNjMmU0ZmVm…
#> $ committed_date <dttm> 2024-06-28 09:05:20, 2024-06-27 11:23:20, 2024-06-27 1…
#> $ author         <chr> "Maciej Banaś", "banasm", "banasm", "banasm", "banasm",…
#> $ author_login   <chr> "maciekbanas", "maciekbanas", "maciekbanas", "maciekban…
#> $ author_name    <chr> "Maciej Banaś", "Maciej Banaś", "Maciej Banaś", "Maciej…
#> $ additions      <int> 459, 1, 100, 32, 27, 108, 52, 29, 1, 57, 172, 3, 9, 3, …
#> $ deletions      <int> 54, 1, 28, 3, 5, 16, 42, 22, 1, 26, 30, 0, 1, 0, 11, 1,…
#> $ repository     <chr> "GitStats", "GitStats", "GitStats", "GitStats", "GitSta…
#> $ organization   <chr> "r-world-devs", "r-world-devs", "r-world-devs", "r-worl…
#> $ api_url        <chr> "https://api.github.com", "https://api.github.com", "ht…

Unless you switch off the cache:

commits <- get_commits(
  gitstats_object = git_stats, 
  since = "2024-06-01", 
  until = "2024-06-30",
  cache = FALSE
)

Or simply change the parameters for the function:

commits <- get_commits(
  gitstats_object = git_stats, 
  since = "2024-07-01"
)