Skip to contents

Set connections to hosts.

This example workflow uses the public GitHub and GitLab hosts, but you may well be working with internal git platforms, in which case you need to define the host parameter. See the vignette("set_hosts") article for details.

library(GitStats)

git_stats <- create_gitstats() %>%
  set_github_host(
    orgs = c("r-world-devs", "openpharma"),
    token = Sys.getenv("GITHUB_PAT")
  ) %>%
  set_gitlab_host(
    orgs = c("mbtests"),
    token = Sys.getenv("GITLAB_PAT_PUBLIC")
  )
#>  Searching scope set to [org].
#>  Checking organizations...
#>  Set connection to GitHub.
#>  Searching scope set to [org].
#>  Checking organizations...
#>  Set connection to GitLab.

As the scanning scope was set to organizations (the orgs parameter in set_*_host()), GitStats will pull all repositories from these organizations.

repos <- get_repos(git_stats, progress = FALSE)
#>  [Host:GitHub][Engine:GraphQl][Scope:r-world-devs] Pulling repositories...
#>  [Host:GitHub][Engine:GraphQl][Scope:openpharma] Pulling repositories...
#>  [Host:GitHub][Engine:REST] Pulling contributors...
#>  [Host:GitLab][Engine:GraphQl][Scope:mbtests] Pulling repositories...
#>  [Host:GitLab][Engine:REST] Pulling contributors...
dplyr::glimpse(repos)
#> Rows: 88
#> Columns: 18
#> $ repo_id          <chr> "R_kgDOHNMr2w", "R_kgDOHYNOFQ", "R_kgDOHYNrJw", "R_kg…
#> $ repo_name        <chr> "shinyGizmo", "cohortBuilder", "shinyCohortBuilder", …
#> $ organization     <chr> "r-world-devs", "r-world-devs", "r-world-devs", "r-wo…
#> $ fullname         <chr> "r-world-devs/shinyGizmo", "r-world-devs/cohortBuilde…
#> $ platform         <chr> "github", "github", "github", "github", "github", "gi…
#> $ repo_url         <chr> "https://github.com/r-world-devs/shinyGizmo", "https:…
#> $ api_url          <chr> "https://api.github.com/repos/r-world-devs/shinyGizmo…
#> $ created_at       <dttm> 2022-04-20 10:04:32, 2022-05-22 18:31:55, 2022-05-22…
#> $ last_activity_at <dttm> 2024-07-12, 2024-11-04, 2024-11-04, 2024-06-13, 2024…
#> $ last_activity    <drtn> 124.45 days, 9.45 days, 9.45 days, 153.45 days, 0.45…
#> $ default_branch   <chr> "dev", "dev", "dev", "master", "master", "master", "m…
#> $ stars            <int> 19, 6, 6, 0, 3, 3, 0, 2, 1, 0, 0, 0, 0, 1, 2, 2, 10, …
#> $ forks            <int> 0, 2, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 5, 10…
#> $ languages        <chr> "R, CSS, JavaScript", "R", "R, CSS, JavaScript, SCSS"…
#> $ issues_open      <int> 6, 21, 32, 3, 86, 0, 0, 3, 0, 0, 0, 0, 0, 0, 0, 7, 6,…
#> $ issues_closed    <int> 12, 3, 13, 0, 268, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 4, 1…
#> $ contributors     <chr> "krystian8207, stla, galachad, stlagsk", "krystian820…
#> $ contributors_n   <int> 4, 2, 3, 1, 4, 1, 6, 2, 1, 142, 2, 3, 1, 1, 1, 2, 5, …

You can always go for a lighter version of get_repos(), i.e. get_repos_urls(), which returns a vector of URLs instead of the whole table.

repos_urls <- get_repos_urls(git_stats)
#>  [Host:GitHub][Engine:REST][Scope:r-world-devs] Pulling repositories (URLS)...
#>  [Host:GitHub][Engine:REST][Scope:openpharma] Pulling repositories (URLS)...
#>  [Host:GitLab][Engine:REST][Scope:mbtests] Pulling repositories (URLS)...
dplyr::glimpse(repos_urls)
#>  'repos_urls' chr [1:56] "https://github.com/r-world-devs/shinyGizmo" ...
#>  - attr(*, "type")= chr "web"

Verbose mode

If the messages overwhelm you, you can switch them off in the function call:

release_logs <- get_release_logs(
  gitstats_object = git_stats,
  since = "2024-01-01",
  verbose = FALSE
)
dplyr::glimpse(release_logs)
#> Rows: 37
#> Columns: 7
#> $ repo_name    <chr> "cohortBuilder", "shinyCohortBuilder", "shinyCohortBuilde…
#> $ repo_url     <chr> "https://github.com/r-world-devs/cohortBuilder", "https:/…
#> $ release_name <chr> "cohortBuilder 0.3.0", "v0.3.1", "v0.3.0", "GitStats 2.1.…
#> $ release_tag  <chr> "v0.3.0", "v0.3.1", "v0.3.0", "v2.1.2", "v2.1.1", "v2.1.0…
#> $ published_at <dttm> 2024-09-27 11:35:06, 2024-10-24 08:21:19, 2024-10-24 08:…
#> $ release_url  <chr> "https://github.com/r-world-devs/cohortBuilder/releases/t…
#> $ release_log  <chr> "* Add new filter of type `\"query\"` that allows to conf…

Or globally:

verbose_off(git_stats)

Cache

After pulling, the data is saved in the GitStats object.

commits <- get_commits(
  gitstats_object = git_stats,
  since           = "2024-06-01",
  until           = "2024-06-30",
  progress        = FALSE
)
#>  [Host:GitHub][Engine:GraphQl][Scope:r-world-devs] Pulling commits...
#>  [Host:GitHub][Engine:GraphQl][Scope:openpharma] Pulling commits...
#>  [Host:GitLab][Engine:REST][Scope:mbtests] Pulling commits...
dplyr::glimpse(commits)
#> Rows: 188
#> Columns: 11
#> $ id             <chr> "C_kwDOHYNOFdoAKGU3Mjg5MTViZGM4YzYzMTIwOWEwMzEwMDIwOTA0…
#> $ committed_date <dttm> 2024-06-05 11:02:21, 2024-06-05 10:55:59, 2024-06-05 1…
#> $ author         <chr> "Kamil Koziej", "Kamil Koziej", "Kamil Koziej", "Krysti…
#> $ author_login   <chr> NA, NA, NA, "krystian8207", "krystian8207", "krystian82…
#> $ author_name    <chr> NA, NA, NA, "Krystian Igras", "Krystian Igras", "Krysti…
#> $ additions      <int> 559, 0, 219, 1, 2, 108, 38, 83, 14, 1596, 973, 1292, 3,…
#> $ deletions      <int> 304, 19, 87, 1, 1, 1, 0, 29, 0, 340, 163, 799, 6, 54, 1…
#> $ repository     <chr> "cohortBuilder", "cohortBuilder", "cohortBuilder", "coh…
#> $ organization   <chr> "r-world-devs", "r-world-devs", "r-world-devs", "r-worl…
#> $ repo_url       <chr> "https://github.com/r-world-devs/cohortBuilder", "https…
#> $ api_url        <glue> "https://api.github.com/graphql", "https://api.github.…

The caching feature is turned on by default. If you run the get_*() function once more, the data will be retrieved from the GitStats object.

commits <- get_commits(
  gitstats_object = git_stats,
  since           = "2024-06-01",
  until           = "2024-06-30"
)
#> ! Retrieving commits from the GitStats storage.
#>  If you wish to pull the data from API once more, set `cache` parameter to `FALSE`.
dplyr::glimpse(commits)
#> Rows: 188
#> Columns: 11
#> $ id             <chr> "C_kwDOHYNOFdoAKGU3Mjg5MTViZGM4YzYzMTIwOWEwMzEwMDIwOTA0…
#> $ committed_date <dttm> 2024-06-05 11:02:21, 2024-06-05 10:55:59, 2024-06-05 1…
#> $ author         <chr> "Kamil Koziej", "Kamil Koziej", "Kamil Koziej", "Krysti…
#> $ author_login   <chr> NA, NA, NA, "krystian8207", "krystian8207", "krystian82…
#> $ author_name    <chr> NA, NA, NA, "Krystian Igras", "Krystian Igras", "Krysti…
#> $ additions      <int> 559, 0, 219, 1, 2, 108, 38, 83, 14, 1596, 973, 1292, 3,…
#> $ deletions      <int> 304, 19, 87, 1, 1, 1, 0, 29, 0, 340, 163, 799, 6, 54, 1…
#> $ repository     <chr> "cohortBuilder", "cohortBuilder", "cohortBuilder", "coh…
#> $ organization   <chr> "r-world-devs", "r-world-devs", "r-world-devs", "r-worl…
#> $ repo_url       <chr> "https://github.com/r-world-devs/cohortBuilder", "https…
#> $ api_url        <glue> "https://api.github.com/graphql", "https://api.github.…

Unless you switch off the cache:

commits <- get_commits(
  gitstats_object = git_stats,
  since    = "2024-06-01",
  until    = "2024-06-30",
  cache    = FALSE,
  progress = FALSE
)
#>  Cache set to FALSE, I will pull data from API.
#>  [Host:GitHub][Engine:GraphQl][Scope:r-world-devs] Pulling commits...
#>  [Host:GitHub][Engine:GraphQl][Scope:openpharma] Pulling commits...
#>  [Host:GitLab][Engine:REST][Scope:mbtests] Pulling commits...
dplyr::glimpse(commits)
#> Rows: 188
#> Columns: 11
#> $ id             <chr> "C_kwDOHYNOFdoAKGU3Mjg5MTViZGM4YzYzMTIwOWEwMzEwMDIwOTA0…
#> $ committed_date <dttm> 2024-06-05 11:02:21, 2024-06-05 10:55:59, 2024-06-05 1…
#> $ author         <chr> "Kamil Koziej", "Kamil Koziej", "Kamil Koziej", "Krysti…
#> $ author_login   <chr> NA, NA, NA, "krystian8207", "krystian8207", "krystian82…
#> $ author_name    <chr> NA, NA, NA, "Krystian Igras", "Krystian Igras", "Krysti…
#> $ additions      <int> 559, 0, 219, 1, 2, 108, 38, 83, 14, 1596, 973, 1292, 3,…
#> $ deletions      <int> 304, 19, 87, 1, 1, 1, 0, 29, 0, 340, 163, 799, 6, 54, 1…
#> $ repository     <chr> "cohortBuilder", "cohortBuilder", "cohortBuilder", "coh…
#> $ organization   <chr> "r-world-devs", "r-world-devs", "r-world-devs", "r-worl…
#> $ repo_url       <chr> "https://github.com/r-world-devs/cohortBuilder", "https…
#> $ api_url        <glue> "https://api.github.com/graphql", "https://api.github.…

Or simply change the parameters for the function:

commits <- get_commits(
  gitstats_object = git_stats,
  since           = "2024-07-01",
  progress        = FALSE
)
#>  Parameters changed, I will pull data from API.
#>  [Host:GitHub][Engine:GraphQl][Scope:r-world-devs] Pulling commits...
#>  [Host:GitHub][Engine:GraphQl][Scope:openpharma] Pulling commits...
#>  [Host:GitLab][Engine:REST][Scope:mbtests] Pulling commits...
#>  Looking up for authors' names and logins...
dplyr::glimpse(commits)
#> Rows: 945
#> Columns: 11
#> $ id             <chr> "C_kwDOHYNOFdoAKDgwMTE0ODgxMTlmY2Q5MjA5MzhkMzljODM5ZDg3…
#> $ committed_date <dttm> 2024-11-04 11:44:11, 2024-11-04 07:35:28, 2024-10-31 2…
#> $ author         <chr> "Adam Foryś", "Adam Foryś", "Adam Forys", "Adam Foryś",…
#> $ author_login   <chr> "galachad", "galachad", "galachad", "galachad", "galach…
#> $ author_name    <chr> "Adam Foryś", "Adam Foryś", "Adam Foryś", "Adam Foryś",…
#> $ additions      <int> 334, 1, 0, 1, 12, 40, 0, 129, 1, 5, 3, 1, 14, 16, 1, 11…
#> $ deletions      <int> 1, 1, 13, 1, 12, 36, 1, 0, 1, 1, 3, 0, 21, 10, 1, 1, 15…
#> $ repository     <chr> "cohortBuilder", "cohortBuilder", "cohortBuilder", "coh…
#> $ organization   <chr> "r-world-devs", "r-world-devs", "r-world-devs", "r-worl…
#> $ repo_url       <chr> "https://github.com/r-world-devs/cohortBuilder", "https…
#> $ api_url        <glue> "https://api.github.com/graphql", "https://api.github.…

Storage

Finally, have a glimpse at your storage:

git_stats
#> A GitStats object for 2 hosts: 
#> Hosts: https://api.github.com, https://gitlab.com/api/v4
#> Scanning scope: 
#>  Organizations: [3] r-world-devs, openpharma, mbtests
#>  Repositories: [0] 
#> Storage: 
#>  Repositories: 88 
#>  Commits: 945 [date range: 2024-07-01 - 2024-11-14]
#>  Release_logs: 37 [date range: 2024-01-01 - 2024-11-14]
#>  Repos_urls: 56 [type: web]

You can retrieve all the data from your GitStats object with:

get_storage(git_stats)
#> $repositories
#> # A tibble: 88 × 18
#>    repo_id      repo_name        organization fullname platform repo_url api_url
#>    <chr>        <chr>            <chr>        <chr>    <chr>    <chr>    <chr>  
#>  1 R_kgDOHNMr2w shinyGizmo       r-world-devs r-world… github   https:/… https:…
#>  2 R_kgDOHYNOFQ cohortBuilder    r-world-devs r-world… github   https:/… https:…
#>  3 R_kgDOHYNrJw shinyCohortBuil… r-world-devs r-world… github   https:/… https:…
#>  4 R_kgDOHYNxtw cohortBuilder.db r-world-devs r-world… github   https:/… https:…
#>  5 R_kgDOIvtxsg GitStats         r-world-devs r-world… github   https:/… https:…
#>  6 R_kgDOJAtHJA shinyTimelines   r-world-devs r-world… github   https:/… https:…
#>  7 R_kgDOJKQ8Lg ROhdsiWebApi     r-world-devs r-world… github   https:/… https:…
#>  8 R_kgDOJWYrCA hypothesis       r-world-devs r-world… github   https:/… https:…
#>  9 R_kgDOMHUIwg useR2024-master… r-world-devs r-world… github   https:/… https:…
#> 10 R_kgDOMMESGQ dbplyr           r-world-devs r-world… github   https:/… https:…
#> # ℹ 78 more rows
#> # ℹ 11 more variables: created_at <dttm>, last_activity_at <dttm>,
#> #   last_activity <drtn>, default_branch <chr>, stars <int>, forks <int>,
#> #   languages <chr>, issues_open <int>, issues_closed <int>,
#> #   contributors <chr>, contributors_n <int>
#> 
#> $commits
#> # A tibble: 945 × 11
#>    id    committed_date      author author_login author_name additions deletions
#>    <chr> <dttm>              <chr>  <chr>        <chr>           <int>     <int>
#>  1 C_kw… 2024-11-04 11:44:11 Adam … galachad     Adam Foryś        334         1
#>  2 C_kw… 2024-11-04 07:35:28 Adam … galachad     Adam Foryś          1         1
#>  3 C_kw… 2024-10-31 21:17:25 Adam … galachad     Adam Foryś          0        13
#>  4 C_kw… 2024-10-31 15:45:38 Adam … galachad     Adam Foryś          1         1
#>  5 C_kw… 2024-10-31 15:31:16 Adam … galachad     Adam Foryś         12        12
#>  6 C_kw… 2024-10-31 15:09:42 Adam … galachad     Adam Foryś         40        36
#>  7 C_kw… 2024-10-31 12:24:19 Adam … galachad     Adam Foryś          0         1
#>  8 C_kw… 2024-10-31 11:12:54 Adam … galachad     Adam Foryś        129         0
#>  9 C_kw… 2024-10-31 11:12:30 Adam … galachad     Adam Foryś          1         1
#> 10 C_kw… 2024-10-31 09:41:39 Adam … galachad     Adam Foryś          5         1
#> # ℹ 935 more rows
#> # ℹ 4 more variables: repository <chr>, organization <chr>, repo_url <chr>,
#> #   api_url <glue>
#> 
#> $users
#> NULL
#> 
#> $files
#> NULL
#> 
#> $files_structure
#> NULL
#> 
#> $R_package_usage
#> NULL
#> 
#> $release_logs
#> # A tibble: 37 × 7
#>    repo_name   repo_url release_name release_tag published_at        release_url
#>    <chr>       <chr>    <chr>        <chr>       <dttm>              <chr>      
#>  1 cohortBuil… https:/… cohortBuild… v0.3.0      2024-09-27 11:35:06 https://gi…
#>  2 shinyCohor… https:/… v0.3.1       v0.3.1      2024-10-24 08:21:19 https://gi…
#>  3 shinyCohor… https:/… v0.3.0       v0.3.0      2024-10-24 08:20:32 https://gi…
#>  4 GitStats    https:/… GitStats 2.… v2.1.2      2024-11-13 10:27:51 https://gi…
#>  5 GitStats    https:/… GitStats 2.… v2.1.1      2024-10-29 07:46:43 https://gi…
#>  6 GitStats    https:/… 2.1.0        v2.1.0      2024-10-01 07:29:43 https://gi…
#>  7 GitStats    https:/… 2.0.2        v2.0.2      2024-07-19 07:59:12 https://gi…
#>  8 GitStats    https:/… 2.0.1        v2.0.1      2024-05-14 10:22:52 https://gi…
#>  9 GitStats    https:/… 1.1.0        v1.1.0      2024-01-08 10:14:13 https://gi…
#> 10 queryBuild… https:/… First relea… v0.1.0      2024-09-27 11:31:59 https://gi…
#> # ℹ 27 more rows
#> # ℹ 1 more variable: release_log <chr>
#> 
#> $repos_urls
#>  [1] "https://github.com/r-world-devs/shinyGizmo"                          
#>  [2] "https://github.com/r-world-devs/cohortBuilder"                       
#>  [3] "https://github.com/r-world-devs/shinyCohortBuilder"                  
#>  [4] "https://github.com/r-world-devs/cohortBuilder.db"                    
#>  [5] "https://github.com/r-world-devs/GitStats"                            
#>  [6] "https://github.com/r-world-devs/shinyTimelines"                      
#>  [7] "https://github.com/r-world-devs/ROhdsiWebApi"                        
#>  [8] "https://github.com/r-world-devs/hypothesis"                          
#>  [9] "https://github.com/r-world-devs/useR2024-mastering-plumber-api"      
#> [10] "https://github.com/r-world-devs/dbplyr"                              
#> [11] "https://github.com/r-world-devs/IncidencePrevalence"                 
#> [12] "https://github.com/r-world-devs/MegaStudy"                           
#> [13] "https://github.com/r-world-devs/useR2024-cohortBuilder-minidemo"     
#> [14] "https://github.com/r-world-devs/queryBuilder"                        
#> [15] "https://github.com/r-world-devs/shinyQueryBuilder"                   
#> [16] "https://github.com/r-world-devs/GitAI"                               
#> [17] "https://github.com/openpharma/openpharma.github.io"                  
#> [18] "https://github.com/openpharma/crmPack"                               
#> [19] "https://github.com/openpharma/visR"                                  
#> [20] "https://github.com/openpharma/pypharma_nlp"                          
#> [21] "https://github.com/openpharma/RDO"                                   
#> [22] "https://github.com/openpharma/syntrial"                              
#> [23] "https://github.com/openpharma/simaerep"                              
#> [24] "https://github.com/openpharma/CTP"                                   
#> [25] "https://github.com/openpharma/sas7bdat"                              
#> [26] "https://github.com/openpharma/visR-docs"                             
#> [27] "https://github.com/openpharma/facetsr"                               
#> [28] "https://github.com/openpharma/GithubMetrics"                         
#> [29] "https://github.com/openpharma/BBS-causality-training"                
#> [30] "https://github.com/openpharma/synthetic.data.submission.shiny"       
#> [31] "https://github.com/openpharma/synthetic.data.archive"                
#> [32] "https://github.com/openpharma/tester"                                
#> [33] "https://github.com/openpharma/staged.dependencies"                   
#> [34] "https://github.com/openpharma/openpharma_log"                        
#> [35] "https://github.com/openpharma/quality_risk_assesment_clinical_trials"
#> [36] "https://github.com/openpharma/stageddeps.house"                      
#> [37] "https://github.com/openpharma/stageddeps.garden"                     
#> [38] "https://github.com/openpharma/stageddeps.food"                       
#> [39] "https://github.com/openpharma/stageddeps.electricity"                
#> [40] "https://github.com/openpharma/stageddeps.elecinfra"                  
#> [41] "https://github.com/openpharma/stageddeps.water"                      
#> [42] "https://github.com/openpharma/DataFakeR"                             
#> [43] "https://github.com/openpharma/rinpharma_workshop_2021_old"           
#> [44] "https://github.com/openpharma/rinpharma_workshop_2021"               
#> [45] "https://github.com/openpharma/elaborator"                            
#> [46] "https://github.com/openpharma/graphicalMCP"                          
#> [47] "https://gitlab.com/mbtests/graphql_tests"                            
#> [48] "https://gitlab.com/mbtests/regextips"                                
#> [49] "https://gitlab.com/mbtests/gitstatstesting4"                         
#> [50] "https://gitlab.com/mbtests/gitstatstesting3"                         
#> [51] "https://gitlab.com/mbtests/testrpackage"                             
#> [52] "https://gitlab.com/mbtests/rm-tests-3"                               
#> [53] "https://gitlab.com/mbtests/rm-tests-2"                               
#> [54] "https://gitlab.com/mbtests/rm-tests"                                 
#> [55] "https://gitlab.com/mbtests/gitstats-testing-2"                       
#> [56] "https://gitlab.com/mbtests/gitstatstesting"                          
#> attr(,"class")
#> [1] "repos_urls" "character" 
#> attr(,"type")
#> [1] "web"

Or a particular data set:

get_storage(
  gitstats_object = git_stats,
  storage = "repositories"
)
#> # A tibble: 88 × 18
#>    repo_id      repo_name        organization fullname platform repo_url api_url
#>    <chr>        <chr>            <chr>        <chr>    <chr>    <chr>    <chr>  
#>  1 R_kgDOHNMr2w shinyGizmo       r-world-devs r-world… github   https:/… https:…
#>  2 R_kgDOHYNOFQ cohortBuilder    r-world-devs r-world… github   https:/… https:…
#>  3 R_kgDOHYNrJw shinyCohortBuil… r-world-devs r-world… github   https:/… https:…
#>  4 R_kgDOHYNxtw cohortBuilder.db r-world-devs r-world… github   https:/… https:…
#>  5 R_kgDOIvtxsg GitStats         r-world-devs r-world… github   https:/… https:…
#>  6 R_kgDOJAtHJA shinyTimelines   r-world-devs r-world… github   https:/… https:…
#>  7 R_kgDOJKQ8Lg ROhdsiWebApi     r-world-devs r-world… github   https:/… https:…
#>  8 R_kgDOJWYrCA hypothesis       r-world-devs r-world… github   https:/… https:…
#>  9 R_kgDOMHUIwg useR2024-master… r-world-devs r-world… github   https:/… https:…
#> 10 R_kgDOMMESGQ dbplyr           r-world-devs r-world… github   https:/… https:…
#> # ℹ 78 more rows
#> # ℹ 11 more variables: created_at <dttm>, last_activity_at <dttm>,
#> #   last_activity <drtn>, default_branch <chr>, stars <int>, forks <int>,
#> #   languages <chr>, issues_open <int>, issues_closed <int>,
#> #   contributors <chr>, contributors_n <int>