Set connections to hosts.
The example workflow makes use of public GitHub and GitLab, but it is plausible that you will use your internal git platforms, in which case you need to define the `host` parameter. See the `vignette("set_hosts")` article on that.
library(GitStats)
git_stats <- create_gitstats() %>%
set_github_host(
orgs = c("r-world-devs", "openpharma"),
token = Sys.getenv("GITHUB_PAT")
) %>%
set_gitlab_host(
orgs = c("mbtests"),
token = Sys.getenv("GITLAB_PAT_PUBLIC")
)
#> ℹ Searching scope set to [org].
#> ℹ Checking organizations...
#> ✔ Set connection to GitHub.
#> ℹ Searching scope set to [org].
#> ℹ Checking organizations...
#> ✔ Set connection to GitLab.
As the scanning scope was set to organizations (the `orgs` parameter in `set_*_host()`), `GitStats` will pull all repositories from these organizations.
repos <- get_repos(git_stats, progress = FALSE)
#> ℹ [Host:GitHub][Engine:GraphQl][Scope:r-world-devs] Pulling repositories...
#> ℹ [Host:GitHub][Engine:GraphQl][Scope:openpharma] Pulling repositories...
#> ℹ [Host:GitHub][Engine:REST] Pulling contributors...
#> ℹ [Host:GitLab][Engine:GraphQl][Scope:mbtests] Pulling repositories...
#> ℹ [Host:GitLab][Engine:REST] Pulling contributors...
dplyr::glimpse(repos)
#> Rows: 85
#> Columns: 18
#> $ repo_id <chr> "R_kgDOHNMr2w", "R_kgDOHYNOFQ", "R_kgDOHYNrJw", "R_kg…
#> $ repo_name <chr> "shinyGizmo", "cohortBuilder", "shinyCohortBuilder", …
#> $ organization <chr> "r-world-devs", "r-world-devs", "r-world-devs", "r-wo…
#> $ fullname <chr> "r-world-devs/shinyGizmo", "r-world-devs/cohortBuilde…
#> $ platform <chr> "github", "github", "github", "github", "github", "gi…
#> $ repo_url <chr> "https://github.com/r-world-devs/shinyGizmo", "https:…
#> $ api_url <chr> "https://api.github.com/repos/r-world-devs/shinyGizmo…
#> $ created_at <dttm> 2022-04-20 10:04:32, 2022-05-22 18:31:55, 2022-05-22…
#> $ last_activity_at <dttm> 2024-07-12, 2024-10-28, 2024-10-24, 2024-06-13, 2024…
#> $ last_activity <drtn> 109.33 days, 1.33 days, 5.33 days, 138.33 days, 0.33…
#> $ default_branch <chr> "dev", "dev", "dev", "master", "master", "master", "m…
#> $ stars <int> 19, 5, 6, 0, 3, 3, 0, 2, 1, 0, 0, 0, 0, 1, 2, 10, 20,…
#> $ forks <int> 0, 2, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 5, 10, 3…
#> $ languages <chr> "R, CSS, JavaScript", "R", "R, CSS, JavaScript, SCSS"…
#> $ issues_open <int> 6, 22, 31, 3, 88, 0, 0, 3, 0, 0, 0, 0, 0, 0, 0, 6, 10…
#> $ issues_closed <int> 12, 2, 12, 0, 255, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 3…
#> $ contributors <chr> "krystian8207, stla, galachad, stlagsk", "krystian820…
#> $ contributors_n <int> 4, 2, 3, 1, 4, 1, 6, 2, 1, 142, 2, 3, 1, 1, 1, 5, 19,…
You can always go for the lighter version of `get_repos()`, i.e. `get_repos_urls()`, which will give you a vector of URLs instead of the whole table.
repos_urls <- get_repos_urls(git_stats)
#> ℹ [Host:GitHub][Engine:REST][Scope:r-world-devs] Pulling repositories (URLS)...
#> ℹ [Host:GitHub][Engine:REST][Scope:openpharma] Pulling repositories (URLS)...
#> ℹ [Host:GitLab][Engine:REST][Scope:mbtests] Pulling repositories (URLS)...
dplyr::glimpse(repos_urls)
#> 'repos_urls' chr [1:55] "https://github.com/r-world-devs/shinyGizmo" ...
#> - attr(*, "type")= chr "web"
Verbose mode
If messages overwhelm you, you can switch them off in the function:
release_logs <- get_release_logs(
gitstats_object = git_stats,
since = "2024-01-01",
verbose = FALSE
)
dplyr::glimpse(release_logs)
#> Rows: 32
#> Columns: 7
#> $ repo_name <chr> "cohortBuilder", "shinyCohortBuilder", "shinyCohortBuilde…
#> $ repo_url <chr> "https://github.com/r-world-devs/cohortBuilder", "https:/…
#> $ release_name <chr> "cohortBuilder 0.3.0", "v0.3.1", "v0.3.0", "GitStats 2.1.…
#> $ release_tag <chr> "v0.3.0", "v0.3.1", "v0.3.0", "v2.1.1", "v2.1.0", "v2.0.2…
#> $ published_at <dttm> 2024-09-27 11:35:06, 2024-10-24 08:21:19, 2024-10-24 08:…
#> $ release_url <chr> "https://github.com/r-world-devs/cohortBuilder/releases/t…
#> $ release_log <chr> "* Add new filter of type `\"query\"` that allows to conf…
Or globally:
verbose_off(git_stats)
Cache
After pulling, the data is saved to the `GitStats` object.
commits <- get_commits(
gitstats_object = git_stats,
since = "2024-06-01",
until = "2024-06-30",
progress = FALSE
)
#> ℹ [Host:GitHub][Engine:GraphQl][Scope:r-world-devs] Pulling commits...
#> ℹ [Host:GitHub][Engine:GraphQl][Scope:openpharma] Pulling commits...
#> ℹ [Host:GitLab][Engine:REST][Scope:mbtests] Pulling commits...
dplyr::glimpse(commits)
#> Rows: 168
#> Columns: 10
#> $ id <chr> "C_kwDOHYNOFdoAKGU3Mjg5MTViZGM4YzYzMTIwOWEwMzEwMDIwOTA0…
#> $ committed_date <dttm> 2024-06-05 11:02:21, 2024-06-05 10:55:59, 2024-06-05 1…
#> $ author <chr> "Kamil Koziej", "Kamil Koziej", "Kamil Koziej", "Krysti…
#> $ author_login <chr> NA, NA, NA, "krystian8207", "krystian8207", "krystian82…
#> $ author_name <chr> NA, NA, NA, "Krystian Igras", "Krystian Igras", "Krysti…
#> $ additions <int> 559, 0, 219, 1, 2, 108, 38, 83, 14, 1596, 973, 1292, 3,…
#> $ deletions <int> 304, 19, 87, 1, 1, 1, 0, 29, 0, 340, 163, 799, 6, 54, 1…
#> $ repository <chr> "cohortBuilder", "cohortBuilder", "cohortBuilder", "coh…
#> $ organization <chr> "r-world-devs", "r-world-devs", "r-world-devs", "r-worl…
#> $ api_url <glue> "https://api.github.com/graphql", "https://api.github.…
The caching feature is turned on by default. If you run the `get_*()` function once more, the data will be retrieved from the `GitStats` object.
commits <- get_commits(
gitstats_object = git_stats,
since = "2024-06-01",
until = "2024-06-30"
)
#> ! Retrieving commits from the GitStats storage.
#> ℹ If you wish to pull the data from API once more, set `cache` parameter to `FALSE`.
dplyr::glimpse(commits)
#> Rows: 168
#> Columns: 10
#> $ id <chr> "C_kwDOHYNOFdoAKGU3Mjg5MTViZGM4YzYzMTIwOWEwMzEwMDIwOTA0…
#> $ committed_date <dttm> 2024-06-05 11:02:21, 2024-06-05 10:55:59, 2024-06-05 1…
#> $ author <chr> "Kamil Koziej", "Kamil Koziej", "Kamil Koziej", "Krysti…
#> $ author_login <chr> NA, NA, NA, "krystian8207", "krystian8207", "krystian82…
#> $ author_name <chr> NA, NA, NA, "Krystian Igras", "Krystian Igras", "Krysti…
#> $ additions <int> 559, 0, 219, 1, 2, 108, 38, 83, 14, 1596, 973, 1292, 3,…
#> $ deletions <int> 304, 19, 87, 1, 1, 1, 0, 29, 0, 340, 163, 799, 6, 54, 1…
#> $ repository <chr> "cohortBuilder", "cohortBuilder", "cohortBuilder", "coh…
#> $ organization <chr> "r-world-devs", "r-world-devs", "r-world-devs", "r-worl…
#> $ api_url <glue> "https://api.github.com/graphql", "https://api.github.…
Unless you switch off the cache:
commits <- get_commits(
gitstats_object = git_stats,
since = "2024-06-01",
until = "2024-06-30",
cache = FALSE,
progress = FALSE
)
#> ℹ Cache set to FALSE, I will pull data from API.
#> ℹ [Host:GitHub][Engine:GraphQl][Scope:r-world-devs] Pulling commits...
#> ℹ [Host:GitHub][Engine:GraphQl][Scope:openpharma] Pulling commits...
#> ℹ [Host:GitLab][Engine:REST][Scope:mbtests] Pulling commits...
dplyr::glimpse(commits)
#> Rows: 168
#> Columns: 10
#> $ id <chr> "C_kwDOHYNOFdoAKGU3Mjg5MTViZGM4YzYzMTIwOWEwMzEwMDIwOTA0…
#> $ committed_date <dttm> 2024-06-05 11:02:21, 2024-06-05 10:55:59, 2024-06-05 1…
#> $ author <chr> "Kamil Koziej", "Kamil Koziej", "Kamil Koziej", "Krysti…
#> $ author_login <chr> NA, NA, NA, "krystian8207", "krystian8207", "krystian82…
#> $ author_name <chr> NA, NA, NA, "Krystian Igras", "Krystian Igras", "Krysti…
#> $ additions <int> 559, 0, 219, 1, 2, 108, 38, 83, 14, 1596, 973, 1292, 3,…
#> $ deletions <int> 304, 19, 87, 1, 1, 1, 0, 29, 0, 340, 163, 799, 6, 54, 1…
#> $ repository <chr> "cohortBuilder", "cohortBuilder", "cohortBuilder", "coh…
#> $ organization <chr> "r-world-devs", "r-world-devs", "r-world-devs", "r-worl…
#> $ api_url <glue> "https://api.github.com/graphql", "https://api.github.…
Or simply change the parameters for the function:
commits <- get_commits(
gitstats_object = git_stats,
since = "2024-07-01",
progress = FALSE
)
#> ℹ Parameters changed, I will pull data from API.
#> ℹ [Host:GitHub][Engine:GraphQl][Scope:r-world-devs] Pulling commits...
#> ℹ [Host:GitHub][Engine:GraphQl][Scope:openpharma] Pulling commits...
#> ℹ [Host:GitLab][Engine:REST][Scope:mbtests] Pulling commits...
#> ℹ Looking up for authors' names and logins...
dplyr::glimpse(commits)
#> Rows: 841
#> Columns: 10
#> $ id <chr> "C_kwDOHYNOFdoAKDJjN2Q4YTFkNDIzNmQ0YzIwZTI2NDU1Mjg2YTI1…
#> $ committed_date <dttm> 2024-09-25 14:45:42, 2024-09-25 11:00:17, 2024-09-25 1…
#> $ author <chr> "Krystian Igras", "Krystian Igras", "Krystian Igras", "…
#> $ author_login <chr> "krystian8207", "krystian8207", "krystian8207", "krysti…
#> $ author_name <chr> "Krystian Igras", "Krystian Igras", "Krystian Igras", "…
#> $ additions <int> 14, 626, 36, 126, 3, 921, 46, 0, 1, 14, 1, 557, 5, 1, 1…
#> $ deletions <int> 2, 336, 36, 126, 1, 314, 2, 59, 1, 14, 0, 156, 4, 1, 2,…
#> $ repository <chr> "cohortBuilder", "cohortBuilder", "cohortBuilder", "coh…
#> $ organization <chr> "r-world-devs", "r-world-devs", "r-world-devs", "r-worl…
#> $ api_url <glue> "https://api.github.com/graphql", "https://api.github.…
Storage
Finally, have a glimpse at your storage:
git_stats
#> A GitStats object for 2 hosts:
#> Hosts: https://api.github.com, https://gitlab.com/api/v4
#> Scanning scope:
#> Organizations: [3] r-world-devs, openpharma, mbtests
#> Repositories: [0]
#> Storage:
#> Repositories: 85
#> Commits: 841 [date range: 2024-07-01 - 2024-10-30]
#> Release_logs: 32 [date range: 2024-01-01 - 2024-10-30]
#> Repos_urls: 55 [type: web]
You can retrieve all the data from your `GitStats` object with:
get_storage(git_stats)
#> $repositories
#> # A tibble: 85 × 18
#> repo_id repo_name organization fullname platform repo_url api_url
#> <chr> <chr> <chr> <chr> <chr> <chr> <chr>
#> 1 R_kgDOHNMr2w shinyGizmo r-world-devs r-world… github https:/… https:…
#> 2 R_kgDOHYNOFQ cohortBuilder r-world-devs r-world… github https:/… https:…
#> 3 R_kgDOHYNrJw shinyCohortBuil… r-world-devs r-world… github https:/… https:…
#> 4 R_kgDOHYNxtw cohortBuilder.db r-world-devs r-world… github https:/… https:…
#> 5 R_kgDOIvtxsg GitStats r-world-devs r-world… github https:/… https:…
#> 6 R_kgDOJAtHJA shinyTimelines r-world-devs r-world… github https:/… https:…
#> 7 R_kgDOJKQ8Lg ROhdsiWebApi r-world-devs r-world… github https:/… https:…
#> 8 R_kgDOJWYrCA hypothesis r-world-devs r-world… github https:/… https:…
#> 9 R_kgDOMHUIwg useR2024-master… r-world-devs r-world… github https:/… https:…
#> 10 R_kgDOMMESGQ dbplyr r-world-devs r-world… github https:/… https:…
#> # ℹ 75 more rows
#> # ℹ 11 more variables: created_at <dttm>, last_activity_at <dttm>,
#> # last_activity <drtn>, default_branch <chr>, stars <int>, forks <int>,
#> # languages <chr>, issues_open <int>, issues_closed <int>,
#> # contributors <chr>, contributors_n <int>
#>
#> $commits
#> # A tibble: 841 × 10
#> id committed_date author author_login author_name additions deletions
#> <chr> <dttm> <chr> <chr> <chr> <int> <int>
#> 1 C_kw… 2024-09-25 14:45:42 Kryst… krystian8207 Krystian I… 14 2
#> 2 C_kw… 2024-09-25 11:00:17 Kryst… krystian8207 Krystian I… 626 336
#> 3 C_kw… 2024-09-25 10:58:36 Kryst… krystian8207 Krystian I… 36 36
#> 4 C_kw… 2024-09-25 10:53:29 Kryst… krystian8207 Krystian I… 126 126
#> 5 C_kw… 2024-09-25 10:15:05 Kryst… krystian8207 Krystian I… 3 1
#> 6 C_kw… 2024-09-25 10:03:08 Kryst… krystian8207 Krystian I… 921 314
#> 7 C_kw… 2024-09-24 08:50:02 Kryst… krystian8207 Krystian I… 46 2
#> 8 C_kw… 2024-09-20 18:43:26 Kryst… krystian8207 Krystian I… 0 59
#> 9 C_kw… 2024-09-20 18:41:14 Kryst… krystian8207 Krystian I… 1 1
#> 10 C_kw… 2024-09-20 18:39:46 Kryst… krystian8207 Krystian I… 14 14
#> # ℹ 831 more rows
#> # ℹ 3 more variables: repository <chr>, organization <chr>, api_url <glue>
#>
#> $users
#> NULL
#>
#> $files
#> NULL
#>
#> $files_structure
#> NULL
#>
#> $R_package_usage
#> NULL
#>
#> $release_logs
#> # A tibble: 32 × 7
#> repo_name repo_url release_name release_tag published_at release_url
#> <chr> <chr> <chr> <chr> <dttm> <chr>
#> 1 cohortBuil… https:/… cohortBuild… v0.3.0 2024-09-27 11:35:06 https://gi…
#> 2 shinyCohor… https:/… v0.3.1 v0.3.1 2024-10-24 08:21:19 https://gi…
#> 3 shinyCohor… https:/… v0.3.0 v0.3.0 2024-10-24 08:20:32 https://gi…
#> 4 GitStats https:/… GitStats 2.… v2.1.1 2024-10-29 07:46:43 https://gi…
#> 5 GitStats https:/… 2.1.0 v2.1.0 2024-10-01 07:29:43 https://gi…
#> 6 GitStats https:/… 2.0.2 v2.0.2 2024-07-19 07:59:12 https://gi…
#> 7 GitStats https:/… 2.0.1 v2.0.1 2024-05-14 10:22:52 https://gi…
#> 8 GitStats https:/… 1.1.0 v1.1.0 2024-01-08 10:14:13 https://gi…
#> 9 queryBuild… https:/… First relea… v0.1.0 2024-09-27 11:31:59 https://gi…
#> 10 shinyQuery… https:/… First relea… v0.1.0 2024-09-27 11:35:54 https://gi…
#> # ℹ 22 more rows
#> # ℹ 1 more variable: release_log <chr>
#>
#> $repos_urls
#> [1] "https://github.com/r-world-devs/shinyGizmo"
#> [2] "https://github.com/r-world-devs/cohortBuilder"
#> [3] "https://github.com/r-world-devs/shinyCohortBuilder"
#> [4] "https://github.com/r-world-devs/cohortBuilder.db"
#> [5] "https://github.com/r-world-devs/GitStats"
#> [6] "https://github.com/r-world-devs/shinyTimelines"
#> [7] "https://github.com/r-world-devs/ROhdsiWebApi"
#> [8] "https://github.com/r-world-devs/hypothesis"
#> [9] "https://github.com/r-world-devs/useR2024-mastering-plumber-api"
#> [10] "https://github.com/r-world-devs/dbplyr"
#> [11] "https://github.com/r-world-devs/IncidencePrevalence"
#> [12] "https://github.com/r-world-devs/MegaStudy"
#> [13] "https://github.com/r-world-devs/useR2024-cohortBuilder-minidemo"
#> [14] "https://github.com/r-world-devs/queryBuilder"
#> [15] "https://github.com/r-world-devs/shinyQueryBuilder"
#> [16] "https://github.com/openpharma/openpharma.github.io"
#> [17] "https://github.com/openpharma/crmPack"
#> [18] "https://github.com/openpharma/visR"
#> [19] "https://github.com/openpharma/pypharma_nlp"
#> [20] "https://github.com/openpharma/RDO"
#> [21] "https://github.com/openpharma/syntrial"
#> [22] "https://github.com/openpharma/simaerep"
#> [23] "https://github.com/openpharma/CTP"
#> [24] "https://github.com/openpharma/sas7bdat"
#> [25] "https://github.com/openpharma/visR-docs"
#> [26] "https://github.com/openpharma/facetsr"
#> [27] "https://github.com/openpharma/GithubMetrics"
#> [28] "https://github.com/openpharma/BBS-causality-training"
#> [29] "https://github.com/openpharma/synthetic.data.submission.shiny"
#> [30] "https://github.com/openpharma/synthetic.data.archive"
#> [31] "https://github.com/openpharma/tester"
#> [32] "https://github.com/openpharma/staged.dependencies"
#> [33] "https://github.com/openpharma/openpharma_log"
#> [34] "https://github.com/openpharma/quality_risk_assesment_clinical_trials"
#> [35] "https://github.com/openpharma/stageddeps.house"
#> [36] "https://github.com/openpharma/stageddeps.garden"
#> [37] "https://github.com/openpharma/stageddeps.food"
#> [38] "https://github.com/openpharma/stageddeps.electricity"
#> [39] "https://github.com/openpharma/stageddeps.elecinfra"
#> [40] "https://github.com/openpharma/stageddeps.water"
#> [41] "https://github.com/openpharma/DataFakeR"
#> [42] "https://github.com/openpharma/rinpharma_workshop_2021_old"
#> [43] "https://github.com/openpharma/rinpharma_workshop_2021"
#> [44] "https://github.com/openpharma/elaborator"
#> [45] "https://github.com/openpharma/mmrm"
#> [46] "https://gitlab.com/mbtests/graphql_tests"
#> [47] "https://gitlab.com/mbtests/regextips"
#> [48] "https://gitlab.com/mbtests/gitstatstesting4"
#> [49] "https://gitlab.com/mbtests/gitstatstesting3"
#> [50] "https://gitlab.com/mbtests/testrpackage"
#> [51] "https://gitlab.com/mbtests/rm-tests-3"
#> [52] "https://gitlab.com/mbtests/rm-tests-2"
#> [53] "https://gitlab.com/mbtests/rm-tests"
#> [54] "https://gitlab.com/mbtests/gitstats-testing-2"
#> [55] "https://gitlab.com/mbtests/gitstatstesting"
#> attr(,"class")
#> [1] "repos_urls" "character"
#> attr(,"type")
#> [1] "web"
Or a particular data set:
get_storage(
gitstats_object = git_stats,
storage = "repositories"
)
#> # A tibble: 85 × 18
#> repo_id repo_name organization fullname platform repo_url api_url
#> <chr> <chr> <chr> <chr> <chr> <chr> <chr>
#> 1 R_kgDOHNMr2w shinyGizmo r-world-devs r-world… github https:/… https:…
#> 2 R_kgDOHYNOFQ cohortBuilder r-world-devs r-world… github https:/… https:…
#> 3 R_kgDOHYNrJw shinyCohortBuil… r-world-devs r-world… github https:/… https:…
#> 4 R_kgDOHYNxtw cohortBuilder.db r-world-devs r-world… github https:/… https:…
#> 5 R_kgDOIvtxsg GitStats r-world-devs r-world… github https:/… https:…
#> 6 R_kgDOJAtHJA shinyTimelines r-world-devs r-world… github https:/… https:…
#> 7 R_kgDOJKQ8Lg ROhdsiWebApi r-world-devs r-world… github https:/… https:…
#> 8 R_kgDOJWYrCA hypothesis r-world-devs r-world… github https:/… https:…
#> 9 R_kgDOMHUIwg useR2024-master… r-world-devs r-world… github https:/… https:…
#> 10 R_kgDOMMESGQ dbplyr r-world-devs r-world… github https:/… https:…
#> # ℹ 75 more rows
#> # ℹ 11 more variables: created_at <dttm>, last_activity_at <dttm>,
#> # last_activity <drtn>, default_branch <chr>, stars <int>, forks <int>,
#> # languages <chr>, issues_open <int>, issues_closed <int>,
#> # contributors <chr>, contributors_n <int>