Skip to contents

Set connections to hosts.

The example workflow uses public GitHub and GitLab, but you may well use internal git platforms instead, in which case you need to define the host parameter. See the vignette("set_hosts") article for details.

library(GitStats)

# Start from a fresh GitStats object, then register each host in turn.
git_stats <- create_gitstats()

# GitHub: two organizations, token read from the environment.
git_stats <- set_github_host(
  git_stats,
  orgs = c("r-world-devs", "openpharma"),
  token = Sys.getenv("GITHUB_PAT")
)

# GitLab: a single group, with its own token.
git_stats <- set_gitlab_host(
  git_stats,
  orgs = "mbtests",
  token = Sys.getenv("GITLAB_PAT_PUBLIC")
)
#>  Searching scope set to [org].
#>  Checking organizations...
#>  Set connection to GitHub.
#>  Searching scope set to [org].
#>  Checking organizations...
#>  Set connection to GitLab.

With GitStats you can get the content of all text files in a repository that are of interest to you. First you need to get the files structure. You can pull specific types of files by setting pattern to a regular expression and depth to an integer that defines how many directory levels to search for files.

# Collect the structure of files matching the regular expression "\\.md",
# searching one directory level deep.
files_structure <- get_files_structure(
  gitstats_object = git_stats,
  pattern = "\\.md",
  depth = 1L,
  progress = FALSE
)
#>  [Host:GitHub][Engine:GraphQl][Scope:r-world-devs] Pulling files structure...[files matching pattern: '\.md']...
#>  [Host:GitHub][Engine:GraphQl][Scope:openpharma] Pulling files structure...[files matching pattern: '\.md']...
#>  [Host:GitLab][Engine:GraphQl][Scope:mbtests] Pulling files structure...[files matching pattern: '\.md']...
# Print a compact summary of the pulled files structure.
dplyr::glimpse(files_structure)
#> List of 2
#>  $ github.com:List of 2
#>   ..$ r-world-devs:List of 14
#>   .. ..$ shinyGizmo         : chr [1:3] "NEWS.md" "README.md" "cran-comments.md"
#>   .. ..$ cohortBuilder      : chr [1:2] "NEWS.md" "README.md"
#>   .. ..$ shinyCohortBuilder : chr [1:2] "NEWS.md" "README.md"
#>   .. ..$ cohortBuilder.db   : chr "README.md"
#>   .. ..$ GitStats           : chr [1:4] "LICENSE.md" "NEWS.md" "README.md" "cran-comments.md"
#>   .. ..$ shinyTimelines     : chr "README.md"
#>   .. ..$ ROhdsiWebApi       : chr [1:2] "NEWS.md" "README.md"
#>   .. ..$ hypothesis         : chr [1:2] "NEWS.md" "README.md"
#>   .. ..$ dbplyr             : chr [1:4] "LICENSE.md" "NEWS.md" "README.md" "cran-comments.md"
#>   .. ..$ IncidencePrevalence: chr [1:3] "LICENSE.md" "README.md" "cran-comments.md"
#>   .. ..$ MegaStudy          : chr "README.md"
#>   .. ..$ queryBuilder       : chr [1:2] "NEWS.md" "README.md"
#>   .. ..$ shinyQueryBuilder  : chr [1:2] "NEWS.md" "README.md"
#>   .. ..$ GitAI              : chr [1:2] "LICENSE.md" "README.md"
#>   ..$ openpharma  :List of 50
#>   .. ..$ openpharma.github.io                  : chr "README.md"
#>   .. ..$ crmPack                               : chr [1:2] "NEWS.md" "README.md"
#>   .. ..$ visR                                  : chr [1:5] "CODE_OF_CONDUCT.md" "LICENSE.md" "NEWS.md" "README.md" ...
#>   .. ..$ pypharma_nlp                          : chr [1:4] "AGENDA.md" "NOTES.md" "OUTLINE.md" "README.md"
#>   .. ..$ RDO                                   : chr [1:3] "LICENSE.md" "NEWS.md" "README.md"
#>   .. ..$ syntrial                              : chr [1:3] "LICENSE.md" "NEWS.md" "README.md"
#>   .. ..$ simaerep                              : chr [1:4] "LICENSE.md" "NEWS.md" "README.md" "cran-comments.md"
#>   .. ..$ CTP                                   : chr [1:2] "NEWS.md" "README.md"
#>   .. ..$ sas7bdat                              : chr "README.md"
#>   .. ..$ visR-docs                             : chr "README.md"
#>   .. ..$ facetsr                               : chr [1:2] "NEWS.md" "README.md"
#>   .. ..$ GithubMetrics                         : chr [1:4] "LICENSE.md" "NEWS.md" "README.md" "cran-comments.md"
#>   .. ..$ tester                                : chr [1:2] "LICENSE.md" "README.md"
#>   .. ..$ staged.dependencies                   : chr [1:2] "NEWS.md" "README.md"
#>   .. ..$ openpharma_log                        : chr "README.md"
#>   .. ..$ quality_risk_assesment_clinical_trials: chr "README.md"
#>   .. ..$ DataFakeR                             : chr [1:3] "NEWS.md" "README.md" "cran-comments.md"
#>   .. ..$ rinpharma_workshop_2021_old           : chr "README.md"
#>   .. ..$ rinpharma_workshop_2021               : chr "README.md"
#>   .. ..$ elaborator                            : chr [1:3] "NEWS.md" "README.md" "cran-comments.md"
#>   .. ..$ graphicalMCP                          : chr [1:4] "LICENSE.md" "NEWS.md" "README.md" "cran-comments.md"
#>   .. ..$ mmrm                                  : chr [1:2] "NEWS.md" "README.md"
#>   .. ..$ opensource_dashboard                  : chr "README.md"
#>   .. ..$ phuse-scripts                         : chr [1:5] "CSS_2016.md" "LICENSE.md" "MetaData_template.md" "README.md" ...
#>   .. ..$ trialreport                           : chr [1:2] "LICENSE.md" "README.md"
#>   .. ..$ openpharma_ml                         : chr "README.md"
#>   .. ..$ mtdesign                              : chr [1:4] "LICENSE.md" "NEWS.md" "README.md" "cran-comments.md"
#>   .. ..$ rbqmR                                 : chr [1:3] "LICENSE.md" "NEWS.md" "README.md"
#>   .. ..$ savvyr                                : chr [1:2] "NEWS.md" "README.md"
#>   .. ..$ roxytypes                             : chr [1:4] "LICENSE.md" "NEWS.md" "README.md" "cran-comments.md"
#>   .. ..$ roxylint                              : chr [1:3] "LICENSE.md" "NEWS.md" "README.md"
#>   .. ..$ autoquarto                            : chr [1:2] "README.md" "cran-comments.md"
#>   .. ..$ workshop-r-swe                        : chr [1:2] "LICENSE.md" "README.md"
#>   .. ..$ brms.mmrm                             : chr [1:4] "CODE_OF_CONDUCT.md" "LICENSE.md" "NEWS.md" "README.md"
#>   .. ..$ workshop-r-swe-sf                     : chr [1:2] "LICENSE.md" "README.md"
#>   .. ..$ RobinCar2                             : chr [1:2] "NEWS.md" "README.md"
#>   .. ..$ filters                               : chr [1:2] "NEWS.md" "README.md"
#>   .. ..$ workshop-r-swe-md                     : chr [1:2] "LICENSE.md" "README.md"
#>   .. ..$ workshop-r-swe-mtl                    : chr [1:2] "LICENSE.md" "README.md"
#>   .. ..$ CAMIS                                 : chr "README.md"
#>   .. ..$ clindata                              : chr [1:4] "NEWS.md" "README.md" "SECURITY.md" "workflows.md"
#>   .. ..$ SafetySignalDetection.jl              : chr "README.md"
#>   .. ..$ os-metadata                           : chr "README.md"
#>   .. ..$ diffdf                                : chr [1:3] "NEWS.md" "README.md" "cran-comments.md"
#>   .. ..$ clinsight                             : chr [1:3] "LICENSE.md" "NEWS.md" "README.md"
#>   .. ..$ beeca                                 : chr [1:4] "LICENSE.md" "NEWS.md" "README.md" "cran-comments.md"
#>   .. ..$ beeca-simulations                     : chr "README.md"
#>   .. ..$ DoseFinding                           : chr [1:3] "NEWS.md" "README.md" "cran-comments.md"
#>   .. ..$ generate_badges                       : chr "README.md"
#>   .. ..$ workshop-r-swe-rinpharma-2024         : chr [1:2] "LICENSE.md" "README.md"
#>  $ gitlab.com:List of 1
#>   ..$ mbtests:List of 10
#>   .. ..$ graphql_tests     : chr "README.md"
#>   .. ..$ regextips         : chr "README.md"
#>   .. ..$ gitstatstesting4  : chr "README.md"
#>   .. ..$ gitstatstesting3  : chr "README.md"
#>   .. ..$ testrpackage      : chr "README.md"
#>   .. ..$ rm-tests-3        : chr "README.md"
#>   .. ..$ rm-tests-2        : chr "README.md"
#>   .. ..$ rm-tests          : chr "README.md"
#>   .. ..$ gitstats-testing-2: chr "README.md"
#>   .. ..$ gitstatstesting   : chr "README.md"
#>  - attr(*, "class")= chr [1:2] "files_structure" "list"
#>  - attr(*, "pattern")= chr "\\.md"
#>  - attr(*, "depth")= int 1

Once you pull the files structure, GitStats will store it. If you then run the get_files_content() function, it will by default make use of this structure (unless you define file_path, which overrides the saved files structure).

# No file_path is given here, so the call relies on the files structure
# already stored in the GitStats object.
files_content <- get_files_content(
  gitstats_object = git_stats,
  progress = FALSE
)
#>  I will make use of files structure stored in GitStats.
#>  [Host:GitHub][Engine:GraphQl][Scope:r-world-devs] Pulling files from files structure...
#>  [Host:GitHub][Engine:GraphQl][Scope:openpharma] Pulling files from files structure...
#>  I will make use of files structure stored in GitStats.
#>  [Host:GitLab][Engine:GraphQl][Scope:mbtests] Pulling files from files structure...
# Print a compact, column-by-column summary of the files content table.
dplyr::glimpse(files_content)
#> Rows: 160
#> Columns: 8
#> $ repo_name    <chr> "shinyGizmo", "shinyGizmo", "shinyGizmo", "cohortBuilder"…
#> $ repo_id      <chr> "R_kgDOHNMr2w", "R_kgDOHNMr2w", "R_kgDOHNMr2w", "R_kgDOHY…
#> $ organization <chr> "r-world-devs", "r-world-devs", "r-world-devs", "r-world-…
#> $ file_path    <chr> "NEWS.md", "README.md", "cran-comments.md", "NEWS.md", "R…
#> $ file_content <chr> "# shinyGizmo 0.4.2\n\n* Fix handling non-existing select…
#> $ file_size    <int> 2186, 2337, 1700, 917, 15828, 2018, 3355, 3472, 1075, 199…
#> $ repo_url     <chr> "https://github.com/r-world-devs/shinyGizmo", "https://gi…
#> $ api_url      <chr> "https://api.github.com/repos/r-world-devs/shinyGizmo", "…