# Query the GitHub rate-limit endpoint and print the response, which is a
# quick way to confirm the token is accepted before running the crawls below.
#
# The token is read from the GITHUB_PAT environment variable so the secret
# never appears in the script. NOTE(review): a token was previously pasted
# into this file in clear text; it should be revoked and replaced.
gh_token <- Sys.getenv("GITHUB_PAT")
if (!nzchar(gh_token)) {
  stop("Set the GITHUB_PAT environment variable to a GitHub token.",
       call. = FALSE)
}

url <- "https://api.github.com/rate_limit"
res <- httr::GET(
  url,
  httr::add_headers(Authorization = paste("token", gh_token))
)
res
NMFS repos
# GitHub organisations to crawl, split into two groups. The group labels
# used in the report are "non-Enterprise GH org" and "Enterprise GH org".

# Organisations in the non-Enterprise group.
non.ent.orgs <- c(
  "noaa-fisheries-integrated-toolbox", "nmfs-fish-tools", "noaa-fims",
  "noaa-iea", "ecosystem-state", "futureseas",
  "pfmc-assessments", "pacific-hake", "nmfs-stock-synthesis", "r4ss", "ss3sim",
  "nwfsc-math-bio", "nwfsc-fram", "NOAA-FEAT", "nwfsc-cb", "NWFSC-OA-lab",
  "TIDE-NWFSC",
  "rverse-tutorials", "nmfs-opensci",
  "NOAA-EDAB", "PIFSCstockassessments",
  "afsc-assessments", "NMML", "afsc-gap-products", "afsc-ecofoci",
  "alaska-groundfish-efh",
  "us-amlr", "noaa-garfo"
)

# Organisations in the Enterprise group.
ent.orgs <- c("NEFSC", "SEFSC", "SWFSC", "PIFSC-NMFS-NOAA")
Make a fine-grained personal access token (PAT).
https://api.github.com/orgs/ORG/repos
# Fetch the repositories of every organisation named in `non.ent.orgs` and
# `ent.orgs` through GET /orgs/{org}/repos.
#
# Result: `orgtabs`, a list keyed by group name ("non.ent.orgs", "ent.orgs");
# each element is a list keyed by organisation holding the parsed response,
# with two added columns: `org` and `license_name`.
#
# The token comes from the GITHUB_PAT environment variable; never paste it
# into the script.
library(dplyr)

gh_token <- Sys.getenv("GITHUB_PAT")
if (!nzchar(gh_token)) {
  stop("Set the GITHUB_PAT environment variable to a GitHub token.",
       call. = FALSE)
}

orgtabs <- list()
for (tab_type in c("non.ent.orgs", "ent.orgs")) {
  orgnames <- get(tab_type)
  tbl <- list()
  update_tbl <- FALSE
  for (org in orgnames[!(orgnames %in% names(tbl))]) {
    update_tbl <- TRUE
    # A single page of at most 100 repositories is requested per org.
    url <- paste0("https://api.github.com/orgs/", org, "/repos?per_page=100")
    res <- httr::GET(
      url,
      httr::add_headers(authorization = paste("token", gh_token))
    )
    # An error payload is a JSON object, not a repo table; do not store it.
    if (res$status_code != 200L) {
      warning("Skipping ", org, ": HTTP ", res$status_code, call. = FALSE)
      next
    }
    dat <- jsonlite::fromJSON(rawToChar(res$content))
    # An empty array parses to list(); storing it would later show up as a
    # phantom one-row "repository".
    if (length(dat) == 0) next
    dat$org <- org
    # `license` is a nested data frame when at least one repo has a licence
    # and an all-NA logical vector otherwise. Take the SPDX id per repo
    # (previously every repo received the first repo's id).
    dat$license_name <- if (is.data.frame(dat$license)) {
      dat$license$spdx_id
    } else {
      NA_character_
    }
    tbl[[org]] <- dat
    cat(org, " ")
  }
  if (update_tbl) orgtabs[[tab_type]] <- tbl
}
# Variant of the organisation crawl that uses the repository search endpoint
# (GET /search/repositories?q=org:{org}) and keeps the `items` element of the
# response.
#
# Result: `orgtabs`, a list keyed by group name ("non.ent.orgs", "ent.orgs");
# each element is a list keyed by organisation holding the parsed items,
# with two added columns: `org` and `license_name`.
#
# The token comes from the GITHUB_PAT environment variable; never paste it
# into the script.
library(dplyr)

gh_token <- Sys.getenv("GITHUB_PAT")
if (!nzchar(gh_token)) {
  stop("Set the GITHUB_PAT environment variable to a GitHub token.",
       call. = FALSE)
}

orgtabs <- list()
for (tab_type in c("non.ent.orgs", "ent.orgs")) {
  orgnames <- get(tab_type)
  tbl <- list()
  update_tbl <- FALSE
  for (org in orgnames[!(orgnames %in% names(tbl))]) {
    update_tbl <- TRUE
    # per_page=100 is requested explicitly: the search default of 30 results
    # silently truncates larger organisations.
    url <- paste0(
      "https://api.github.com/search/repositories?q=org:", org,
      "&per_page=100"
    )
    res <- httr::GET(
      url,
      httr::add_headers(authorization = paste("token", gh_token))
    )
    # An error payload has no usable `items`; do not store it.
    if (res$status_code != 200L) {
      warning("Skipping ", org, ": HTTP ", res$status_code, call. = FALSE)
      next
    }
    dat <- jsonlite::fromJSON(rawToChar(res$content))$items
    # No hits parses to an empty list; storing it would later show up as a
    # phantom one-row "repository".
    if (length(dat) == 0) next
    dat$org <- org
    # `license` is a nested data frame when at least one repo has a licence
    # and an all-NA logical vector otherwise. Take the SPDX id per repo
    # (previously every repo received the first repo's id).
    dat$license_name <- if (is.data.frame(dat$license)) {
      dat$license$spdx_id
    } else {
      NA_character_
    }
    tbl[[org]] <- dat
  }
  if (update_tbl) orgtabs[[tab_type]] <- tbl
}
dat <- dat %>% arrange(desc(pushed_at)) %>% mutate(last_update = as.Date(pushed_at, "%Y-%m-%d"), topic = ifelse(is.null(unlist(topics)), "", unlist(topics))) %>% select(c("org", "name", "language", "last_update", "license_name", "description", "topic"))
Make a table of the total number of repos.
# Flatten one group of per-organisation repo tables into a single data frame
# with one row per repository.
#
# repo_tabs: list keyed by organisation name; each element is the repo table
#            for that organisation.
# Returns the columns GH_org, name, language, last_update (Date parsed from
# `pushed_at`), license_name, description and topic.
flatten_org_repos <- function(repo_tabs) {
  bind_rows(repo_tabs, .id = "GH_org") %>%
    mutate(
      # Only the leading YYYY-MM-DD part of the timestamp is parsed.
      last_update = as.Date(pushed_at, format = "%Y-%m-%d"),
      # One string per repository: its topics joined by ", ", or "" when it
      # has none. (A scalar ifelse() here used to give every row the same
      # value.)
      topic = vapply(
        topics,
        function(x) paste(unlist(x), collapse = ", "),
        character(1)
      )
    ) %>%
    select(all_of(c(
      "GH_org", "name", "language", "last_update",
      "license_name", "description", "topic"
    )))
}

# The groups are looked up by name so a missing group cannot shift the other
# one into the wrong label.
tmptbl <- list()
tmptbl[["non-Enterprise GH org"]] <- flatten_org_repos(orgtabs[["non.ent.orgs"]])
tmptbl[["Enterprise GH org"]] <- flatten_org_repos(orgtabs[["ent.orgs"]])

# `org.df`: all repositories, with the group label in column `type`.
org.df <- bind_rows(tmptbl, .id = "type")
# Per-organisation repository counts: total, last pushed in 2022-2023, and
# last pushed in 2020-2021. Sorted so the most recently active organisations
# come first.
nrepos_by_org <- org.df %>%
  group_by(type, GH_org) %>%
  summarize(
    nrepos = n(),
    # %in% never yields NA, so repos without a date simply count as 0.
    updated_2023_2022 = sum(lubridate::year(last_update) %in% c(2023, 2022)),
    updated_2021_2020 = sum(lubridate::year(last_update) %in% c(2021, 2020)),
    # Return an ungrouped table; this also silences the "grouped output"
    # message.
    .groups = "drop"
  ) %>%
  arrange(desc(updated_2023_2022))
`summarise()` has grouped output by 'type'. You can override using the
`.groups` argument.
# Per-group totals: number of organisations (`n`), number of repositories,
# and how many were last pushed in 2022-2023 and in 2020-2021.
nrepos_by_type <- org.df %>%
  group_by(type) %>%
  summarize(
    n = n_distinct(GH_org),
    nrepos = n(),
    # %in% never yields NA, so repos without a date simply count as 0.
    updated_2023_2022 = sum(lubridate::year(last_update) %in% c(2023, 2022)),
    updated_2021_2020 = sum(lubridate::year(last_update) %in% c(2021, 2020))
  ) %>%
  arrange(desc(updated_2023_2022))
# Load the two hand-maintained username lists.
#   usernames_rug  : column `GitHub.username` of usernames_nmfs_rug.csv
#   usernames_noaa : usernames_rug plus column `username` of
#                    usernames_other_noaa.csv
# Names are trimmed before filtering so that blank, whitespace-only and NA
# entries are all dropped (a plain `!= ""` test lets NA through).
userdat <- read.csv("usernames_nmfs_rug.csv")
usernames_rug <- stringr::str_trim(userdat$GitHub.username)
usernames_rug <- usernames_rug[!is.na(usernames_rug) & nzchar(usernames_rug)]

userdat <- read.csv("usernames_other_noaa.csv")
usernames_other <- stringr::str_trim(userdat$username)
usernames_other <- usernames_other[
  !is.na(usernames_other) & nzchar(usernames_other)
]

usernames_noaa <- c(usernames_rug, usernames_other)
#https://api.github.com/repos/NOAA-FIMS/FIMS/contributors
# Collect the contributor list of every repository in `org.df`.
#
# Result: `contributortabs`, a list keyed by organisation; each element is
# the row-bound contributor tables of that organisation's repositories with
# added `org` and `repo` columns.
#
# The token comes from the GITHUB_PAT environment variable; never paste it
# into the script.
library(dplyr)

gh_token <- Sys.getenv("GITHUB_PAT")
if (!nzchar(gh_token)) {
  stop("Set the GITHUB_PAT environment variable to a GitHub token.",
       call. = FALSE)
}

# Resume from an earlier run if `contributortabs` exists, otherwise start
# empty. Reusing whatever `tbl` another crawl left behind would make the
# loop skip every organisation already present in that unrelated list.
tbl <- if (exists("contributortabs")) contributortabs else list()

all_orgs <- unique(org.df$GH_org)
for (org in all_orgs[!(all_orgs %in% names(tbl))]) {
  repo_names <- org.df$name[org.df$GH_org == org]
  # Gather per-repo tables in a list and bind once, rather than growing a
  # data frame with rbind() on every iteration.
  repo_tabs <- list()
  for (reponame in repo_names) {
    url <- paste0(
      "https://api.github.com/repos/", org, "/", reponame,
      "/contributors?per_page=100"
    )
    res <- httr::GET(
      url,
      httr::add_headers(authorization = paste("token", gh_token))
    )
    # Anything but 200 (empty repo, missing repo, rate limit) carries no
    # contributor table.
    if (res$status_code != 200L) next
    body <- rawToChar(res$content)
    if (body == "") next
    dat <- jsonlite::fromJSON(body)
    if (length(dat) == 0) next
    dat$org <- org
    dat$repo <- reponame
    repo_tabs[[reponame]] <- dat
  }
  tbl[[org]] <- bind_rows(repo_tabs)
  cat(org, "\n")
}
contributortabs <- tbl
# One row per contributor login across all crawled organisations.
#   nrepos   : number of contributor rows for that login
#   norgs    : number of distinct organisations contributed to
#   ncontrib : total contributions
df.contributors <- bind_rows(contributortabs) %>%
  group_by(login) %>%
  summarize(
    nrepos = n(),
    # Count distinct organisations; length(org) merely repeated `nrepos`.
    norgs = n_distinct(org),
    ncontrib = sum(contributions, na.rm = TRUE)
  )

# Contributors not already on the hand-maintained NOAA list.
# NOTE(review): the original filtered against `usernames`, which is not
# defined anywhere in the visible script; `usernames_noaa` is assumed to be
# the intended vector -- confirm.
usernames_orgs <- df.contributors$login[
  !(df.contributors$login %in% usernames_noaa)
]

# Every username of interest: the known NOAA accounts plus all contributors.
usernames_all <- unique(c(usernames_noaa, usernames_orgs))
# Extend `usernames_noaa` with accounts whose profile matches one of these
# rules:
#   1. the login contains "NOAA", "noaa" or "Noaa";
#   2. the profile e-mail contains "noaa";
#   3. the profile company or bio contains one of the keywords listed below.
# Finally the usernames_rug list is appended and duplicates are removed.

# TRUE where `x` is non-missing and matches `pattern`.
has_match <- function(x, pattern) {
  !is.na(x) & stringr::str_detect(x, pattern)
}

# Rule 1: login looks like a NOAA account.
login_hits <- usernames_all[has_match(usernames_all, "NOAA|noaa|Noaa")]

# Rules 2 and 3 use the profiles stored in `usertabs`.
tmp <- bind_rows(usertabs)
email_hits <- tmp$login[has_match(tmp$email, "noaa")]

keywords <- c("NOAA", "NMFS", "NEFSC", "PIFSC", "SWFSC", "SEFSC", "GARFO", "AFSC")
# Per keyword: company matches first, then bio matches.
profile_hits <- unlist(lapply(keywords, function(i) {
  c(
    tmp$login[has_match(tmp$company, i)],
    tmp$login[has_match(tmp$bio, i)]
  )
}))

usernames_noaa <- unique(c(
  usernames_noaa, login_hits, email_hits, profile_hits, usernames_rug
))
# Read the `username` column of usernames_not_noaa.csv.
# NOTE(review): presumably accounts that were checked and judged not to
# belong to NOAA staff -- confirm against the CSV's maintainer.
usernames_not_noaa <- read.csv("usernames_not_noaa.csv")$username
# sort(usernames_all[!(usernames_all %in% usernames_noaa) & !(usernames_all %in% usernames_not_noaa)])
# Fetch the repositories of every account in `usernames_noaa`
# (GET /users/{username}/repos), 100 per page.
#
# Result: `userrepotabs`, a list keyed by username holding that user's
# row-bound repository table. Users with no profile in `usertabs`, or whose
# profile has no `public_repos` field, are skipped.
#
# The token comes from the GITHUB_PAT environment variable; never paste it
# into the script.
library(dplyr)

gh_token <- Sys.getenv("GITHUB_PAT")
if (!nzchar(gh_token)) {
  stop("Set the GITHUB_PAT environment variable to a GitHub token.",
       call. = FALSE)
}

tbl <- list()
#tbl <- userrepotabs
for (username in usernames_noaa[!(usernames_noaa %in% names(tbl))]) {
  n_repos <- usertabs[[username]]$public_repos
  if (is.null(n_repos)) next
  # Exactly enough pages for `n_repos`, but at least one request so users
  # with zero repos still get an (empty) entry.
  n_pages <- max(1, ceiling(n_repos / 100))
  pages <- vector("list", n_pages)
  for (i in seq_len(n_pages)) {
    # The page number must be pasted in; the literal "page=i" previously sent
    # never advanced past the first page.
    url <- paste0(
      "https://api.github.com/users/", username,
      "/repos?per_page=100&page=", i
    )
    res <- httr::GET(
      url,
      httr::add_headers(authorization = paste("token", gh_token))
    )
    # Leave the slot NULL on failure; bind_rows() ignores NULL elements.
    if (res$status_code != 200L) next
    pages[[i]] <- jsonlite::fromJSON(rawToChar(res$content))
  }
  tbl[[username]] <- bind_rows(pages)
  cat(username, " ")
}
userrepotabs <- tbl
# Fetch the public profile of every account in `usernames_all`
# (GET /users/{username}).
#
# Result: `usertabs`, a list keyed by username holding the parsed profile.
#
# The token comes from the GITHUB_PAT environment variable; never paste it
# into the script.
library(dplyr)

gh_token <- Sys.getenv("GITHUB_PAT")
if (!nzchar(gh_token)) {
  stop("Set the GITHUB_PAT environment variable to a GitHub token.",
       call. = FALSE)
}

# Resume from an earlier run if `usertabs` exists, otherwise start empty.
# Reusing whatever `tbl` another crawl left behind would mix unrelated
# tables into the profile list and skip those users.
tbl <- if (exists("usertabs")) usertabs else list()

for (username in usernames_all[!(usernames_all %in% names(tbl))]) {
  url <- paste0("https://api.github.com/users/", username)
  res <- httr::GET(
    url,
    httr::add_headers(authorization = paste("token", gh_token))
  )
  # Do not store error payloads (unknown user, rate limit) as profiles; the
  # user is retried on the next run.
  if (res$status_code != 200L) {
    warning("Skipping ", username, ": HTTP ", res$status_code, call. = FALSE)
    next
  }
  dat <- jsonlite::fromJSON(rawToChar(res$content))
  tbl[[username]] <- dat
  cat(username, "\n")
}
usertabs <- tbl
# Total `public_repos` over the profiles of accounts listed in
# `usernames_noaa`. The result is a one-row table with column `nrepos`.
df.user <- bind_rows(usertabs, .id = "username") %>%
  filter(username %in% usernames_noaa) %>%
  summarize(nrepos = sum(public_repos, na.rm = TRUE))
Make the tables
# Render the per-organisation counts as a gt table: friendlier two-line
# column labels, the `type` column hidden, numeric columns centred.
library(gt)

nrepos_by_org %>%
  ungroup() %>%
  gt() %>%
  # Row highlighting for Enterprise organisations, currently commented out:
  # tab_style(
  #   style = cell_fill(color = "lightblue"),
  #   locations = cells_body(rows = type == "Enterprise GH org")
  # ) %>%
  cols_label(
    nrepos = html("Num<br>Repos"),
    updated_2023_2022 = html("Updated in<br>2023-2022"),
    updated_2021_2020 = html("Updated in<br>2021-2020")
  ) %>%
  cols_hide(columns = c(type)) %>%
  cols_align(
    align = "center",
    columns = c(nrepos, updated_2023_2022, updated_2021_2020)
  )
GH_org | Num Repos | Updated in 2023-2022 | Updated in 2021-2020 |
---|---|---|---|
nmfs-fish-tools | 67 | 53 | 13 |
NEFSC | 66 | 41 | 25 |
nmfs-opensci | 34 | 30 | 4 |
afsc-gap-products | 31 | 27 | 3 |
NOAA-EDAB | 42 | 26 | 12 |
afsc-assessments | 32 | 26 | 6 |
rverse-tutorials | 38 | 24 | 12 |
pfmc-assessments | 22 | 20 | 2 |
noaa-fims | 17 | 15 | 2 |
us-amlr | 16 | 13 | 3 |
PIFSCstockassessments | 12 | 10 | 1 |
ecosystem-state | 10 | 10 | 0 |
nmfs-stock-synthesis | 10 | 10 | 0 |
noaa-fisheries-integrated-toolbox | 11 | 10 | 1 |
nwfsc-cb | 10 | 8 | 2 |
nwfsc-math-bio | 11 | 8 | 1 |
NWFSC-OA-lab | 15 | 7 | 8 |
nwfsc-fram | 17 | 6 | 11 |
PIFSC-NMFS-NOAA | 5 | 5 | 0 |
SEFSC | 5 | 5 | 0 |
SWFSC | 5 | 5 | 0 |
NMML | 21 | 5 | 0 |
pacific-hake | 10 | 5 | 3 |
TIDE-NWFSC | 4 | 4 | 0 |
futureseas | 5 | 4 | 1 |
noaa-iea | 20 | 4 | 16 |
alaska-groundfish-efh | 3 | 3 | 0 |
NOAA-FEAT | 3 | 2 | 1 |
noaa-garfo | 3 | 2 | 1 |
ss3sim | 12 | 2 | 3 |
afsc-ecofoci | 1 | 1 | 0 |
r4ss | 4 | 1 | 2 |
# Summary table: one row per organisation group (from `nrepos_by_type`) plus
# one "Individual account" row computed from the personal repositories of
# accounts in `usernames_noaa`.
library(gt)

# NOTE(review): activity here is taken from `updated_at`, whereas the
# organisation rows are based on `pushed_at` (via `last_update`) -- confirm
# whether the two are meant to differ.
tmp <- bind_rows(userrepotabs, .id = "username") %>%
  filter(username %in% usernames_noaa) %>%
  mutate(type = "Individual account") %>%
  group_by(type) %>%
  summarize(
    n = n_distinct(username),
    nrepos = n(),
    updated_2023_2022 = sum(lubridate::year(updated_at) %in% c(2023, 2022)),
    updated_2021_2020 = sum(lubridate::year(updated_at) %in% c(2021, 2020))
  )

bind_rows(nrepos_by_type, tmp) %>%
  ungroup() %>%
  gt() %>%
  # Shade the individual-account row to set it apart from the org rows.
  tab_style(
    style = cell_fill(color = "lightgrey"),
    locations = cells_body(rows = type == "Individual account")
  ) %>%
  cols_label(
    nrepos = html("Num<br>Repos"),
    updated_2023_2022 = html("Updated in<br>2023-2022"),
    updated_2021_2020 = html("Updated in<br>2021-2020")
  ) %>%
  cols_align(
    align = "center",
    columns = c(nrepos, updated_2023_2022, updated_2021_2020)
  ) %>%
  tab_header(
    title = md(paste("Public NMFS Repositories on GitHub", Sys.Date())),
    subtitle = md("non-Enterprise GH orgs, Enterprise GH orgs, work personal accounts")
  ) %>%
  tab_source_note(
    source_note = "GH org = GitHub organization. It is like a GitHub account where groups of GitHub users collaborate on a collection of repositories and manage those repositories."
  ) %>%
  tab_source_note(
    source_note = "Note: the vast majority of repos on individual accounts are 'sandboxy' in nature and are not products per se."
  )
Public NMFS Repositories on GitHub 2023-04-05 | ||||
non-Enterprise GH orgs, Enterprise GH orgs, work personal accounts | ||||
type | n | Num Repos | Updated in 2023-2022 | Updated in 2021-2020 |
---|---|---|---|---|
non-Enterprise GH org | 28 | 481 | 336 | 108 |
Enterprise GH org | 4 | 81 | 56 | 25 |
Individual account | 229 | 2827 | 951 | 914 |
GH org = GitHub organization. It is like a GitHub account where groups of GitHub users collaborate on a collection of repositories and manage those repositories. | ||||
Note: the vast majority of repos on individual accounts are 'sandboxy' in nature and are not products per se. |