# Query the GitHub API rate-limit endpoint and print the response.
#
# The personal access token is read from the GITHUB_PAT environment variable
# (e.g. set in ~/.Renviron) so that the secret is never stored in this file.
# SECURITY: a token was previously hard-coded here; revoke it on GitHub.
github_pat <- Sys.getenv("GITHUB_PAT")
if (!nzchar(github_pat)) {
  stop("Set the GITHUB_PAT environment variable to a GitHub token.",
       call. = FALSE)
}
url <- "https://api.github.com/rate_limit"
res <- httr::GET(
  url,
  httr::add_headers(Authorization = paste("token", github_pat))
)
res

# NMFS repos
# GitHub organizations that are not under a GitHub Enterprise account.
non.ent.orgs <- c(
  "noaa-fisheries-integrated-toolbox", "nmfs-fish-tools", "noaa-fims",
  "noaa-iea", "ecosystem-state", "futureseas",
  "pfmc-assessments", "pacific-hake", "nmfs-stock-synthesis", "r4ss", "ss3sim",
  "nwfsc-math-bio", "nwfsc-fram", "NOAA-FEAT", "nwfsc-cb", "NWFSC-OA-lab",
  "TIDE-NWFSC",
  "rverse-tutorials", "nmfs-opensci",
  "NOAA-EDAB", "PIFSCstockassessments",
  "afsc-assessments", "NMML", "afsc-gap-products", "afsc-ecofoci",
  "alaska-groundfish-efh",
  "us-amlr", "noaa-garfo"
)
# GitHub organizations that are under a GitHub Enterprise account.
ent.orgs <- c("NEFSC", "SEFSC", "SWFSC", "PIFSC-NMFS-NOAA")

# Make a fine-grained PAT.
# https://api.github.com/orgs/ORG/repos
# Download the repository list of every organization named in `non.ent.orgs`
# and `ent.orgs` from https://api.github.com/orgs/ORG/repos.
#
# Result: `orgtabs`, a list with one element per org group ("non.ent.orgs",
# "ent.orgs"); each element is a named list of per-org data frames with two
# added columns, `org` and `license_name`.
#
# The token is read from the GITHUB_PAT environment variable; it must never
# be written into this file. SECURITY: a token was previously hard-coded
# here and in a comment; revoke it on GitHub.
library(dplyr)
github_pat <- Sys.getenv("GITHUB_PAT")
if (!nzchar(github_pat)) {
  stop("Set the GITHUB_PAT environment variable to a GitHub token.",
       call. = FALSE)
}
orgtabs <- list()
for (tab_type in c("non.ent.orgs", "ent.orgs")) {
  orgnames <- get(tab_type)
  tbl <- list()
  update_tbl <- FALSE
  for (org in orgnames[!(orgnames %in% names(tbl))]) {
    update_tbl <- TRUE
    # Only one page of up to 100 repos is requested; an org with more than
    # 100 repos would be truncated.
    url <- paste0("https://api.github.com/orgs/", org, "/repos?per_page=100")
    res <- httr::GET(
      url,
      httr::add_headers(authorization = paste("token", github_pat))
    )
    dat <- jsonlite::fromJSON(rawToChar(res$content))
    # An error payload or an org without repos does not parse to a data
    # frame of repos; skip it instead of storing it.
    if (!is.data.frame(dat) || nrow(dat) == 0) {
      warning("No repository table returned for org '", org, "'.",
              call. = FALSE)
      next
    }
    dat$org <- org
    # `license` is parsed as a nested data frame when at least one repo has a
    # license, otherwise as an all-NA column. Take the SPDX id per repo
    # (previously the first repo's id was recycled to every row).
    dat$license_name <- if (is.data.frame(dat$license)) {
      dat$license$spdx_id
    } else {
      NA_character_
    }
    tbl[[org]] <- dat
    cat(org, " ")
  }
  if (update_tbl) orgtabs[[tab_type]] <- tbl
}
# Alternative download of the org repository lists through the search API
# (https://api.github.com/search/repositories?q=org:ORG). Rebuilds `orgtabs`
# with the same structure: one element per org group, each a named list of
# per-org data frames with added `org` and `license_name` columns.
#
# The token is read from the GITHUB_PAT environment variable; it must never
# be written into this file. SECURITY: a token was previously hard-coded
# here; revoke it on GitHub.
library(dplyr)
github_pat <- Sys.getenv("GITHUB_PAT")
if (!nzchar(github_pat)) {
  stop("Set the GITHUB_PAT environment variable to a GitHub token.",
       call. = FALSE)
}
orgtabs <- list()
for (tab_type in c("non.ent.orgs", "ent.orgs")) {
  orgnames <- get(tab_type)
  tbl <- list()
  update_tbl <- FALSE
  for (org in orgnames[!(orgnames %in% names(tbl))]) {
    update_tbl <- TRUE
    # The search endpoint returns 30 items per page unless per_page is given;
    # request the maximum of 100. Orgs with more than 100 repos would still be
    # truncated because only the first page is fetched.
    url <- paste0(
      "https://api.github.com/search/repositories?q=org:", org,
      "&per_page=100"
    )
    res <- httr::GET(
      url,
      httr::add_headers(authorization = paste("token", github_pat))
    )
    dat <- jsonlite::fromJSON(rawToChar(res$content))$items
    # `items` is missing on an error payload and empty when nothing matched.
    if (!is.data.frame(dat) || nrow(dat) == 0) {
      warning("No repository table returned for org '", org, "'.",
              call. = FALSE)
      next
    }
    dat$org <- org
    # Per-repo SPDX id (previously the first repo's id was recycled to every
    # row); all-NA when no repo in the org declares a license.
    dat$license_name <- if (is.data.frame(dat$license)) {
      dat$license$spdx_id
    } else {
      NA_character_
    }
    tbl[[org]] <- dat
  }
  if (update_tbl) orgtabs[[tab_type]] <- tbl
}

# Draft kept for reference (not run):
# dat <- dat %>% arrange(desc(pushed_at)) %>%
#   mutate(last_update = as.Date(pushed_at, "%Y-%m-%d"),
#          topic = ifelse(is.null(unlist(topics)), "", unlist(topics))) %>%
#   select(c("org", "name", "language", "last_update", "license_name",
#            "description", "topic"))
Make a table of the total number of repos.
# Build one tidy repo table per org group from the first two elements of
# `orgtabs`. Each table has one row per repo with the columns GH_org, name,
# language, last_update (date part of `pushed_at`), license_name,
# description and topic.
tmptbl <- lapply(orgtabs[1:2], function(org_tables) {
  bind_rows(org_tables, .id = "GH_org") %>%
    mutate(
      last_update = as.Date(pushed_at, "%Y-%m-%d"),
      # First topic of each repo, "" when the repo has none. The previous
      # scalar ifelse() took the first topic of the whole table and recycled
      # it to every row.
      topic = vapply(
        topics,
        function(x) {
          x <- unlist(x)
          if (length(x) == 0) "" else as.character(x[[1]])
        },
        character(1)
      )
    ) %>%
    select(c("GH_org", "name", "language", "last_update", "license_name",
             "description", "topic"))
})
names(tmptbl) <- c("non-Enterprise GH org", "Enterprise GH org")
# Stack the per-group repo tables; `type` holds the group label.
org.df <- bind_rows(tmptbl, .id = "type")

# Repo counts per organization, most recently active orgs first.
# `.groups = "drop_last"` is dplyr's default for this call; stating it
# silences the "`summarise()` has grouped output by 'type'" message.
nrepos_by_org <- org.df %>%
  group_by(type, GH_org) %>%
  summarize(
    nrepos = length(name),
    updated_2023_2022 = sum(lubridate::year(last_update) %in% c(2023, 2022),
                            na.rm = TRUE),
    updated_2021_2020 = sum(lubridate::year(last_update) %in% c(2021, 2020),
                            na.rm = TRUE),
    .groups = "drop_last"
  ) %>%
  arrange(desc(updated_2023_2022))

# Repo counts per org group; `n` is the number of distinct organizations.
nrepos_by_type <- org.df %>%
  group_by(type) %>%
  summarize(
    n = length(unique(GH_org)),
    nrepos = length(name),
    updated_2023_2022 = sum(lubridate::year(last_update) %in% c(2023, 2022),
                            na.rm = TRUE),
    updated_2021_2020 = sum(lubridate::year(last_update) %in% c(2021, 2020),
                            na.rm = TRUE)
  ) %>%
  arrange(desc(updated_2023_2022))

# GitHub usernames read from two local CSV files. Names are trimmed and
# blank/NA entries dropped so that later lookups and unique() calls do not
# see whitespace variants of the same account.
userdat <- read.csv("usernames_nmfs_rug.csv")
usernames_rug <- stringr::str_trim(userdat$GitHub.username)
usernames_rug <- usernames_rug[!is.na(usernames_rug) & usernames_rug != ""]
userdat <- read.csv("usernames_other_noaa.csv")
usernames_other <- stringr::str_trim(userdat$username)
usernames_other <- usernames_other[!is.na(usernames_other) &
                                     usernames_other != ""]
usernames_noaa <- c(usernames_rug, usernames_other)

# https://api.github.com/repos/NOAA-FIMS/FIMS/contributors
# Download the contributor list of every repo in `org.df` from
# https://api.github.com/repos/ORG/REPO/contributors and summarise it per
# GitHub login.
#
# Results:
#   contributortabs  named list (by org) of contributor rows, with added
#                    `org` and `repo` columns
#   df.contributors  one row per login: nrepos (repos contributed to),
#                    norgs (distinct orgs), ncontrib (total contributions)
#   usernames_orgs   contributor logins not already in `usernames`
#   usernames_all    usernames_noaa followed by usernames_orgs
#
# The token is read from the GITHUB_PAT environment variable; it must never
# be written into this file. SECURITY: a token was previously hard-coded
# here; revoke it on GitHub.
library(dplyr)
github_pat <- Sys.getenv("GITHUB_PAT")
if (!nzchar(github_pat)) {
  stop("Set the GITHUB_PAT environment variable to a GitHub token.",
       call. = FALSE)
}
# Resume from an earlier partial download if one exists. A dedicated cache is
# used because the generic `tbl` left over from other cells holds unrelated
# tables, which made orgs get skipped and mixed repo rows into the result.
if (!exists("contributortabs")) contributortabs <- list()
all_orgs <- unique(org.df$GH_org)
for (org in all_orgs[!(all_orgs %in% names(contributortabs))]) {
  repo_names <- org.df$name[org.df$GH_org == org]
  repo_rows <- list()
  for (reponame in repo_names) {
    # per_page=100 is the API maximum (default is 30); repos with more than
    # 100 contributors are still truncated to the first page.
    url <- paste0("https://api.github.com/repos/", org, "/", reponame,
                  "/contributors?per_page=100")
    res <- httr::GET(
      url,
      httr::add_headers(authorization = paste("token", github_pat))
    )
    # Anything other than 200 carries no contributor table (an empty body or
    # an error message), so skip it.
    if (httr::status_code(res) != 200) next
    body <- rawToChar(res$content)
    if (body == "") next
    dat <- jsonlite::fromJSON(body)
    if (length(dat) == 0) next
    dat$org <- org
    dat$repo <- reponame
    repo_rows[[length(repo_rows) + 1L]] <- dat
  }
  contributortabs[[org]] <- bind_rows(repo_rows)
  cat(org, "\n")
}

df.contributors <- bind_rows(contributortabs) %>%
  group_by(login) %>%
  summarize(
    nrepos = n(),
    # Distinct orgs; length(org) merely repeated nrepos.
    norgs = n_distinct(org),
    ncontrib = sum(contributions, na.rm = TRUE)
  )
# NOTE(review): `usernames` is not defined in the visible part of this file;
# confirm it exists (or whether `usernames_noaa` was intended).
usernames_orgs <- df.contributors$login[!(df.contributors$login %in% usernames)]
usernames_all <- c(usernames_noaa, usernames_orgs)
# Grow `usernames_noaa` with every account that looks NOAA-affiliated:
#   * logins containing "NOAA", "noaa" or "Noaa";
#   * profiles in `usertabs` whose email contains "noaa";
#   * profiles whose company or bio mentions one of the office acronyms;
#   * the NMFS R user group names in `usernames_rug`.
# Then load the list of accounts known not to be NOAA.
usernames_all <- unique(usernames_all)
login_has_noaa <- stringr::str_detect(usernames_all, "NOAA|noaa|Noaa")
usernames_noaa <- c(usernames_noaa, usernames_all[login_has_noaa])

tmp <- bind_rows(usertabs)
email_has_noaa <- !is.na(tmp$email) & stringr::str_detect(tmp$email, "noaa")
usernames_noaa <- c(usernames_noaa, tmp$login[email_has_noaa])

for (i in c("NOAA", "NMFS", "NEFSC", "PIFSC", "SWFSC", "SEFSC", "GARFO", "AFSC")) {
  in_company <- !is.na(tmp$company) & stringr::str_detect(tmp$company, i)
  in_bio <- !is.na(tmp$bio) & stringr::str_detect(tmp$bio, i)
  usernames_noaa <- c(usernames_noaa, tmp$login[in_company], tmp$login[in_bio])
}

usernames_noaa <- unique(c(usernames_noaa, usernames_rug))
usernames_not_noaa <- read.csv("usernames_not_noaa.csv")$username
# Accounts still unclassified (run interactively when needed):
# sort(usernames_all[!(usernames_all %in% usernames_noaa) & !(usernames_all %in% usernames_not_noaa)])
# Download the public repositories of every account in `usernames_noaa` from
# https://api.github.com/users/USER/repos, 100 repos per page.
#
# Result: `userrepotabs`, a named list (by username) of repo tables. Accounts
# without a `public_repos` count in `usertabs` are skipped.
#
# The token is read from the GITHUB_PAT environment variable; it must never
# be written into this file. SECURITY: a token was previously hard-coded
# here; revoke it on GitHub.
library(dplyr)
github_pat <- Sys.getenv("GITHUB_PAT")
if (!nzchar(github_pat)) {
  stop("Set the GITHUB_PAT environment variable to a GitHub token.",
       call. = FALSE)
}
tbl <- list()
#tbl <- userrepotabs
for (username in usernames_noaa[!(usernames_noaa %in% names(tbl))]) {
  n_public <- usertabs[[username]]$public_repos
  if (is.null(n_public) || is.na(n_public)) next
  dat <- NULL
  # One request per page of 100; always at least one request, and no extra
  # empty page when the count is an exact multiple of 100.
  for (i in seq_len(max(1, ceiling(n_public / 100)))) {
    # The page number must be pasted in; the former "&page=i" sent the
    # literal letter i, so later pages were never actually requested.
    url <- paste0("https://api.github.com/users/", username,
                  "/repos?per_page=100&page=", i)
    res <- httr::GET(
      url,
      httr::add_headers(authorization = paste("token", github_pat))
    )
    dat <- bind_rows(dat, jsonlite::fromJSON(rawToChar(res$content)))
  }
  tbl[[username]] <- dat
  cat(username, " ")
}
userrepotabs <- tbl
# Download the profile of every account in `usernames_all` from
# https://api.github.com/users/USER.
#
# Results:
#   usertabs  named list (by username) of parsed profile records
#   df.user   one-row table with `nrepos`, the sum of `public_repos` over the
#             accounts listed in `usernames_noaa`
#
# The token is read from the GITHUB_PAT environment variable; it must never
# be written into this file. SECURITY: a token was previously hard-coded
# here; revoke it on GitHub.
library(dplyr)
github_pat <- Sys.getenv("GITHUB_PAT")
if (!nzchar(github_pat)) {
  stop("Set the GITHUB_PAT environment variable to a GitHub token.",
       call. = FALSE)
}
# Resume from profiles already downloaded, if any. A dedicated cache is used
# because the generic `tbl` left over from other cells holds unrelated
# tables, which made accounts get skipped and mixed repo tables into
# `usertabs`.
if (!exists("usertabs")) usertabs <- list()
for (username in usernames_all[!(usernames_all %in% names(usertabs))]) {
  url <- paste0("https://api.github.com/users/", username)
  res <- httr::GET(
    url,
    httr::add_headers(authorization = paste("token", github_pat))
  )
  # Do not store error payloads (e.g. a renamed or deleted account) as if
  # they were profiles.
  if (httr::status_code(res) != 200) {
    warning("No profile returned for user '", username, "'.", call. = FALSE)
    next
  }
  usertabs[[username]] <- jsonlite::fromJSON(rawToChar(res$content))
  cat(username, "\n")
}

df.user <- bind_rows(usertabs, .id = "username") %>%
  subset(username %in% usernames_noaa) %>%
  summarize(nrepos = sum(public_repos, na.rm = TRUE))

# Make the tables
# Render the per-organization repo counts as a gt table. The `type` column
# is hidden and the three count columns are centered.
library(gt)
nrepos_by_org %>%
  ungroup() %>%
  gt() %>%
  # Optional highlighting of Enterprise orgs (disabled):
  # tab_style(
  #   style = cell_fill(color = "lightblue"),
  #   locations = cells_body(rows = type=="Enterprise GH org")
  # ) %>%
  cols_label(
    nrepos = html("Num<br>Repos"),
    updated_2023_2022 = html("Updated in<br>2023-2022"),
    updated_2021_2020 = html("Updated in<br>2021-2020")
  ) %>%
  cols_hide(columns = c(type)) %>%
  cols_align(
    align = "center",
    columns = c(nrepos, updated_2023_2022, updated_2021_2020)
  )

| GH_org | Num Repos | Updated in 2023-2022 | Updated in 2021-2020 |
|---|---|---|---|
| nmfs-fish-tools | 67 | 53 | 13 |
| NEFSC | 66 | 41 | 25 |
| nmfs-opensci | 34 | 30 | 4 |
| afsc-gap-products | 31 | 27 | 3 |
| NOAA-EDAB | 42 | 26 | 12 |
| afsc-assessments | 32 | 26 | 6 |
| rverse-tutorials | 38 | 24 | 12 |
| pfmc-assessments | 22 | 20 | 2 |
| noaa-fims | 17 | 15 | 2 |
| us-amlr | 16 | 13 | 3 |
| PIFSCstockassessments | 12 | 10 | 1 |
| ecosystem-state | 10 | 10 | 0 |
| nmfs-stock-synthesis | 10 | 10 | 0 |
| noaa-fisheries-integrated-toolbox | 11 | 10 | 1 |
| nwfsc-cb | 10 | 8 | 2 |
| nwfsc-math-bio | 11 | 8 | 1 |
| NWFSC-OA-lab | 15 | 7 | 8 |
| nwfsc-fram | 17 | 6 | 11 |
| PIFSC-NMFS-NOAA | 5 | 5 | 0 |
| SEFSC | 5 | 5 | 0 |
| SWFSC | 5 | 5 | 0 |
| NMML | 21 | 5 | 0 |
| pacific-hake | 10 | 5 | 3 |
| TIDE-NWFSC | 4 | 4 | 0 |
| futureseas | 5 | 4 | 1 |
| noaa-iea | 20 | 4 | 16 |
| alaska-groundfish-efh | 3 | 3 | 0 |
| NOAA-FEAT | 3 | 2 | 1 |
| noaa-garfo | 3 | 2 | 1 |
| ss3sim | 12 | 2 | 3 |
| afsc-ecofoci | 1 | 1 | 0 |
| r4ss | 4 | 1 | 2 |
# Render the summary table: one row per org group from `nrepos_by_type` plus
# one "Individual account" row built from `userrepotabs`, restricted to the
# accounts in `usernames_noaa`.
library(gt)
# NOTE(review): this row counts activity by `updated_at`, while the org rows
# are based on `pushed_at` (via `last_update`); confirm the difference is
# intended.
tmp <- bind_rows(userrepotabs, .id = "username") %>%
  subset(username %in% usernames_noaa) %>%
  mutate(type = "Individual account") %>%
  group_by(type) %>%
  summarize(
    n = length(unique(username)),
    nrepos = length(name),
    updated_2023_2022 = sum(lubridate::year(updated_at) %in% c(2023, 2022),
                            na.rm = TRUE),
    updated_2021_2020 = sum(lubridate::year(updated_at) %in% c(2021, 2020),
                            na.rm = TRUE)
  )

bind_rows(nrepos_by_type, tmp) %>%
  ungroup() %>%
  gt() %>%
  # Shade the individual-account row to set it apart from the org rows.
  tab_style(
    style = cell_fill(color = "lightgrey"),
    locations = cells_body(rows = type == "Individual account")
  ) %>%
  cols_label(
    nrepos = html("Num<br>Repos"),
    updated_2023_2022 = html("Updated in<br>2023-2022"),
    updated_2021_2020 = html("Updated in<br>2021-2020")
  ) %>%
  cols_align(
    align = "center",
    columns = c(nrepos, updated_2023_2022, updated_2021_2020)
  ) %>%
  tab_header(
    title = md(paste("Public NMFS Repositories on GitHub", Sys.Date())),
    subtitle = md("non-Enterprise GH orgs, Enterprise GH orgs, work personal accounts")
  ) %>%
  tab_source_note(
    source_note = "GH org = GitHub organization. It is like a GitHub account where groups of GitHub users collaborate on a collection of repositories and manage those repositories."
  ) %>%
  tab_source_note(
    source_note = "Note: the vast majority of repos on individual accounts are 'sandboxy' in nature and are not products per se."
  )

| Public NMFS Repositories on GitHub 2023-04-05 | ||||
| non-Enterprise GH orgs, Enterprise GH orgs, work personal accounts | ||||
| type | n | Num Repos | Updated in 2023-2022 | Updated in 2021-2020 |
|---|---|---|---|---|
| non-Enterprise GH org | 28 | 481 | 336 | 108 |
| Enterprise GH org | 4 | 81 | 56 | 25 |
| Individual account | 229 | 2827 | 951 | 914 |
| GH org = GitHub organization. It is like a GitHub account where groups of GitHub users collaborate on a collection of repositories and manage those repositories. | ||||
| Note: the vast majority of repos on individual accounts are 'sandboxy' in nature and are not products per se. | ||||