NMFS repos

url <- "https://api.github.com/rate_limit"
res <- httr::GET(url,
                 httr::add_headers(Authorization = "token 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"))
res
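The printed response includes the rate-limit headers; the body can also be parsed to pull the numbers out directly. A minimal sketch, assuming the standard rate_limit response structure:

# Parse the rate-limit body; resources$core covers the regular REST endpoints
rl <- jsonlite::fromJSON(rawToChar(res$content))
rl$resources$core$remaining  # calls left in the current window
rl$resources$core$limit      # 5000 per hour when authenticated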
non.ent.orgs <- c("noaa-fisheries-integrated-toolbox", "nmfs-fish-tools", "noaa-fims",
          "noaa-iea", "ecosystem-state", "futureseas",
          "pfmc-assessments", "pacific-hake", "nmfs-stock-synthesis", "r4ss", "ss3sim",
          "nwfsc-math-bio", "nwfsc-fram", "NOAA-FEAT", "nwfsc-cb", "NWFSC-OA-lab", "TIDE-NWFSC",
          "rverse-tutorials", "nmfs-opensci",
          "NOAA-EDAB", "PIFSCstockassessments",
          "afsc-assessments", "NMML", "afsc-gap-products", "afsc-ecofoci", "alaska-groundfish-efh",
          "us-amlr", "noaa-garfo"
          )
ent.orgs <- c("NEFSC", "SEFSC", "SWFSC", "PIFSC-NMFS-NOAA")

Make a fine-grained personal access token (PAT) so the API calls below are authenticated; unauthenticated requests are limited to 60 per hour. Each organization's repositories are listed from

https://api.github.com/orgs/ORG/repos
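The chunks below assume the PAT is stored in the GITHUB_PAT environment variable rather than pasted into the script. A minimal setup sketch:

# In ~/.Renviron (one line; restart R afterwards):
# GITHUB_PAT=github_pat_xxxxxxxxxxxx
# The chunks below then build the authorization header inline with
httr::add_headers(Authorization = paste("token", Sys.getenv("GITHUB_PAT")))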

# List the public repos in each GH org
library(dplyr)
orgtabs <- list()
for(tab_type in c("non.ent.orgs", "ent.orgs")){
  orgnames <- get(tab_type)
  tbl <- list()
  update_tbl <- FALSE
  for(org in orgnames[which(!(orgnames %in% names(tbl)))]){
    update_tbl <- TRUE
    url <- paste0("https://api.github.com/orgs/", org, "/repos?per_page=100")
    res <- httr::GET(url,
                     httr::add_headers(Authorization = paste("token", Sys.getenv("GITHUB_PAT"))))
    dat <- jsonlite::fromJSON(rawToChar(res$content))
    dat$org <- org
    dat$license_name <- ifelse(inherits(dat$license, "logical"), NA, select(dat$license, "spdx_id")$spdx_id)[1]
    tbl[[org]] <- dat
    cat(org, " ")
  }
  if(update_tbl) orgtabs[[tab_type]] <- tbl
}
# Alternative: list each org's repos via the search API
library(dplyr)
orgtabs <- list()
for(tab_type in c("non.ent.orgs", "ent.orgs")){
  orgnames <- get(tab_type)
  tbl <- list()
  update_tbl <- FALSE
  for(org in orgnames[which(!(orgnames %in% names(tbl)))]){
    update_tbl <- TRUE
    url <- paste0("https://api.github.com/search/repositories?q=org:", org)
    res <- httr::GET(url,
                     httr::add_headers(Authorization = paste("token", Sys.getenv("GITHUB_PAT"))))
    dat <- jsonlite::fromJSON(rawToChar(res$content))$items
    dat$org <- org
    dat$license_name <- ifelse(inherits(dat$license, "logical"), NA, select(dat$license, "spdx_id")$spdx_id)[1]
    tbl[[org]] <- dat
  }
  if(update_tbl) orgtabs[[tab_type]] <- tbl
}

dat <- dat %>% 
  arrange(desc(pushed_at)) %>% 
  mutate(last_update = as.Date(pushed_at, "%Y-%m-%d"),
         topic = ifelse(is.null(unlist(topics)), "", unlist(topics))) %>% 
  select(c("org", "name", "language", "last_update", "license_name", "description", "topic"))

Make a table of the total number of repos.

tmptbl <- list()
tmptbl[["non-Enterprise GH org"]] <- bind_rows(orgtabs[[1]], .id = "GH_org") %>% 
  mutate(last_update = as.Date(pushed_at, "%Y-%m-%d"),
         topic = ifelse(is.null(unlist(topics)), "", unlist(topics))) %>%
  select(c("GH_org", "name", "language", "last_update", "license_name", "description", "topic"))
tmptbl[["Enterprise GH org"]] <- bind_rows(orgtabs[[2]], .id = "GH_org") %>% 
  mutate(last_update = as.Date(pushed_at, "%Y-%m-%d"),
         topic = ifelse(is.null(unlist(topics)), "", unlist(topics))) %>%
  select(c("GH_org", "name", "language", "last_update", "license_name", "description", "topic"))
org.df <- bind_rows(tmptbl, .id="type")
nrepos_by_org <- org.df %>% group_by(type, GH_org) %>% 
  summarize(nrepos = length(name),
            updated_2023_2022 = sum(lubridate::year(last_update) %in% c("2023", "2022"), na.rm=TRUE),
            updated_2021_2020 = sum(lubridate::year(last_update) %in% c("2021", "2020"), na.rm=TRUE)) %>%
  arrange(desc(updated_2023_2022))
nrepos_by_type <- org.df %>% group_by(type) %>% 
  summarize(n = length(unique(GH_org)),
            nrepos = length(name),
            updated_2023_2022 = sum(lubridate::year(last_update) %in% c("2023", "2022"), na.rm=TRUE),
            updated_2021_2020 = sum(lubridate::year(last_update) %in% c("2021", "2020"), na.rm=TRUE)) %>%
  arrange(desc(updated_2023_2022))
userdat <- read.csv("usernames_nmfs_rug.csv")
usernames_rug <- userdat$GitHub.username[userdat$GitHub.username != ""]
userdat <- read.csv("usernames_other_noaa.csv")
usernames_noaa <- c(usernames_rug, userdat$username[userdat$username != ""])
usernames_noaa <- stringr::str_trim(usernames_noaa)
#https://api.github.com/repos/NOAA-FIMS/FIMS/contributors
library(dplyr)
#  tbl <- list()  # uncomment to start fresh; left commented so an interrupted run can be resumed
for(org in unique(org.df$GH_org)[!(unique(org.df$GH_org) %in% names(tbl))]){
  df <- org.df %>% subset(GH_org == org)
  df.repo <- NULL
  for(reponame in df$name){
url <- paste0("https://api.github.com/repos/", org, "/", reponame, "/contributors")
res <- httr::GET(url,
                 httr::add_headers(authorization = "token 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"))
if(rawToChar(res$content)=="") next
dat <- jsonlite::fromJSON(rawToChar(res$content))
if(length(dat) == 0) next
dat$org <- org
dat$repo <- reponame
df.repo <- rbind(df.repo, dat)
  }
  tbl[[org]] <- df.repo
  cat(org, "\n")
}
contributortabs <- tbl
df.contributors <- bind_rows(contributortabs)  %>% 
  group_by(login) %>% 
  summarize(nrepos = length(login),
            norgs = length(unique(org)),
            ncontrib = sum(contributions, na.rm=TRUE))
usernames_orgs <- df.contributors$login[!(df.contributors$login %in% usernames_noaa)]
usernames_all <- c(usernames_noaa, usernames_orgs)
usernames_all <- unique(usernames_all)

usernames_noaa <- c(usernames_noaa, 
                    usernames_all[stringr::str_detect(usernames_all, "NOAA") |   
                                  stringr::str_detect(usernames_all, "noaa") |
                                  stringr::str_detect(usernames_all, "Noaa")])
tmp <- bind_rows(usertabs) # usertabs (the user profiles) is built in a later chunk
usernames_noaa <- c(usernames_noaa, tmp$login[stringr::str_detect(tmp$email, "noaa") & !is.na(tmp$email)])
for(i in c("NOAA", "NMFS", "NEFSC", "PIFSC", "SWFSC", "SEFSC", "GARFO", "AFSC")){
  usernames_noaa <- c(usernames_noaa, tmp$login[stringr::str_detect(tmp$company, i) & !is.na(tmp$company)])
  usernames_noaa <- c(usernames_noaa, tmp$login[stringr::str_detect(tmp$bio, i) & !is.na(tmp$bio)])
}
usernames_noaa <- c(usernames_noaa, usernames_rug)
usernames_noaa <- unique(usernames_noaa)

usernames_not_noaa <- read.csv("usernames_not_noaa.csv")$username

# sort(usernames_all[!(usernames_all %in% usernames_noaa) & !(usernames_all %in% usernames_not_noaa)])
# Get the public repos for each NOAA-affiliated username
library(dplyr)
tbl <- list()
#tbl <- userrepotabs
for(username in usernames_noaa[which(!(usernames_noaa %in% names(tbl)))]){
  dat <- NULL
  if(is.null(usertabs[[username]]$public_repos)) next
  # page through the user's public repos, 100 per page
  for(i in 1:(1 + floor(usertabs[[username]]$public_repos/100))){
    url <- paste0("https://api.github.com/users/", username, "/repos?per_page=100&page=", i)
    res <- httr::GET(url,
                     httr::add_headers(Authorization = paste("token", Sys.getenv("GITHUB_PAT"))))
    dat <- bind_rows(dat, jsonlite::fromJSON(rawToChar(res$content)))
  }
  tbl[[username]] <- dat
  cat(username, " ")
}
userrepotabs <- tbl
# Get the GitHub profile for each username
library(dplyr)
# tbl <- list()
# tbl <- usertabs
for(username in usernames_all[which(!(usernames_all %in% names(tbl)))]){
url <- paste0("https://api.github.com/users/", username)
res <- httr::GET(url,
                 httr::add_headers(authorization = "token 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"))
dat <- jsonlite::fromJSON(rawToChar(res$content))
tbl[[username]] <- dat
cat(username, "\n")
}
usertabs <- tbl
df.user <- bind_rows(usertabs, .id = "username")  %>% 
  subset(username %in% usernames_noaa) %>%
  summarize(nrepos = sum(public_repos, na.rm=TRUE))

Make the tables.

library(gt)
nrepos_by_org %>% ungroup() %>% gt() %>%
  #   tab_style(
  #   style = cell_fill(color = "lightblue"),
  #   locations = cells_body(rows = type=="Enterprise GH org")
  # ) %>%
   cols_label(
    nrepos = html("Num<br>Repos"),
    updated_2023_2022 = html("Updated in<br>2023-2022"),
    updated_2021_2020 = html("Updated in<br>2021-2020")
  ) %>%
  cols_hide(columns = c(type)) %>%
  cols_align(
    align = "center",
    columns = c(nrepos, updated_2023_2022, updated_2021_2020)
  )
GH_org  Num Repos  Updated in 2023-2022  Updated in 2021-2020
nmfs-fish-tools 67 53 13
NEFSC 66 41 25
nmfs-opensci 34 30 4
afsc-gap-products 31 27 3
NOAA-EDAB 42 26 12
afsc-assessments 32 26 6
rverse-tutorials 38 24 12
pfmc-assessments 22 20 2
noaa-fims 17 15 2
us-amlr 16 13 3
PIFSCstockassessments 12 10 1
ecosystem-state 10 10 0
nmfs-stock-synthesis 10 10 0
noaa-fisheries-integrated-toolbox 11 10 1
nwfsc-cb 10 8 2
nwfsc-math-bio 11 8 1
NWFSC-OA-lab 15 7 8
nwfsc-fram 17 6 11
PIFSC-NMFS-NOAA 5 5 0
SEFSC 5 5 0
SWFSC 5 5 0
NMML 21 5 0
pacific-hake 10 5 3
TIDE-NWFSC 4 4 0
futureseas 5 4 1
noaa-iea 20 4 16
alaska-groundfish-efh 3 3 0
NOAA-FEAT 3 2 1
noaa-garfo 3 2 1
ss3sim 12 2 3
afsc-ecofoci 1 1 0
r4ss 4 1 2
library(gt)
tmp <- bind_rows(userrepotabs, .id = "username")  %>% 
  subset(username %in% usernames_noaa) %>%
  mutate(type="Individual account") %>%
  group_by(type) %>% 
  summarize(n = length(unique(username)),
            nrepos = length(name),
            updated_2023_2022 = sum(lubridate::year(updated_at) %in% c("2023", "2022"), na.rm=TRUE),
            updated_2021_2020 = sum(lubridate::year(updated_at) %in% c("2021", "2020"), na.rm=TRUE))
bind_rows(nrepos_by_type, tmp) %>% 
  ungroup() %>% gt() %>%
    tab_style(
    style = cell_fill(color = "lightgrey"),
    locations = cells_body(rows = type=="Individual account")
  ) %>%
   cols_label(
    nrepos = html("Num<br>Repos"),
    updated_2023_2022 = html("Updated in<br>2023-2022"),
    updated_2021_2020 = html("Updated in<br>2021-2020")
  ) %>%
  cols_align(
    align = "center",
    columns = c(nrepos, updated_2023_2022, updated_2021_2020)
  )  %>%
  tab_header(
    title = md(paste("Public NMFS Repositories on GitHub", Sys.Date())),
    subtitle = md("non-Enterprise GH orgs, Enterprise GH orgs, work personal accounts")
  ) %>%
    tab_source_note(
    source_note = "GH org = GitHub organization. It is like a GitHub account where groups of GitHub users collaborate on a collection of repositories and manage those repositories."
  ) %>%
    tab_source_note(
    source_note = "Note: the vast majority of repos on individual accounts are 'sandboxy' in nature and are not products per se."
  )
Public NMFS Repositories on GitHub 2023-04-05
non-Enterprise GH orgs, Enterprise GH orgs, and individual work accounts
type  n  Num Repos  Updated in 2023-2022  Updated in 2021-2020
non-Enterprise GH org 28 481 336 108
Enterprise GH org 4 81 56 25
Individual account 229 2827 951 914
GH org = GitHub organization. It is like a GitHub account where groups of GitHub users collaborate on a collection of repositories and manage those repositories.
Note: the vast majority of repos on individual accounts are 'sandboxy' in nature and are not products per se.