# Set-up for Figure 1: paths to the raw data, to the cached intermediate
# results, and to the rendered figure, plus flags recording which of the
# intermediate results still have to be calculated.
library ("pkgstatsAnalyses")

here <- here::here ()
v_data_dir <- file.path (here, "vignettes", "data")
datafile <- file.path (here, "data-raw", "pkgstats-results.Rds")

# Cached intermediate results which are read back when the figure is plotted:
f_fig1_urls <- file.path (v_data_dir, "fig01-urls.Rds")
f_fig1_aut_ctb <- file.path (v_data_dir, "fig01-aut-ctb.Rds")
f_fig1_lic <- file.path (v_data_dir, "fig01-lic.Rds")

# The rendered figure itself:
fig01_png <- file.path (here, "vignettes", "figures", "_fig01.png")

# An intermediate result only has to be calculated when neither its own cache
# file nor the final figure exists. The same rule is applied to all three
# flags. Every operand is a length-one logical, so the scalar `&&` is used
# rather than the element-wise `&`.
fig01_missing <- !file.exists (fig01_png)
calc_fig1_urls <- fig01_missing && !file.exists (f_fig1_urls)
calc_fig1_aut_ctb <- fig01_missing && !file.exists (f_fig1_aut_ctb)
calc_fig1_lic <- fig01_missing && !file.exists (f_fig1_lic)
# Proportions of packages with URLs, and with "git"-type URLs, for every year
# in the data. Each proportion is calculated twice: as a cumulative "snapshot"
# (latest entry of every package up to that year), and as an "annual" value
# (entries from that year only). The combined table is saved to `f_fig1_urls`.
x <- load_pkgstats_data (datafile, raw = TRUE, latest = FALSE)

# Rows of `x` with `year` no later than the given year, reduced for each
# package to the row(s) with the maximal `date`.
cran_snapshot <- function (x, year) {
    x |>
        filter (year <= !!year) |>
        group_by (package) |>
        slice_max (date)
}

# TRUE for each URL string containing "git" or "bit", ignoring case. One
# shared test keeps the snapshot and annual "git" proportions comparable;
# they previously used two different patterns.
is_git_url <- function (urls) {
    grepl ("[bg]it", urls, ignore.case = TRUE)
}

# CRAN snapshot values:
do1 <- function (x, year = 2018) {
    xy <- cran_snapshot (x, year)
    mean (!is.na (xy$urls))
}
years <- sort (unique (x$year))
urls <- vapply (years, function (y) do1 (x, y), numeric (1L))

# Git* URLs (missing URLs become empty strings, which never match):
x_git <- x
x_git$urls [is.na (x_git$urls)] <- ""
do1 <- function (x, year = 2018) {
    xy <- cran_snapshot (x, year)
    mean (is_git_url (xy$urls))
}
urls_git <- vapply (years, function (y) do1 (x_git, y), numeric (1L))

# annual values (inside `summarise`, `urls` is the column of that name, not
# the vector of snapshot values defined above):
xy <- x |>
    group_by (year) |>
    summarise (prop_urls = mean (!is.na (urls)))
xy$type <- "annual"
xy <- rbind (
    xy,
    data.frame (
        year = years,
        prop_urls = urls,
        type = "snapshot"
    )
)

xy_git <- x_git |>
    group_by (year) |>
    summarise (prop_urls = mean (is_git_url (urls)))
xy_git$type <- "annual_git"
xy_git <- rbind (
    xy_git,
    data.frame (
        year = years,
        prop_urls = urls_git,
        type = "snapshot_git"
    )
)

saveRDS (rbind (xy, xy_git), f_fig1_urls)
# Mean numbers of authors (`desc_n_aut`) and contributors (`desc_n_ctb`) per
# package, with standard errors, for every year in the data. Values are
# calculated for cumulative snapshots ("cran_by_year") and for the entries of
# each single year ("annual"), and saved together to `f_fig1_aut_ctb`.
x <- load_pkgstats_data (datafile, raw = TRUE, latest = FALSE)

# Standard error of the mean of `v`.
std_err <- function (v) {
    sd (v) / sqrt (length (v))
}

# Summary for the latest entry of every package up to and including `year`.
do1 <- function (x, year = 2018) {
    snapshot <- x |>
        filter (year <= !!year) |>
        group_by (package) |>
        slice_max (date)
    aut <- snapshot$desc_n_aut
    ctb <- snapshot$desc_n_ctb
    c (
        year = year,
        n_aut = mean (aut),
        n_ctb = mean (ctb),
        se_aut = std_err (aut),
        se_ctb = std_err (ctb)
    )
}

years <- sort (unique (x$year))
# One column of 5 values per year, transposed to one row per year:
n <- vapply (years, function (y) do1 (x, y), numeric (5L))
n <- data.frame (t (n))
n$what <- "cran_by_year"

# Then values for actual years
xy <- x |>
    group_by (year) |>
    summarise (
        n_aut = mean (desc_n_aut),
        n_ctb = mean (desc_n_ctb),
        se_aut = std_err (desc_n_aut),
        se_ctb = std_err (desc_n_ctb)
    )
xy$what <- "annual"

n <- rbind (n, xy)
saveRDS (n, f_fig1_aut_ctb)
# Numbers of distinct licenses for every year in the data, for cumulative
# snapshots ("cran_by_year") and for the entries of each single year
# ("annual"). The combined table is saved to `f_fig1_lic`.
x <- load_pkgstats_data (datafile, raw = TRUE, latest = FALSE)

# Number of distinct values in `license` after splitting each entry on "|"
# and stripping leading and trailing whitespace from the pieces.
count_licenses <- function (license) {
    pieces <- unlist (strsplit (license, "\\|"))
    trimmed <- gsub ("^\\s*|\\s*$", "", pieces)
    length (unique (trimmed))
}

# Count for the latest entry of every package up to and including `year`.
do1 <- function (x, year = 2018) {
    snapshot <- x |>
        filter (year <= !!year) |>
        group_by (package) |>
        slice_max (date)
    c (year = year, n = count_licenses (snapshot$license))
}

years <- sort (unique (x$year))
# One column of 2 values per year, transposed to one row per year:
nlic <- vapply (years, function (y) do1 (x, y), numeric (2L))
nlic <- data.frame (t (nlic))
nlic$what <- "cran_by_year"

x_y <- x |>
    group_by (year) |>
    summarise (n = count_licenses (license))
x_y$what <- "annual"

nlic <- rbind (nlic, x_y)
saveRDS (nlic, f_fig1_lic)
# Figure 1: three panels built from the cached intermediate results and
# written to `fig01_png`. In every panel, values labelled "annual" are drawn
# with dashed lines (`lty = 2` in panels A and B; mapped from `what` in C).

# Theme shared by panels A and B: untitled legend with transparent background,
# placed at relative position (`x`, `y`) inside the panel.
legend_theme <- function (x, y) {
    theme (
        legend.title = element_blank (),
        legend.position = c (x, y),
        legend.background = element_rect (
            fill = "transparent",
            colour = "transparent"
        )
    )
}

# ---- Panel A: numbers of authors / contributors ----
dat <- readRDS (f_fig1_aut_ctb)

# Lower and upper bounds (mean -/+ one standard error), in the same long
# format and row order as `fig1_dat` below. They are only used by the ribbons
# which are currently commented out.
lo <- dat |>
    mutate (aut_lo = n_aut - se_aut, ctb_lo = n_ctb - se_ctb) |>
    select (year, aut_lo, ctb_lo, what) |>
    pivot_longer (c (aut_lo, ctb_lo))
hi <- dat |>
    mutate (aut_hi = n_aut + se_aut, ctb_hi = n_ctb + se_ctb) |>
    select (year, aut_hi, ctb_hi, what) |>
    pivot_longer (c (aut_hi, ctb_hi))

fig1_dat <- dat |>
    rename (aut = n_aut, ctb = n_ctb) |>
    select (year, aut, ctb, what) |>
    pivot_longer (cols = c (aut, ctb))
fig1_dat$lo <- lo$value
fig1_dat$hi <- hi$value

dat_annual <- fig1_dat [fig1_dat$what == "annual", ]
fig1_dat <- fig1_dat [fig1_dat$what == "cran_by_year", ]

p1 <- ggplot (fig1_dat, aes (x = year, y = value, colour = name)) +
    # geom_ribbon (aes (ymin = lo, ymax = hi, fill = name, colour = NULL),
    #              alpha = 0.15, show.legend = FALSE) +
    geom_line () +
    # geom_ribbon (data = dat_annual,
    #              aes (ymin = lo, ymax = hi, fill = name, colour = NULL),
    #              alpha = 0.15, show.legend = FALSE) +
    geom_line (data = dat_annual, lty = 2) +
    ylab ("Number authors / contributors") +
    ggtitle ("A: Num aut / ctb") +
    legend_theme (0.2, 0.4)

# ---- Panel B: proportions of packages with URLs ----
dat_urls <- readRDS (f_fig1_urls)
# `type` values containing "git" hold the git-only proportions:
dat_urls$what <- ifelse (grepl ("git", dat_urls$type), "git", "all")
is_annual <- grepl ("annual", dat_urls$type)
dat_urls_annual <- dat_urls [is_annual, ]
dat_urls <- dat_urls [!is_annual, ]

p2 <- ggplot (dat_urls, aes (x = year, y = prop_urls, colour = what)) +
    geom_line () +
    geom_line (data = dat_urls_annual, lty = 2) +
    ylab ("Proportion pkgs with URLs") +
    ggtitle ("B: Prop URLs") +
    legend_theme (0.2, 0.9)

# ---- Panel C: numbers of distinct licenses ----
# The title was split by a stray line break inside the string literal; it is
# restored to a single line, like the titles of panels A and B.
p3 <- readRDS (f_fig1_lic) |>
    mutate (lty = (what == "cran_by_year")) |>
    ggplot (aes (x = year, y = n, lty = lty)) +
    geom_line () +
    theme (legend.position = "none") +
    ylab ("Number of distinct licenses") +
    ggtitle ("C: Num licenses")

# ---- Write the combined figure ----
# The combined plot is printed explicitly, so that it is drawn even when this
# code is not run at top level. Only the png device opened here is closed
# (`graphics.off ()` would close every open device), and it is closed even if
# printing fails.
png (fig01_png, width = 7, height = 3.5, units = "in", res = 300)
invisible (tryCatch (
    print (p1 + p2 + p3),
    finally = dev.off ()
))
# Include the image file at `fig01_png` in the knitted document.
knitr::include_graphics (fig01_png)
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.