library ("pkgstatsAnalyses")

# Project root and the directory holding pre-computed vignette data.
here <- here::here ()
v_data_dir <- file.path (here, "vignettes", "data")

# Back-transformed mean of log10 values (i.e. a geometric mean) over the
# strictly positive entries of `x`. Zero, negative and `NA` entries are removed
# by the `which (x > 0)` subset before averaging. Arguments passed through
# `...` (callers pass `na.rm`) are accepted but not used.
logmean <- function (x, ...) {
    positive <- x [which (x > 0)]
    log_avg <- mean (log10 (positive), na.rm = TRUE)
    10 ^ log_avg
}

# Input results file and output location of figure 4.
datafile <- file.path (here, "data-raw", "pkgstats-results.Rds")
fig04_png <- file.path (here, "vignettes", "figures", "_fig04.png")
# Load the results stored in `datafile`.
x <- load_pkgstats_data (datafile, raw = TRUE, latest = FALSE)

# Number of comma-separated entries in each `languages` string.
x$num_languages <- lengths (strsplit (x$languages, ","))

# Replace non-finite entries (Inf, -Inf, NaN) with NA.
x$doclines_per_fn_exp_mn [!is.finite (x$doclines_per_fn_exp_mn)] <- NA
f <- file.path (here, "data-raw", "languages.Rds")
# Hard-coded rather than taken from `file.exists (f)`:
langs_exists <- TRUE # disable next 2 chunks
# Mean of `num_languages` across packages, using for each package only the
# row(s) with the latest `date` among rows whose `year` column is no later
# than the `year` argument.
lang_one_year <- function (x, year = 2015) {
    latest <- x |>
        filter (year <= !!year) |>
        group_by (package) |>
        slice_max (date) |>
        mutate (nlangs = mean (num_languages))
    mean (latest$nlangs)
}

# One value per distinct year in the data, saved to `f`.
years <- sort (unique (x$year))
n <- vapply (years, function (yr) lang_one_year (x, yr), numeric (1))
dat <- data.frame (year = years, n = n)
saveRDS (dat, f)
# Not used!
# Line plot of the values stored in `f`, with `n` shown as `num_languages`.
p1 <- readRDS (f) |>
    rename (num_languages = n) |>
    ggplot (aes (x = year, y = num_languages)) +
    geom_line ()
# Location of the cached per-year summary table.
f <- file.path (v_data_dir, "fig04-fns-per-year.Rds")
fns_per_year_exists <- file.exists (f)
# TRUE only when neither the cached table nor the figure file exists. Both
# operands are single logical values, so the scalar operator `&&` is used
# instead of the elementwise `&`.
calcfns_per_year <- !fns_per_year_exists && !file.exists (fig04_png)
# Columns summarised with `logmean ()`. They are listed once so that the
# "cran_by_year" and "annual" tables built below always have identical
# columns in identical order, which the final `rbind ()` relies on.
stat_cols <- c (
    "loc_R", "loc_src", "loc_inst", "loc_vignettes", "loc_tests",
    "n_fns_r_exported", "n_fns_r_not_exported", "n_fns_src",
    "n_fns_per_file_r", "n_fns_per_file_src",
    "npars_exported_mn", "npars_exported_md",
    "loc_per_fn_r_mn", "loc_per_fn_r_md",
    "loc_per_fn_r_exp_mn", "loc_per_fn_r_exp_md",
    "loc_per_fn_r_not_exp_mn", "loc_per_fn_r_not_exp_md",
    "loc_per_fn_src_mn", "loc_per_fn_src_md",
    "doclines_per_fn_exp_mn", "doclines_per_fn_exp_md",
    "doclines_per_fn_not_exp_mn", "doclines_per_fn_not_exp_md",
    "docchars_per_par_exp_mn", "docchars_per_par_exp_md"
)

# Summary statistics over the row(s) with the latest `date` for each package,
# restricted to rows whose `year` column is no later than the `year` argument.
#
# x:    data with `year`, `package`, `date` and every column in `stat_cols`.
# year: cut-off year.
#
# Returns a named numeric vector: `year`, `loc` (the `logmean ()` of
# loc_R + loc_src + loc_inst, with NA counted as 0), followed by the
# `logmean ()` of each column in `stat_cols`.
fns_per_year <- function (x, year = 2015) {
    x1 <- x |>
        filter (year <= !!year) |>
        group_by (package) |>
        slice_max (date)

    # NA line counts are set to zero so that the total below is never NA.
    x1$loc_R [which (is.na (x1$loc_R))] <- 0L
    x1$loc_src [which (is.na (x1$loc_src))] <- 0L
    x1$loc_inst [which (is.na (x1$loc_inst))] <- 0L
    loc <- x1$loc_R + x1$loc_src + x1$loc_inst

    # `vapply ()` over a character vector names the result by column.
    stats <- vapply (
        stat_cols,
        function (col) logmean (x1 [[col]]),
        numeric (1)
    )
    c (year = year, loc = logmean (loc), stats)
}

# Table 1 ("cran_by_year"): one row per cut-off year.
years <- sort (unique (x$year))
dat <- lapply (years, function (i) fns_per_year (x, i))
dat <- data.frame (do.call (rbind, dat))
dat$what <- "cran_by_year"
dat$loc_inst [which (!is.finite (dat$loc_inst))] <- NA

# Table 2 ("annual"): the same statistics over the rows of each single year.
x$loc_R [which (is.na (x$loc_R))] <- 0L
x$loc_src [which (is.na (x$loc_src))] <- 0L
x$loc_inst [which (is.na (x$loc_inst))] <- 0L
x$loc_total <- x$loc_R + x$loc_src + x$loc_inst

xs <- x |>
    group_by (year) |>
    summarise (
        loc = logmean (loc_total),
        dplyr::across (dplyr::all_of (stat_cols), logmean)
    )
xs$what <- "annual"

saveRDS (rbind (dat, xs), f)
loc <- readRDS (f)

# Long-format lines-of-code values for the rows labelled "cran_by_year". Only
# referenced by the commented-out overlay in the plot below.
loc_cran <- loc |>
    filter (what == "cran_by_year") |>
    select (year, total = loc, R = loc_R, src = loc_src, inst = loc_inst) |>
    pivot_longer (
        c (total, R, src, inst),
        names_to = "type",
        values_to = "number"
    )

# Panel A: lines of code per year for the rows labelled "annual".
p1 <- loc |>
    filter (what == "annual") |>
    select (year, total = loc, R = loc_R, src = loc_src, inst = loc_inst) |>
    #mutate (total = total * max (inst, na.rm = TRUE) / max (total, na.rm = TRUE)) |>
    pivot_longer (
        c (total, R, src, inst),
        names_to = "type",
        values_to = "number"
    ) |>
    ggplot (aes (x = year, y = number, colour = type)) +
    geom_line () +
    #geom_line (data = loc_cran, show.legend = FALSE, lty = 2) +
    theme (
        legend.title = element_blank (),
        legend.position = c (0.7, 0.85),
        legend.background = element_rect (
            fill = "transparent",
            colour = "transparent"
        )
    ) +
    ylab ("Lines of Code") +
    guides (color = guide_legend (ncol = 2)) +
    ggtitle ("A: Lines of Code")
# Long-format lines-of-code-per-function values for the rows labelled
# "cran_by_year". Only referenced by the commented-out overlay below.
loc_cran <- readRDS (f) |>
    filter (what == "cran_by_year") |>
    select (
        year,
        R_exp = loc_per_fn_r_exp_mn,
        R_non = loc_per_fn_r_not_exp_mn,
        src = loc_per_fn_src_mn
    ) |>
    pivot_longer (
        c (R_exp, R_non, src),
        names_to = "type",
        values_to = "number"
    )

# Wide table for the rows labelled "annual". This overwrites `x`.
x <- readRDS (f) |>
    filter (what == "annual") |>
    select (
        year,
        R_exp = loc_per_fn_r_exp_mn,
        R_non = loc_per_fn_r_not_exp_mn,
        src = loc_per_fn_src_mn
    )

# Panel B: lines of code per function on a log10 y-axis.
p2 <- x |>
    pivot_longer (
        c (R_exp, R_non, src),
        names_to = "type",
        values_to = "number"
    ) |>
    ggplot (aes (x = year, y = number, colour = type)) +
    geom_line () +
    #geom_line (data = loc_cran, show.legend = FALSE, lty = 2) +
    theme (
        legend.title = element_blank (),
        legend.position = c (0.8, 0.8),
        legend.background = element_rect (
            fill = "transparent",
            colour = "transparent"
        )
    ) +
    ylab ("Lines of Code") +
    scale_y_log10 () +
    ggtitle ("B: LoC per fn")

# decrease since 2015:
# Slope (second coefficient) of a linear fit of each series against `year`.
x15 <- x [x$year >= 2015, ]
slope_R_exp <- coef (lm (R_exp ~ year, data = x15)) [2]
slope_R_non <- coef (lm (R_non ~ year, data = x15)) [2]
slope_src <- coef (lm (src ~ year, data = x15)) [2]
#1 / slope_R_exp; 1 / slope_R_non; 1 / slope_src
# Long-format function counts for the rows labelled "cran_by_year". Only
# referenced by the commented-out overlay in the plot below.
nfns_cran <- readRDS (f) |>
    filter (what == "cran_by_year") |>
    select (
        year,
        r_exp = n_fns_r_exported,
        r_non = n_fns_r_not_exported,
        src = n_fns_src
    ) |>
    pivot_longer (
        c (r_exp, r_non, src),
        names_to = "type",
        values_to = "number"
    )

# Panel C: numbers of functions per year for the rows labelled "annual".
p3 <- readRDS (f) |>
    filter (what == "annual") |>
    select (
        year,
        r_exp = n_fns_r_exported,
        r_non = n_fns_r_not_exported,
        src = n_fns_src
    ) |>
    pivot_longer (
        c (r_exp, r_non, src),
        names_to = "type",
        values_to = "number"
    ) |>
    ggplot (aes (x = year, y = number, colour = type)) +
    geom_line () +
    #geom_line (data = nfns_cran, show.legend = FALSE, lty = 2) +
    theme (
        legend.title = element_blank (),
        legend.position = c (0.15, 0.85),
        legend.background = element_rect (
            fill = "transparent",
            colour = "transparent"
        )
    ) +
    ylab ("Numbers of functions") +
    ggtitle ("C: Total functions")
# Long-format functions-per-file values for the rows labelled "cran_by_year".
# Only referenced by the commented-out overlay in the plot below.
nfns_cran <- readRDS (f) |>
    filter (what == "cran_by_year") |>
    select (year, R = n_fns_per_file_r, src = n_fns_per_file_src) |>
    pivot_longer (c (R, src), names_to = "type", values_to = "number")

# Panel D: functions per file per year for the rows labelled "annual".
p4 <- readRDS (f) |>
    filter (what == "annual") |>
    select (year, R = n_fns_per_file_r, src = n_fns_per_file_src) |>
    pivot_longer (c (R, src), names_to = "type", values_to = "number") |>
    ggplot (aes (x = year, y = number, colour = type)) +
    geom_line () +
    #geom_line (data = nfns_cran, show.legend = FALSE, lty = 2) +
    theme (
        legend.title = element_blank (),
        legend.position = c (0.85, 0.9),
        legend.background = element_rect (
            fill = "transparent",
            colour = "transparent"
        )
    ) +
    ylab ("Numbers of functions") +
    ggtitle ("D: Functions per file")
# Render the four panels as a 2 x 2 layout into the PNG file `fig04_png`.
png (fig04_png, width = 7, height = 7, units = "in", res = 300)
# Print explicitly: a plot object is only drawn when printed, and automatic
# printing of top-level values does not happen under `source ()` or inside an
# enclosing block, which would leave the device without any drawing.
print ((p1 + p2) / (p3 + p4))
graphics.off ()
# Include the image file at `fig04_png` in the knitted output.
knitr::include_graphics (fig04_png)
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.