library ("pkgstatsAnalyses")
# Project root as resolved by the `here` package. The result is stored in a
# variable that is also called `here`; the code below uses that variable as a
# path, while the function itself is only ever called as `here::here ()`.
here <- here::here ()
# Directory `vignettes/data` below the project root.
v_data_dir <- file.path (here, "vignettes", "data")
# Geometric mean of the strictly positive entries of `x`.
#
# Entries that are zero, negative or `NA` are dropped, the remainder is
# averaged on the log10 scale and transformed back. If no positive entry
# remains, the result is `NaN`. Arguments passed through `...` (such as
# `na.rm`) are accepted but not used.
logmean <- function (x, ...) {
    positive <- x [which (x > 0)]
    log_avg <- mean (log10 (positive), na.rm = TRUE)
    10 ^ log_avg
}

# Location of the input data and of the figure written at the end of the
# script.
datafile <- file.path (here, "data-raw", "pkgstats-results.Rds")
fig04_png <- file.path (here, "vignettes", "figures", "_fig04.png")

# Load the results with `raw = TRUE` and `latest = FALSE`.
x <- load_pkgstats_data (datafile, raw = TRUE, latest = FALSE)

# Number of comma-separated entries in each `languages` string.
x$num_languages <- lengths (strsplit (x$languages, ","))

# Non-finite values (NaN, Inf, -Inf) in this column become NA.
x$doclines_per_fn_exp_mn [!is.finite (x$doclines_per_fn_exp_mn)] <- NA

f <- file.path (here, "data-raw", "languages.Rds")
langs_exists <- file.exists (f)
# The flag is then forced to TRUE regardless of whether the file exists.
# NOTE(review): the original remark says this disables the next 2 chunks;
# presumably chunk options in the source document read this flag - confirm.
langs_exists <- TRUE # disable next 2 chunks
# Average value of `num_languages` for one cut-off year.
#
# Keeps the rows of `x` whose `year` is not later than the `year` argument,
# reduces every `package` to its row(s) with the latest `date`, attaches the
# per-package mean of `num_languages` to each remaining row, and returns the
# mean of those values over all remaining rows.
#
# x:    data frame with columns `year`, `package`, `date`, `num_languages`.
# year: cut-off year (inclusive).
lang_one_year <- function (x, year = 2015) {
    up_to_year <- filter (x, year <= !!year)
    newest <- slice_max (group_by (up_to_year, package), date)
    newest <- mutate (newest, nlangs = mean (num_languages))

    mean (newest$nlangs)
}
# Evaluate `lang_one_year ()` for every distinct year in the data and store
# the result in `f` (at this point the `languages.Rds` path).
years <- sort (unique (x$year))
n <- vapply (years,
             function (yr) lang_one_year (x, yr),
             numeric (1))
dat <- data.frame (year = years, n = n)
saveRDS (dat, f)
# Not used! (`p1` is assigned again further down.)
p1 <- readRDS (f) |>
    rename (num_languages = n) |>
    ggplot (aes (x = year, y = num_languages)) +
    geom_line ()
# From here on `f` is the file holding the per-year function statistics.
f <- file.path (v_data_dir, "fig04-fns-per-year.Rds")
fns_per_year_exists <- file.exists (f)
# TRUE only when neither the statistics file nor the figure exists yet.
calcfns_per_year <- !(fns_per_year_exists || file.exists (fig04_png))
# Log-mean code statistics for one cut-off year.
#
# Keeps the rows of `x` whose `year` is not later than the `year` argument and
# reduces every `package` to its row(s) with the latest `date`. Missing values
# of `loc_R`, `loc_src` and `loc_inst` are set to zero, and their sum is the
# total number of lines of code.
#
# x:    data frame with `year`, `package`, `date` and the statistic columns
#       listed in `stat_cols` below.
# year: cut-off year (inclusive).
#
# Returns a named numeric vector: `year`, `loc` (log-mean of the total), then
# one `logmean ()` value per entry of `stat_cols`, named after the column.
fns_per_year <- function (x, year = 2015) {

    newest <- x |>
        filter (year <= !!year) |>
        group_by (package) |>
        slice_max (date)

    for (col in c ("loc_R", "loc_src", "loc_inst")) {
        newest [[col]] <- replace (newest [[col]], is.na (newest [[col]]), 0L)
    }
    total_loc <- newest$loc_R + newest$loc_src + newest$loc_inst

    # Order matters: it fixes the order of the entries in the result.
    stat_cols <- c (
        "loc_R", "loc_src", "loc_inst", "loc_vignettes", "loc_tests",
        "n_fns_r_exported", "n_fns_r_not_exported", "n_fns_src",
        "n_fns_per_file_r", "n_fns_per_file_src",
        "npars_exported_mn", "npars_exported_md",
        "loc_per_fn_r_mn", "loc_per_fn_r_md",
        "loc_per_fn_r_exp_mn", "loc_per_fn_r_exp_md",
        "loc_per_fn_r_not_exp_mn", "loc_per_fn_r_not_exp_md",
        "loc_per_fn_src_mn", "loc_per_fn_src_md",
        "doclines_per_fn_exp_mn", "doclines_per_fn_exp_md",
        "doclines_per_fn_not_exp_mn", "doclines_per_fn_not_exp_md",
        "docchars_per_par_exp_mn", "docchars_per_par_exp_md"
    )
    stats <- vapply (stat_cols,
                     function (col) logmean (newest [[col]], na.rm = TRUE),
                     numeric (1))

    c (year = year, loc = logmean (total_loc), stats)
}
# Part 1: statistics from `fns_per_year ()` for every distinct year, one row
# per year, labelled "cran_by_year".
years <- sort (unique (x$year))
dat <- data.frame (
    do.call (rbind, lapply (years, function (yr) fns_per_year (x, yr)))
)
dat$what <- "cran_by_year"

# `logmean ()` gives NaN when a column has no positive value; store NA instead.
dat$loc_inst [!is.finite (dat$loc_inst)] <- NA

# Part 2: the same statistics computed over the rows of each single year,
# labelled "annual". Missing line counts are treated as zero so that the
# total is always defined.
x$loc_R [is.na (x$loc_R)] <- 0L
x$loc_src [is.na (x$loc_src)] <- 0L
x$loc_inst [is.na (x$loc_inst)] <- 0L
x$loc_total <- x$loc_R + x$loc_src + x$loc_inst

xs <- x |>
    group_by (year) |>
    summarise (
        loc = logmean (loc_total),
        across (
            all_of (c (
                "loc_R", "loc_src", "loc_inst", "loc_vignettes", "loc_tests",
                "n_fns_r_exported", "n_fns_r_not_exported", "n_fns_src",
                "n_fns_per_file_r", "n_fns_per_file_src",
                "npars_exported_mn", "npars_exported_md",
                "loc_per_fn_r_mn", "loc_per_fn_r_md",
                "loc_per_fn_r_exp_mn", "loc_per_fn_r_exp_md",
                "loc_per_fn_r_not_exp_mn", "loc_per_fn_r_not_exp_md",
                "loc_per_fn_src_mn", "loc_per_fn_src_md",
                "doclines_per_fn_exp_mn", "doclines_per_fn_exp_md",
                "doclines_per_fn_not_exp_mn", "doclines_per_fn_not_exp_md",
                "docchars_per_par_exp_mn", "docchars_per_par_exp_md"
            )),
            function (v) logmean (v, na.rm = TRUE)
        )
    )
xs$what <- "annual"

# Both parts are stacked and written to `f`.
saveRDS (rbind (dat, xs), f)
# Panel A: lines of code per year, by code location.
loc <- readRDS (f)

# Long-format "cran_by_year" values; only needed by the disabled overlay layer
# mentioned below.
loc_cran <- loc |>
    filter (what == "cran_by_year") |>
    select (year, total = loc, R = loc_R, src = loc_src, inst = loc_inst) |>
    pivot_longer (c (total, R, src, inst),
                  names_to = "type", values_to = "number")

# Disabled in the original and kept here for reference:
#   mutate (total = total * max (inst, na.rm = TRUE) / max (total, na.rm = TRUE))
#   geom_line (data = loc_cran, show.legend = FALSE, lty = 2)
p1 <- loc |>
    filter (what == "annual") |>
    select (year, total = loc, R = loc_R, src = loc_src, inst = loc_inst) |>
    pivot_longer (c (total, R, src, inst),
                  names_to = "type", values_to = "number") |>
    ggplot (aes (x = year, y = number, colour = type)) +
    geom_line () +
    theme (legend.title = element_blank (),
           legend.position = c (0.7, 0.85),
           legend.background = element_rect (fill = "transparent",
                                             colour = "transparent")) +
    ylab ("Lines of Code") +
    guides (color = guide_legend (ncol = 2)) +
    ggtitle ("A: Lines of Code")
# Panel B: mean lines of code per function (exported R, non-exported R, src).

# Long-format "cran_by_year" values; only needed by the disabled overlay layer
# mentioned below.
loc_cran <- readRDS (f) |>
    filter (what == "cran_by_year") |>
    select (year,
            R_exp = loc_per_fn_r_exp_mn,
            R_non = loc_per_fn_r_not_exp_mn,
            src = loc_per_fn_src_mn) |>
    pivot_longer (c (R_exp, R_non, src),
                  names_to = "type", values_to = "number")

# Wide-format "annual" values. Note that this replaces the data previously
# held in `x`; the slope calculation after the plot reads this object.
x <- readRDS (f) |>
    filter (what == "annual") |>
    select (year,
            R_exp = loc_per_fn_r_exp_mn,
            R_non = loc_per_fn_r_not_exp_mn,
            src = loc_per_fn_src_mn)

# Disabled in the original and kept here for reference:
#   geom_line (data = loc_cran, show.legend = FALSE, lty = 2)
p2 <- x |>
    pivot_longer (c (R_exp, R_non, src),
                  names_to = "type", values_to = "number") |>
    ggplot (aes (x = year, y = number, colour = type)) +
    geom_line () +
    theme (legend.title = element_blank (),
           legend.position = c (0.8, 0.8),
           legend.background = element_rect (fill = "transparent",
                                             colour = "transparent")) +
    ylab ("Lines of Code") +
    scale_y_log10 () +
    ggtitle ("B: LoC per fn")

# Decrease since 2015: slope of a linear fit against `year` for each of the
# three series, using the rows of `x` from 2015 onwards.
x15 <- x [x$year >= 2015, ]
slope_R_exp <- coef (lm (R_exp ~ year, data = x15)) [2]
slope_R_non <- coef (lm (R_non ~ year, data = x15)) [2]
slope_src <- coef (lm (src ~ year, data = x15)) [2]
# Reciprocals, left disabled as in the original:
#1 / slope_R_exp; 1 / slope_R_non; 1 / slope_src
# Panel C: numbers of functions (exported R, non-exported R, src).

# Long-format "cran_by_year" values; only needed by the disabled overlay layer
# mentioned below.
nfns_cran <- readRDS (f) |>
    filter (what == "cran_by_year") |>
    select (year,
            r_exp = n_fns_r_exported,
            r_non = n_fns_r_not_exported,
            src = n_fns_src) |>
    pivot_longer (c (r_exp, r_non, src),
                  names_to = "type", values_to = "number")

# Disabled in the original and kept here for reference:
#   geom_line (data = nfns_cran, show.legend = FALSE, lty = 2)
p3 <- readRDS (f) |>
    filter (what == "annual") |>
    select (year,
            r_exp = n_fns_r_exported,
            r_non = n_fns_r_not_exported,
            src = n_fns_src) |>
    pivot_longer (c (r_exp, r_non, src),
                  names_to = "type", values_to = "number") |>
    ggplot (aes (x = year, y = number, colour = type)) +
    geom_line () +
    theme (legend.title = element_blank (),
           legend.position = c (0.15, 0.85),
           legend.background = element_rect (fill = "transparent",
                                             colour = "transparent")) +
    ylab ("Numbers of functions") +
    ggtitle ("C: Total functions")
# Panel D: numbers of functions per file (R and src).

# Long-format "cran_by_year" values; only needed by the disabled overlay layer
# mentioned below.
nfns_cran <- readRDS (f) |>
    filter (what == "cran_by_year") |>
    select (year, R = n_fns_per_file_r, src = n_fns_per_file_src) |>
    pivot_longer (c (R, src), names_to = "type", values_to = "number")

# Disabled in the original and kept here for reference:
#   geom_line (data = nfns_cran, show.legend = FALSE, lty = 2)
p4 <- readRDS (f) |>
    filter (what == "annual") |>
    select (year, R = n_fns_per_file_r, src = n_fns_per_file_src) |>
    pivot_longer (c (R, src), names_to = "type", values_to = "number") |>
    ggplot (aes (x = year, y = number, colour = type)) +
    geom_line () +
    theme (legend.title = element_blank (),
           legend.position = c (0.85, 0.9),
           legend.background = element_rect (fill = "transparent",
                                             colour = "transparent")) +
    ylab ("Numbers of functions") +
    ggtitle ("D: Functions per file")
# Write the four panels, arranged as two rows of two, to the PNG file and
# include that file in the rendered document.
png (fig04_png, width = 7, height = 7, units = "in", res = 300)
# The combined plot is printed explicitly. A plot object is only drawn when it
# is printed, and top-level auto-printing does not happen when the script is
# run through `source ()`, which would leave the PNG without any content.
# NOTE(review): `+` and `/` on plot objects presumably come from the patchwork
# package - confirm that it is attached.
print ((p1 + p2) / (p3 + p4))
# Closes every open graphics device, which finalises the PNG file.
graphics.off ()
knitr::include_graphics (fig04_png)


# mpadge/pkgstats-analyses documentation built on Dec. 10, 2022, 4:20 a.m.