library ("pkgstatsAnalyses")
# ---- Paths and raw data for figure 10 ----

# Project root as returned by `here::here ()`. Note that this binds the path
# to the name `here`, which is used as a plain character value below.
here <- here::here ()
v_data_dir <- file.path (here, "vignettes", "data")

# Input data file, and the file in which the per-year summaries are cached.
datafile <- file.path (here, "data-raw", "pkgstats-results.Rds")
f_fig10 <- file.path (v_data_dir, "fig10.Rds")

# Location of the rendered figure.
fig10_png <- file.path (here, "vignettes", "figures", "_fig10.png")

# TRUE only when neither the cached summaries nor the rendered figure exist.
# Both operands are length-one, so the scalar, short-circuiting `&&` is the
# appropriate operator here rather than the elementwise `&`.
# NOTE(review): nothing in the visible code reads `calc_fig10`; presumably it
# gates evaluation somewhere else - confirm.
calc_fig10 <- !file.exists (f_fig10) && !file.exists (fig10_png)

# Load the pkgstats data with `raw = TRUE, latest = FALSE` (presumably every
# release of every package rather than only the latest - confirm).
x <- load_pkgstats_data (datafile, raw = TRUE, latest = FALSE)
# Summarise blank-line, comment-line, spacing and indentation statistics for
# one year.
#
# Arguments:
#   x             Data frame with columns `year`, `package`, `date`,
#                 `blank_lines_R`, `comment_lines_R`, `loc_R`,
#                 `blank_lines_src`, `comment_lines_src`, `loc_src`,
#                 `rel_space_R` and `indentation`.
#   year          Year to summarise.
#   cran_by_year  If TRUE, take all rows up to and including `year` and keep,
#                 for each package, the row(s) with the latest `date`. If
#                 FALSE, keep only the rows of `year` itself.
#
# Returns a named numeric vector of length 8: `year`, the mean proportions of
# blank and comment lines for "R" and "src" code, the mean of `rel_space_R`,
# `tabs` (share of non-missing `indentation` values that are negative) and
# `indentation` (mean of the non-negative `indentation` values).
do1 <- function (x, year = 2018, cran_by_year = TRUE) {

    # `!!year` injects the function argument; a bare `year` inside the dplyr
    # verbs refers to the column of `x`.
    xy <- if (cran_by_year) {
        slice_max (group_by (filter (x, year <= !!year), package), date)
    } else {
        filter (x, year == !!year)
    }

    # Total line counts are the sum of blank, comment and code lines.
    n_lines_R <- xy$blank_lines_R + xy$comment_lines_R + xy$loc_R
    n_lines_src <- xy$blank_lines_src + xy$comment_lines_src + xy$loc_src

    # Mean of row-wise proportions, ignoring missing / undefined ratios.
    mean_prop <- function (part, whole) {
        mean (part / whole, na.rm = TRUE)
    }

    indent <- xy$indentation
    has_indent <- !is.na (indent)
    # Negative values are counted as "tabs" (presumably how tab indentation is
    # encoded upstream - confirm).
    prop_tabs <- sum (has_indent & indent < 0) / sum (has_indent)
    mean_indent <- mean (indent [indent >= 0], na.rm = TRUE)

    c (
        year = year,
        blank_R = mean_prop (xy$blank_lines_R, n_lines_R),
        blank_src = mean_prop (xy$blank_lines_src, n_lines_src),
        comment_R = mean_prop (xy$comment_lines_R, n_lines_R),
        comment_src = mean_prop (xy$comment_lines_src, n_lines_src),
        rel_space_R = mean (xy$rel_space_R, na.rm = TRUE),
        tabs = prop_tabs,
        indentation = mean_indent
    )
}
# ---- Per-year summaries, cached to `f_fig10` ----

# Every distinct year in the data, in increasing order.
years <- sort (unique (x$year))

# One row per year, one column per statistic returned by `do1 ()` (eight
# values), computed with `cran_by_year = TRUE`.
n1 <- data.frame (t (vapply (
    years,
    function (y) do1 (x, y, cran_by_year = TRUE),
    FUN.VALUE = numeric (8)
)))
n1 [["what"]] <- "cran_by_year"

# The same statistics restricted to the rows of each single year.
n2 <- data.frame (t (vapply (
    years,
    function (y) do1 (x, y, cran_by_year = FALSE),
    FUN.VALUE = numeric (8)
)))
n2 [["what"]] <- "annual"

# Stack both variants; the `what` column tells them apart.
saveRDS (rbind (n1, n2), file = f_fig10)
# ---- Panel A: proportions of blank and comment lines ----

# "cran_by_year" series in long form (`year`, `name`, `value`); drawn as
# dashed lines below.
x1_cran <- readRDS (f_fig10) |>
    filter (what == "cran_by_year") |>
    select (year, blank_R, blank_src, comment_R, comment_src) |>
    pivot_longer (cols = -year)

# "annual" series as solid lines, coloured by statistic; the dashed overlay
# re-uses the same colours and is kept out of the legend.
p1 <- readRDS (f_fig10) |>
    filter (what == "annual") |>
    select (year, blank_R, blank_src, comment_R, comment_src) |>
    pivot_longer (cols = -year) |>
    ggplot (aes (x = year, y = value, colour = name)) +
    geom_line () +
    geom_line (data = x1_cran, linetype = 2, show.legend = FALSE) +
    labs (title = "A: Blank & comment lines", y = "Proportion") +
    theme (
        legend.title = element_blank (),
        legend.position = c (0.2, 0.8),
        legend.background = element_rect (
            fill = "transparent",
            colour = "transparent"
        )
    )

# ---- Panel B: indentation and spacing ----

x2 <- readRDS (f_fig10)

# Ratio of the largest `indentation` to the largest `tabs` value. It is used
# to draw `indentation` on the primary axis and to label the secondary axis.
# `na.rm = TRUE` because the per-year summaries can hold NaN (for example a
# year without any usable indentation values); a single such year would
# otherwise turn the whole scale, and with it the indentation line, into NaN.
coeff_fig10_p2 <- max (x2$indentation, na.rm = TRUE) /
    max (x2$tabs, na.rm = TRUE)
# NOTE(review): not referenced anywhere in the visible code.
colours_fig10_p2 <- c ("rel_space" = "red",
                       "tabs" = "blue",
                       "indentation" = "green")

# "cran_by_year" series (dashed lines); `rel_space` is multiplied by 4, as
# stated in the axis label.
x2_cran <- x2 |>
    filter (what == "cran_by_year") |>
    select (year, rel_space_R, tabs, indentation) |>
    rename (rel_space = rel_space_R) |>
    mutate (rel_space = rel_space * 4)

# "annual" series (solid lines) with the dashed series layered on top.
p2 <- x2 |>
    filter (what == "annual") |>
    select (year, rel_space_R, tabs, indentation) |>
    rename (rel_space = rel_space_R) |>
    mutate (rel_space = rel_space * 4) |>
    ggplot (aes (x = year)) +
    geom_line (aes (y = rel_space, col = "rel_space")) +
    geom_line (aes (y = tabs, col = "tabs")) +
    geom_line (aes (y = indentation / coeff_fig10_p2, col = "indentation")) +
    geom_line (data = x2_cran,
               aes (y = rel_space, col = "rel_space"), lty = 2) +
    geom_line (data = x2_cran,
               aes (y = tabs, col = "tabs"), lty = 2) +
    geom_line (data = x2_cran,
               aes (y = indentation / coeff_fig10_p2, col = "indentation"),
               lty = 2) +
    ylab ("space (x4) | tabs") +
    # The secondary axis undoes the division applied to `indentation` above.
    scale_y_continuous (
        sec.axis = sec_axis (~ . * coeff_fig10_p2, name = "Indentation")) +
    ggtitle ("B: Indentation & Spacing") +
    theme (legend.title = element_blank (),
           legend.position = c (0.2, 0.2),
           legend.background =
               element_rect (fill = "transparent", colour = "transparent"))
# ---- Panel C: parameters and documentation per year ----

# Per-year function summaries stored in a separate data file.
f <- file.path (v_data_dir, "fig04-fns-per-year.Rds")

# "cran_by_year" series with shortened column names (dashed lines below).
dat10c_cran <- readRDS (f) |>
    filter (what == "cran_by_year") |>
    select (
        year,
        npars = npars_exported_mn,
        doclines = doclines_per_fn_exp_mn,
        chars_per_par = docchars_per_par_exp_mn
    )

# Ratio that maps `npars` onto the primary axis; the secondary axis applies
# the inverse transformation.
scale_10c <- max (dat10c_cran$npars) / max (dat10c_cran$chars_per_par)
# NOTE(review): not referenced anywhere in the visible code.
colours_fig10_p3 <- c (
    npars = "red",
    doclines = "blue",
    chars_per_par = "green"
)

# Dashed "cran_by_year" lines are added first and the solid "annual" lines
# after them; the legend is switched off for this panel.
p3 <- readRDS (f) |>
    filter (what == "annual") |>
    select (
        year,
        npars = npars_exported_mn,
        doclines = doclines_per_fn_exp_mn,
        chars_per_par = docchars_per_par_exp_mn
    ) |>
    ggplot (aes (x = year)) +
    geom_line (
        data = dat10c_cran,
        aes (y = doclines, colour = "doclines"),
        linetype = 2
    ) +
    geom_line (
        data = dat10c_cran,
        aes (y = chars_per_par, colour = "chars_per_par"),
        linetype = 2
    ) +
    geom_line (
        data = dat10c_cran,
        aes (y = npars / scale_10c, colour = "npars"),
        linetype = 2
    ) +
    geom_line (aes (y = doclines, colour = "doclines")) +
    geom_line (aes (y = chars_per_par, colour = "chars_per_par")) +
    geom_line (aes (y = npars / scale_10c, colour = "npars")) +
    theme (legend.position = "none") +
    labs (
        title = "C: Params & Documentation",
        y = "Documentation characters / lines"
    ) +
    scale_y_continuous (
        sec.axis = sec_axis (~ . * scale_10c, name = "Num. pars.")
    )

# Slope of npars:
# linear trend of the "annual" `npars_exported_mn` values against `year`,
# using the years from 2005 onwards only.
s <- readRDS (f) |>
    filter (what == "annual" & year >= 2005) |>
    select (year, npars = npars_exported_mn)
# Second coefficient of the fit, i.e. the one for `year`.
s <- coef (lm (npars ~ year, data = s)) [2]
# 1 / s
# Average release intervals:
# mean difference between consecutive `date` values of each package, taken
# in date order. Packages with a single row have no differences, so their
# mean is not finite and they are dropped.
# Note that this rebinds `x`, replacing the data loaded at the top.
x <- load_pkgstats_data (datafile, raw = TRUE, latest = FALSE) |>
    arrange (date) |>
    group_by (package) |>
    summarise (interval = mean (diff (date))) |>
    mutate (interval = as.double (interval)) |>
    filter (is.finite (interval))
logmean (x$interval)
# ---- Panel D: parameters and documentation by release number ----

# `seq` is the running release number within each package. The rows are
# sorted by `date` first so that this number is chronological and does not
# depend on the row order of the loaded data (the release-interval
# calculation in this file sorts by `date` for the same reason).
# The statistics are then averaged with `logmean ()` across all packages
# sharing a release number, keeping the first 100 release numbers.
dat_10d <- load_pkgstats_data (datafile, raw = TRUE, latest = FALSE) |>
    arrange (date) |>
    group_by (package) |>
    mutate (seq = seq_along (package)) |>
    group_by (seq) |>
    summarise (
        npars = logmean (npars_exported_mn, na.rm = TRUE),
        doclines = logmean (doclines_per_fn_exp_mn, na.rm = TRUE),
        chars_per_par = logmean (docchars_per_par_exp_mn, na.rm = TRUE)) |>
    filter (seq <= 100)

# Ratio that maps `npars` onto the primary axis; the secondary axis applies
# the inverse transformation.
scale_10d <- max (dat_10d$npars) / max (dat_10d$chars_per_par)
# NOTE(review): not referenced anywhere in the visible code.
colours_fig10_p4 <- c ("npars" = "red",
                       "doclines" = "blue",
                       "chars_per_par" = "green")

p4 <- dat_10d |>
    ggplot (aes (x = seq)) +
    geom_line (aes (y = doclines, col = "doclines")) +
    geom_line (aes (y = chars_per_par, col = "chars_per_par")) +
    geom_line (aes (y = npars / scale_10d, col = "npars")) +
    theme (legend.title = element_blank (),
           legend.position = c (0.7, 0.2),
           legend.background =
               element_rect (fill = "transparent", colour = "transparent")) +
    ylab ("Documentation characters / lines") +
    scale_y_continuous (
        sec.axis = sec_axis (~ . * scale_10d, name = "Num. pars.")) +
    ggtitle ("D: Params & Documentation")

# Slope of npars:
# linear trend of `npars` against release number, for `seq >= 50` only.
# `npars` names both this data frame and the column used in the formula; the
# result overwrites the earlier value of `s`.
npars <- filter (dat_10d, seq >= 50)
s <- coef (lm (npars ~ seq, data = npars)) [2]
# Render the four panels (A and B above C and D) into a 7 x 7 inch PNG at
# 300 dpi.
png (fig10_png, width = 7, height = 7, units = "in", res = 300)
# Print explicitly: a bare top-level expression is only drawn through
# auto-printing, which does not happen when this file is run via `source ()`,
# so nothing would be drawn on the device.
print ((p1 | p2) / (p3 + p4))
# Closes every open graphics device, which also finalises the PNG file.
graphics.off ()
knitr::include_graphics (fig10_png)


# mpadge/pkgstats-analyses documentation built on Dec. 10, 2022, 4:20 a.m.