library ("pkgstatsAnalyses") here <- here::here () v_data_dir <- file.path (here, "vignettes", "data") logmean <- function (x, ...) { 10 ^ mean (log10 (x [which (x > 0)]), na.rm = TRUE) } n_files_file <- file.path (v_data_dir, "fig03-n-files-data.Rds") n_files_exists <- file.exists (n_files_file) datafile <- file.path (here, "data-raw", "pkgstats-results.Rds") fig03_png <- file.path (here, "vignettes", "figures", "_fig03.png") f_fig3_pkg_level <- file.path (v_data_dir, "fig03-pkg-n-files.Rds") do_fig3_pkg_n_files <- !file.exists (f_fig3_pkg_level) & !file.exists (fig03_png)
# takes a few tens of seconds x <- load_pkgstats_data (datafile, raw = TRUE, latest = FALSE) nfiles_one_year <- function (x, year = 2015) { x1 <- x |> filter (year <= !!year) |> group_by (package) |> slice_max (date) |> summarise (R = logmean (files_R), src = logmean (files_src), inst = logmean (files_inst), vignettes = logmean (files_vignettes), tests = logmean (files_tests), data = logmean (num_data_files)) n_mean <- colMeans (x1 [, -1], na.rm = TRUE) c (year = year, n_mean) } dat <- vapply (sort (unique (x$year)), function (i) nfiles_one_year (x, i), numeric (7)) dat <- data.frame (t (dat)) dat$year <- lubridate::year (paste0 (dat$year, "-01-01")) xy <- x |> group_by (year) |> summarise (R = logmean (files_R), src = logmean (files_src), inst = logmean (files_inst), vignettes = logmean (files_vignettes), tests = logmean (files_tests), data = logmean (num_data_files)) dat$what <- "cran_by_year" xy$what <- "annual" dat <- rbind (dat, xy) dat$inst [!is.finite (dat$inst)] <- NA dat$vignettes [!is.finite (dat$vignettes)] <- NA dat$tests [!is.finite (dat$tests)] <- NA dat$data [!is.finite (dat$data)] <- NA saveRDS (dat, n_files_file)
x_per_pkg <- load_pkgstats_data (datafile, raw = TRUE, latest = FALSE) |> group_by (package) |> summarise (R = files_R, src = files_src, inst = files_inst, vignettes = files_vignettes, tests = files_tests, data = num_data_files, seq = seq_along (R), .groups = "keep") |> mutate_at(vars (R, src, inst, vignettes, tests, data), ~replace (., !is.finite (.), NA)) |> group_by (seq) |> summarise (R = mean (R, na.rm = TRUE), src = mean (src, na.rm = TRUE), inst = mean (inst, na.rm = TRUE), vignettes = mean (vignettes, na.rm = TRUE), tests = mean (tests, na.rm = TRUE), data = mean (data, na.rm = TRUE), .groups = "keep") |> filter (seq > 1) |> pivot_longer (cols = c (R, src, inst, vignettes, tests, data)) |> rename (dir = name) saveRDS (x_per_pkg, f_fig3_pkg_level)
dat <- readRDS (n_files_file) |> mutate (inst = inst * max (src, na.rm = TRUE) / max (inst, na.rm = TRUE)) |> filter (what == "cran_by_year") |> pivot_longer (cols = c (R, src, inst, vignettes, tests, data)) |> rename (n_files = value, dir = name) dat_ann <- readRDS (n_files_file) |> mutate (inst = inst * max (src, na.rm = TRUE) / max (inst, na.rm = TRUE)) |> filter (what == "annual") |> pivot_longer (cols = c (R, src, inst, vignettes, tests, data)) |> rename (n_files = value, dir = name) p1 <- ggplot (dat, aes (x = year, y = n_files, color = dir)) + geom_line (lwd = 0.4, lty = 2) + geom_smooth (method = "loess", formula = "y ~ x", se = FALSE, lwd = 0.5) + #geom_line (data = dat_ann, lty = 2) + ggtitle ("A: Numbers of files per year") + ylab ("Num. files") + ylim (c (0, max (dat$n_files))) + theme (legend.position = "none") # dat_ann are very similar but much noisier, so not shown
p2 <- readRDS (f_fig3_pkg_level) |> filter (seq <= 20) |> ggplot (aes (x = seq, y = value, colour = dir)) + geom_line (lwd = 0.4, lty = 2) + geom_smooth (method = "loess", formula = "y ~ x", se = FALSE, span = 0.7, lwd = 0.5) + xlab ("Release Sequence") + ylab ("Num. files") + ggtitle ("B: Numbers of files per release") + guides (color = guide_legend (ncol = 2)) + theme (legend.title = element_blank(), legend.position = c (0.3, 0.85), legend.background = element_rect(fill='transparent', colour='transparent'))
png (fig03_png, width = 7, height = 3.5, units = "in", res = 300) p1 + p2 graphics.off ()
knitr::include_graphics (fig03_png)
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.