library ("pkgstatsAnalyses")
# Locations of the raw data, the cached per-panel results and the rendered
# figure, all resolved relative to the project root.
here <- here::here ()
v_data_dir <- file.path (here, "vignettes", "data")

datafile <- file.path (here, "data-raw", "pkgstats-results.Rds")
f_fig1_urls <- file.path (v_data_dir, "fig01-urls.Rds")
f_fig1_aut_ctb <- file.path (v_data_dir, "fig01-aut-ctb.Rds")
f_fig1_lic <- file.path (v_data_dir, "fig01-lic.Rds")
fig01_png <- file.path (here, "vignettes", "figures", "_fig01.png")

# Flags which are TRUE when the corresponding cached result is missing. The
# author/contributor and licence flags additionally require that the rendered
# figure is missing. These are scalar conditions, so the short-circuiting `&&`
# is used rather than the elementwise `&`.
# NOTE(review): the flags are not read again in the code visible here --
# presumably they are consumed elsewhere (e.g. as chunk options); confirm.
calc_fig1_urls <- !file.exists (f_fig1_urls)
calc_fig1_aut_ctb <- !file.exists (f_fig1_aut_ctb) && !file.exists (fig01_png)
calc_fig1_lic <- !file.exists (f_fig1_lic) && !file.exists (fig01_png)

x <- load_pkgstats_data (datafile, raw = TRUE, latest = FALSE)
# CRAN snapshot values:
# Proportion of rows with a non-missing `urls` entry, after keeping only rows
# with `year` no later than the `year` argument and, within each `package`,
# the row(s) with the maximal `date`.
do1 <- function (x, year = 2018) {
    latest <- slice_max (
        group_by (filter (x, year <= !!year), package),
        date
    )
    has_url <- !is.na (latest$urls)

    sum (has_url) / length (has_url)
}
# One snapshot proportion per distinct year present in the data.
years <- sort (unique (x$year))
urls <- vapply (years, \(yr) do1 (x, yr), numeric (1))
# Git* URLs:
# Copy of the data in which missing `urls` become empty strings.
x_git <- x
x_git$urls <- replace (x_git$urls, is.na (x_git$urls), "")
# Proportion of rows whose `urls` entry matches "[bg]it" (case-insensitive),
# after keeping only rows with `year` no later than the `year` argument and,
# within each `package`, the row(s) with the maximal `date`.
do1 <- function (x, year = 2018) {
    up_to_year <- filter (x, year <= !!year)
    latest <- slice_max (group_by (up_to_year, package), date)
    is_git <- grepl ("[bg]it", latest$urls, ignore.case = TRUE)

    sum (is_git) / length (is_git)
}
urls_git <- vapply (years, \(yr) do1 (x_git, yr), numeric (1L))

# annual values:
# Inside `summarise()`, `urls` is the data column; in the `data.frame()` call
# further down it is the vector of snapshot proportions computed earlier.
xy <- summarise (
    group_by (x, year),
    prop_urls = sum (!is.na (urls)) / length (urls)
)
xy$type <- "annual"
xy <- rbind (
    xy,
    data.frame (year = years, prop_urls = urls, type = "snapshot")
)

# Annual proportion of rows whose `urls` entry looks like a Git*-hosted URL.
# The pattern and case-insensitivity match those used by the snapshot `do1()`
# for Git* URLs ("[bg]it", ignore.case = TRUE), so that the "annual_git" and
# "snapshot_git" series are computed with the same criterion. `x_git` has its
# missing `urls` replaced by "", so every row counts in the denominator.
xy_git <- x_git |>
    group_by (year) |>
    summarise (
        prop_urls = sum (grepl ("[bg]it", urls, ignore.case = TRUE)) /
            length (urls)
    )
xy_git$type <- "annual_git"
xy_git <- rbind (
    xy_git,
    data.frame (
        year = years,
        prop_urls = urls_git,
        type = "snapshot_git"
    )
)
# Cache all four series ("annual", "snapshot", "annual_git", "snapshot_git").
saveRDS (rbind (xy, xy_git), f_fig1_urls)
x <- load_pkgstats_data (datafile, raw = TRUE, latest = FALSE)
# Mean and standard error (sd / sqrt (number of rows)) of `desc_n_aut` and
# `desc_n_ctb`, after keeping only rows with `year` no later than the `year`
# argument and, within each `package`, the row(s) with the maximal `date`.
# Returns a named numeric vector: year, n_aut, n_ctb, se_aut, se_ctb.
do1 <- function (x, year = 2018) {
    latest <- slice_max (
        group_by (filter (x, year <= !!year), package),
        date
    )
    root_n <- sqrt (nrow (latest))
    aut <- latest$desc_n_aut
    ctb <- latest$desc_n_ctb

    c (
        year = year,
        n_aut = mean (aut),
        n_ctb = mean (ctb),
        se_aut = sd (aut) / root_n,
        se_ctb = sd (ctb) / root_n
    )
}
# Snapshot values: one column of five statistics per year from `do1()`,
# transposed so that each year becomes a row.
years <- sort (unique (x$year))
n <- vapply (years, function (y) do1 (x, y), numeric (5))
n <- data.frame (t (n))

# Then values for actual years
# Each standard error is divided by the square root of the length of its own
# column (the original used the length of `desc_n_aut` for both; the two
# lengths are equal within a group, so the values are unchanged).
xy <- x |>
    group_by (year) |>
    summarise (
        n_aut = mean (desc_n_aut),
        n_ctb = mean (desc_n_ctb),
        se_aut = sd (desc_n_aut) / sqrt (length (desc_n_aut)),
        se_ctb = sd (desc_n_ctb) / sqrt (length (desc_n_ctb))
    )
n$what <- "cran_by_year"
xy$what <- "annual"
n <- rbind (n, xy)
saveRDS (n, f_fig1_aut_ctb)
x <- load_pkgstats_data (datafile, raw = TRUE, latest = FALSE)
# Number of distinct `license` entries, after keeping only rows with `year` no
# later than the `year` argument and, within each `package`, the row(s) with
# the maximal `date`. Entries are split on "|" and stripped of leading and
# trailing whitespace before counting. Returns c (year = , n = ).
do1 <- function (x, year = 2018) {
    latest <- slice_max (
        group_by (filter (x, year <= !!year), package),
        date
    )
    lic_parts <- unlist (strsplit (latest$license, "\\|"))
    lic_trimmed <- gsub ("^\\s*|\\s*$", "", lic_parts)

    c (year = year, n = length (unique (lic_trimmed)))
}
# Snapshot counts: one (year, n) pair per year, transposed into rows.
years <- sort (unique (x$year))
nlic <- vapply (years, \(yr) do1 (x, yr), numeric (2))
nlic <- data.frame (t (nlic))
nlic$what <- "cran_by_year"

# Annual counts: distinct licence strings among the rows of each year, using
# the same split-on-"|" and whitespace-trimming steps as `do1()`.
x_y <- summarise (
    group_by (x, year),
    n = license |>
        strsplit ("\\|") |>
        unlist () |>
        gsub (pattern = "^\\s*|\\s*$", replacement = "") |>
        unique () |>
        length ()
)
x_y$what <- "annual"

nlic <- rbind (nlic, x_y)
saveRDS (nlic, f_fig1_lic)
dat <- readRDS (f_fig1_aut_ctb)

# Lower bounds (mean - SE) in long form: one row per (year, what, name).
lo <- pivot_longer (
    select (
        mutate (dat, aut_lo = n_aut - se_aut, ctb_lo = n_ctb - se_ctb),
        year, aut_lo, ctb_lo, what
    ),
    c (aut_lo, ctb_lo)
)
# Upper bounds (mean + SE), same layout.
hi <- pivot_longer (
    select (
        mutate (dat, aut_hi = n_aut + se_aut, ctb_hi = n_ctb + se_ctb),
        year, aut_hi, ctb_hi, what
    ),
    c (aut_hi, ctb_hi)
)
# Means in long form; `select()` renames n_aut/n_ctb to aut/ctb on the way.
fig1_dat <- pivot_longer (
    select (dat, year, aut = n_aut, ctb = n_ctb, what),
    cols = c (aut, ctb)
)
# The three long tables are built from the same rows in the same column
# order, so their `value` columns are attached by position.
fig1_dat$lo <- lo$value
fig1_dat$hi <- hi$value

# Split into the annual series and the cumulative ("cran_by_year") series.
dat_annual <- filter (fig1_dat, what == "annual")
fig1_dat <- filter (fig1_dat, what == "cran_by_year")

# Panel A: mean numbers of authors / contributors per year. The
# "cran_by_year" series uses the default line type, the annual series line
# type 2. The ribbon layers for the lo/hi bounds are left disabled:
#   geom_ribbon (aes (ymin = lo, ymax = hi, fill = name, colour = NULL),
#                alpha = 0.15, show.legend = FALSE)
#   geom_ribbon (data = dat_annual,
#                aes (ymin = lo, ymax = hi, fill = name, colour = NULL),
#                alpha = 0.15, show.legend = FALSE)
p1 <- ggplot (fig1_dat, aes (x = year, y = value, colour = name)) +
    geom_line () +
    geom_line (data = dat_annual, linetype = 2) +
    ylab ("Number authors / contributors") +
    ggtitle ("A: Num aut / ctb") +
    theme (
        legend.title = element_blank (),
        legend.position = c (0.2, 0.4),
        legend.background = element_rect (
            fill = "transparent",
            colour = "transparent"
        )
    )

# Panel B: proportion of packages with URLs. `what` separates the "git"
# series from all URLs; rows whose `type` contains "annual" form the second
# (line type 2) layer, the remaining rows the main layer.
dat_urls <- readRDS (f_fig1_urls)
dat_urls$what <- c ("all", "git") [grepl ("git", dat_urls$type) + 1L]
dat_urls_annual <- dat_urls [grepl ("annual", dat_urls$type), ]
dat_urls <- dat_urls [!grepl ("annual", dat_urls$type), ]

p2 <- ggplot (dat_urls, aes (x = year, y = prop_urls, colour = what)) +
    geom_line () +
    geom_line (data = dat_urls_annual, linetype = 2) +
    ylab ("Proportion pkgs with URLs") +
    ggtitle ("B: Prop URLs") +
    theme (
        legend.title = element_blank (),
        legend.position = c (0.2, 0.9),
        legend.background = element_rect (
            fill = "transparent",
            colour = "transparent"
        )
    )

# Panel C: number of distinct licences per year. Line types are set
# explicitly so that this panel agrees with panels A and B, where the
# cumulative series is solid and the annual series is dashed (`lty = 2`).
# Mapping a logical `what == "cran_by_year"` to linetype, as before, gave the
# TRUE level the second (dashed) type, i.e. the opposite assignment.
p3 <- readRDS (f_fig1_lic) |>
    ggplot (aes (x = year, y = n, linetype = what)) +
    geom_line () +
    scale_linetype_manual (
        values = c (cran_by_year = "solid", annual = "dashed")
    ) +
    theme (legend.position = "none") +
    ylab ("Number of distinct licenses") +
    ggtitle ("C: Num licenses")

# Render the three panels side by side into the PNG file.
# The combined plot is printed explicitly: a bare `p1 + p2 + p3` is only drawn
# when top-level auto-printing is active, which is not the case under
# `source ()`. Only the device opened here is closed, rather than every open
# graphics device as `graphics.off ()` would do; `invisible ()` suppresses the
# device number that `dev.off ()` returns.
png (fig01_png, width = 7, height = 3.5, units = "in", res = 300)
print (p1 + p2 + p3)
invisible (dev.off ())
knitr::include_graphics (fig01_png)


# mpadge/pkgstats-analyses documentation built on Dec. 10, 2022, 4:20 a.m.