#' Transform data to form new variables
#'
#' The raw data contain many related raw count variables. This function first
#' adds new variables via `transform_new_vals()`, and then passes the result to
#' `transform_relative_vals()` to convert many of the raw counts to relative
#' proportions.
#'
#' @param x Result of \link{load_pkgstats_data} with `raw = TRUE`.
#' @return Transformed version of input, in which some variables have been
#' transformed and new variables have been added.
#' @export
transform_pkgstats_data <- function (x) {

    # Step 1: derive the additional variables.
    with_new_vals <- transform_new_vals (x)

    # Step 2: convert to relative measures; the result is the return value.
    transform_relative_vals (with_new_vals)
}
#' Add new variables derived from the raw data
#'
#' Steps, in order:
#' 1. For each of the comma-separated columns `depends`, `imports`,
#'    `suggests`, `languages`, and `linking_to`, add an integer column with
#'    the number of entries (`ndepends`, `nimports`, and so on). Missing
#'    entries are counted as zero.
#' 2. Set missing values in all `files_*` columns to zero, and add their row
#'    sums as `files_total`.
#' 3. Remove the original comma-separated columns.
#' 4. Add a logical `tab_indent` column which is `TRUE` where
#'    `indentation == -1`, and set those `indentation` values to `NA`.
#' 5. Remove the `translations` column, if present.
#'
#' @param x A `data.frame` containing the columns named above.
#' @return Modified version of `x`.
#' @noRd
transform_new_vals <- function (x) {

    vals <- c ("depends", "imports", "suggests", "languages", "linking_to")

    missing_cols <- setdiff (vals, names (x))
    if (length (missing_cols) > 0L) {
        stop ("Input is missing required column(s): ",
              paste0 (missing_cols, collapse = ", "),
              call. = FALSE)
    }

    for (v in vals) {
        entries <- as.character (x [[v]])
        n <- lengths (strsplit (entries, ","))
        # `strsplit()` returns a single `NA` for a missing entry, which would
        # otherwise be counted as one item instead of none:
        n [is.na (entries)] <- 0L
        x [[paste0 ("n", v)]] <- n
    }

    f <- grep ("^files\\_", names (x), value = TRUE)
    # Missing file counts are treated as zero:
    x [f] [is.na (x [f])] <- 0L
    x$files_total <- rowSums (x [f])

    # Remove by name, so the result is always a `data.frame`:
    x <- x [setdiff (names (x), vals)]

    # Split indentation into binary "tab" column:
    is_tab <- x$indentation == -1L
    x$tab_indent <- is_tab
    x$indentation [is_tab] <- NA

    # Translations are not yet processed correctly, so remove for now.
    # Name-based removal is a no-op when the column is absent, whereas
    # negative indexing on a failed `match()` would error.
    x <- x [setdiff (names (x), "translations")]

    return (x)
}
#' Transform values to relative measures
#'
#' Conversions at present are:
#' 1. Blank & comment lines in each directory are converted to proportions of
#'    all lines (blank + comment + code) in that directory, stored in new
#'    columns named with a `_rel` suffix. Missing counts are first set to zero.
#'    Directories with zero lines yield `NaN` (0 / 0).
#' 2. New measures are added for the `R`, `src`, and `inst` directories
#'    combined: total lines of code (`loc_total`), total lines (`lines_total`),
#'    and the overall proportions `blank_lines_all_rel` and
#'    `comment_lines_all_rel`.
#' 3. `loc_*` values are set to `NA` for directories with zero files.
#' 4. The original absolute counts of blank & comment lines are removed.
#'
#' @param x A `data.frame` with `blank_lines_*`, `comment_lines_*`, `loc_*`,
#' and `files_*` columns.
#' @return Modified version of `x`.
#' @noRd
transform_relative_vals <- function (x) {

    # Original absolute columns, recorded now so that the `_rel` columns added
    # below are not caught by the final removal step:
    blank_cols <- grep ("^blank\\_", names (x), value = TRUE)
    comment_cols <- grep ("^comment\\_", names (x), value = TRUE)

    # Pair blank, code, and comment columns by directory name. Relying on the
    # positions of three separate `grep()` results would silently mix
    # directories whenever the column order or number differed between types.
    dirs <- sub ("^blank_lines_", "",
                 grep ("^blank_lines_", names (x), value = TRUE))
    needed <- c (paste0 ("loc_", dirs), paste0 ("comment_lines_", dirs))
    missing_cols <- setdiff (needed, names (x))
    if (length (missing_cols) > 0L) {
        stop ("Input is missing required column(s): ",
              paste0 (missing_cols, collapse = ", "),
              call. = FALSE)
    }

    # convert blank & comment lines to relative proportions:
    for (d in dirs) {
        blank <- paste0 ("blank_lines_", d)
        loc <- paste0 ("loc_", d)
        comment <- paste0 ("comment_lines_", d)
        for (col in c (blank, loc, comment)) {
            x [[col]] [is.na (x [[col]])] <- 0L
        }
        nlines <- x [[blank]] + x [[loc]] + x [[comment]]
        x [[paste0 (blank, "_rel")]] <- x [[blank]] / nlines
        x [[paste0 (comment, "_rel")]] <- x [[comment]] / nlines
    }

    # Totals over the three main directories:
    x$loc_total <- x$loc_R + x$loc_src + x$loc_inst

    main_dirs <- c ("R", "src", "inst")
    types <- c ("blank_lines", "comment_lines", "loc")
    combs <- as.vector (outer (types, main_dirs, paste, sep = "_"))
    x$lines_total <- rowSums (x [combs], na.rm = TRUE)

    x$blank_lines_all_rel <- (x$blank_lines_R +
                              x$blank_lines_src +
                              x$blank_lines_inst) / x$lines_total
    x$comment_lines_all_rel <- (x$comment_lines_R +
                                x$comment_lines_src +
                                x$comment_lines_inst) / x$lines_total

    # Lines of code are undefined, rather than zero, where a directory has no
    # files. Directories without both columns are skipped.
    for (d in c ("R", "src", "inst", "vignettes", "tests")) {
        loc <- paste0 ("loc_", d)
        files <- paste0 ("files_", d)
        if (!all (c (loc, files) %in% names (x))) {
            next
        }
        x [[loc]] [x [[files]] == 0] <- NA_integer_
    }

    # Then remove the original absolute counts of blank & comment lines:
    x <- x [setdiff (names (x), c (blank_cols, comment_cols))]

    return (x)
}
#' Rescale the main `pkgstats` data for CRAN packages
#'
#' Selects all columns of class `integer`, plus all columns of class `numeric`
#' other than `date_wt`, keeps those with more than one distinct non-missing
#' value, and standardises them with `scale()`.
#'
#' @param x Result of \link{load_pkgstats_data} with `raw = TRUE`.
#' @return A `tibble` with a single column named `x_ivs`, which holds the
#' matrix returned by `scale()`.
#' @noRd
rescale_data <- function (x) {

    # Columns with more than one class (such as `POSIXct`) can never be plain
    # integer or numeric columns, so are recorded as `NA` instead of causing
    # an error:
    classes <- vapply (x, function (col) {
        cl <- class (col)
        if (length (cl) == 1L) cl else NA_character_
    }, character (1), USE.NAMES = FALSE)

    is_int <- classes %in% "integer"
    is_dbl <- classes %in% "numeric" & names (x) != "date_wt"
    ivs <- which (is_int | is_dbl)

    # Count distinct non-missing values column by column. Single-bracket
    # list-style indexing keeps a `data.frame` even when only one column is
    # selected.
    nvals <- vapply (x [ivs], function (col) {
        length (table (col, useNA = "no"))
    }, integer (1), USE.NAMES = FALSE)
    ivs <- ivs [which (nvals > 1)]

    # The name `x_ivs` is what `tibble::tibble()` uses as the column name of
    # the result, so must not be changed.
    x_ivs <- scale (x [ivs])

    return (tibble::tibble (x_ivs))
}
# Memoised version of `rescale_data()`, created with `memoise::memoise()`:
m_rescale_data <- memoise::memoise (rescale_data)
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.