# Summary functions -------------------------------------------------------
#' Missing values check
#'
#' This function is designed to be used internally.
#' @param x A numeric vector.
#' @param na_type A vector of strings that defines the type of missing value criteria e.g "n", "con", "prop", "FUN".
#' @param na_consecutive_n An integer that defines the maximum number of allowed consecutive missing values.
#' @param na_max_n An integer that defines the maximum number of allowed missing values.
#' @param na_max_prop An integer that defines the maximum allowed proportion of missing values.
#' @param na_min_n An integer that defines the minimum number of allowed missing values.
#' @param na_FUN A function that returns a boolean.
#' @param ... Additional arguments that could be passed internally.
#'
#' @return A boolean check.
#' @author Shadrack Kibet
#'
#' @examples
na_check <- function(x, na_type = c(), na_consecutive_n = NULL, na_max_n = NULL, na_min_n = NULL, na_max_prop = NULL, na_FUN = NULL, ...) {
res <- c()
k <- 1
for (i in na_type) {
## Added this to avoid error when "" is trancated. Not sure why "" is removed in some instances.
## Works differently with main summary function when you have a single case/multiple cases of na_type.
if (i == "'n'" || i == "n") {
res[k] <- summary_count_missing(x) <= na_max_n
}
else if (i == "'prop'" || i == "prop") {
res[k] <- (summary_count_missing(x) / summary_count(x)) <= na_max_prop / 100
}
else if (i == "'n_non_miss'" || i == "n_non_miss") {
res[k] <- summary_count_non_missing(x) >= na_min_n
}
else if (i == "'FUN'" || i == "FUN") {
res[k] <- na_FUN(x, ...)
}
else if (i == "'con'" || i == "con") {
is_na_rle <- rle(is.na(x))
res[k] <- max(is_na_rle$lengths[is_na_rle$values]) <= na_consecutive_n
}
else {
stop("Invalid na_type specified for missing values check.")
}
if (!res[k]) {
return(FALSE)
}
k <- k + 1
}
return(all(res))
}
#' Sum function
#'
#' This function returns the total of a numeric vector, it allows you to specify missing value criteria with their respective thresholds.
#' @param x A numeric vector.
#' @param na.rm A logical value indicating whether missing values should be stripped before the computation proceeds.
#' @param na_type A vector of strings that defines the type of missing value criteria e.g "n", "con", "prop", "FUN".
#' @param ... Additional parameters that could be passed internally. for example `na_consecutive_n = 3` to define the maximum allowed number of consecutive missing values. Other parameters include "na_max_n", "na_min_n", "n_non_miss", "na_max_prop" for maximum, minimum and proportion of missing values.
#'
#' @return The total of the numeric vector.
#' @author Shadrack Kibet
#' @export
#'
#' @examples
#' x <- c(1:5, NA, NA, NA, 6:10)
#' summary_sum(x, na.rm = TRUE, na_type = "n", na_max_n = 2)
#' summary_sum(x, na.rm = TRUE, na_type = "n_non_miss", na_min_n = 2)
#' summary_sum(x, na.rm = TRUE, na_type = "con", na_consecutive_n = 2)
summary_sum <- function(x, na.rm = FALSE, na_type = "", ...) {
if (na.rm && na_type != "" && !na_check(x, na_type = na_type, ...)) {
return(NA)
} else {
return(sum(x, na.rm = na.rm))
}
}
#' Length function
#'
#' @param x An R object, a vector or a factor
#' @param ... Additional arguments.
#'
#' @return The length of the R object.
#' @author Shadrack Kibet
#' @export
#'
#' @examples
#' x <- c(1:5, NA, NA, NA, 6:10)
#' summary_length(x, na.rm = TRUE, na_type = "n", na_max_n = 2)
summary_length <- function(x, ...) {
return(length(x))
}
#' Count missing function
#'
#' @param x A numeric vector.
#' @param ... Additional arguments.
#'
#' @return The count of missing values.
#' @author Shadrack Kibet
#' @export
#'
#' @examples
#' x <- c(1:5, NA, NA, NA, 6:10)
#' summary_count_missing(x, na.rm = TRUE, na_type = "n", na_max_n = 2)
summary_count_missing <- function(x, ...) {
return(summary_sum(is.na(x)))
}
#' Count non-missing function
#'
#' @param x A numeric vector.
#' @param ... Additional arguments.
#'
#' @return The count of non missng values.
#' @author Shadrack Kibet
#' @export
#'
#' @examples
#' x <- c(1:5, NA, NA, NA, 6:10)
#' summary_count_non_missing(x, na.rm = TRUE, na_type = "n", na_max_n = 2)
summary_count_non_missing <- function(x, ...) {
return(summary_sum(!is.na(x)))
}
#' Maximum function
#'
#' @param x A numeric vector.
#' @param na.rm A logical value indicating whether missing values should be stripped before the computation proceeds.
#' @param na_type A vector of strings that defines the type of missing value criteria e.g "n", "con", "prop", "FUN".
#' @param ... Additional parameters that could be passed internally. for example `na_consecutive_n = 3` to define the maximum allowed number of consecutive missing values. Other parameters include "na_max_n", "na_min_n", "n_non_miss", "na_max_prop" for maximum, minimum and proportion of missing values.
#'
#' @return The maximum value.
#' @author Shadrack Kibet
#' @export
#'
#' @examples
#' x <- c(1:5, NA, NA, NA, 6:10)
#' summary_max(x, na.rm = TRUE, na_type = "n", na_max_n = 2)
#' summary_max(x, na.rm = TRUE, na_type = "n_non_miss", na_min_n = 2)
#' summary_max(x, na.rm = TRUE, na_type = "con", na_consecutive_n = 2)
summary_max <- function(x, na.rm = FALSE, na_type = "", ...) {
# TODO This prevents warning and -Inf being retured. Is this desirable?
if (length(x) == 0 || (na.rm && length(x[!is.na(x)]) == 0)) {
return(NA)
}
if (na.rm && na_type != "" && !na_check(x, na_type = na_type, ...)) {
return(NA)
} else {
return(max(x, na.rm = na.rm))
}
}
#' Minimum function
#'
#' @param x A numeric vector.
#' @param na.rm A logical value indicating whether missing values should be stripped before the computation proceeds.
#' @param na_type A vector of strings that defines the type of missing value criteria e.g "n", "con", "prop", "FUN".
#' @param ... Additional parameters that could be passed internally. for example "na_consecutive_n = 3" to define the maximum allowed number of consecutive missing values. Other parameters include "na_max_n", "na_min_n", "n_non_miss", "na_max_prop" for maximum, minimum and proportion of missing values
#'
#' @return The minimum value.
#' @author Shadrack Kibet
#' @export
#'
#' @examples
#' x <- c(5, 3, 0.3, 4, NA, NA, NA, 6:10)
#' summary_min(x, na.rm = TRUE, na_type = "n", na_max_n = 2)
#' summary_min(x, na.rm = TRUE, na_type = "n_non_miss", na_min_n = 2)
#' summary_min(x, na.rm = TRUE, na_type = "con", na_consecutive_n = 2)
summary_min <- function(x, na.rm = FALSE, na_type = "", ...) {
# TODO This prevents warning and Inf being retured. Is this desirable?
if (length(x) == 0 || (na.rm && length(x[!is.na(x)]) == 0)) {
return(NA)
}
if (na.rm && na_type != "" && !na_check(x, na_type = na_type, ...)) {
return(NA)
} else {
return(min(x, na.rm = na.rm))
}
}
#' Mean function
#'
#' Function that returns the arithmetic mean an R object. Allows trimming of values and flexible definition of missing values.
#' @param x An R object.
#' @param na.rm A logical value indicating whether missing values should be stripped before the computation proceeds.
#' @param trim A fraction between 0 and 0.5 of observations to be trimmed from each end of x before mean is computed.
#' @param na_type A vector of strings that defines the type of missing value criteria e.g "n", "con", "prop", "FUN".
#' @param ... Additional parameters that could be passed internally. for example "na_consecutive_n = 3" to define the maximum allowed number of consecutive missing values. Other parameters include "na_max_n", "na_min_n", "n_non_miss", "na_max_prop" for maximum, minimum and proportion of missing values
#'
#' @return Returns the average value.
#' @author Shadrack Kibet
#' @export
#'
#' @examples
#' x <- c(1:10, NA, NA, NA, 2, 5, 6, 7)
#' summary_mean(x, na.rm = TRUE, na_type = "n", na_max_n = 2)
#' summary_mean(x, na.rm = TRUE, na_type = "n_non_miss", na_min_n = 2)
#' summary_mean(x, na.rm = TRUE, na_type = "con", na_consecutive_n = 2)
summary_mean <- function(x, na.rm = FALSE, trim = 0, na_type = "", ...) {
if (length(x) == 0 || (na.rm && length(x[!is.na(x)]) == 0)) {
return(NA)
} else {
if (na.rm && na_type != "" && !na_check(x, na_type = na_type, ...)) {
return(NA)
} else {
return(mean(x, na.rm = na.rm, trim = trim))
}
}
}
#' Median function
#'
#' @param x A numeric vector.
#' @param na.rm A vector of strings that defines the type of missing value criteria e.g "n", "con", "prop", "FUN".
#' @param na_type A vector of strings that defines the type of missing value criteria e.g "n", "con", "prop", "FUN".
#' @param ... Additional parameters that could be passed internally. for example "na_consecutive_n = 3" to define the maximum allowed number of consecutive missing values. Other parameters include "na_max_n", "na_min_n","n_non_miss", "na_max_prop"` for maximum, minimum and proportion of missing values.
#'
#' @return The middle value in the series.
#' @author Shadrack Kibet
#' @export
#'
#' @examples
summary_median <- function(x, na.rm = FALSE, na_type = "", ...){
if (na.rm && na_type != "" && !na_check(x, na_type = na_type, ...)) {
return(NA)
} else {
return(median(x, na.rm = na.rm))
}
}
#' Mode function
#'
#' @param x A numeric vector.
#' @param na.rm A vector of strings that defines the type of missing value criteria e.g "n", "con", "prop", "FUN".
#' @param na_type A vector of strings that defines the type of missing value criteria e.g "n", "con", "prop", "FUN".
#' @param ... Additional parameters that could be passed internally. for example "na_consecutive_n = 3" to define the maximum allowed number of consecutive missing values. Other parameters include "na_max_n", "na_min_n","n_non_miss", "na_max_prop"` for maximum, minimum and proportion of missing values.
#'
#' @return The most appearing number in the series. Incase there is a tie, the first modal value is only returned.
#' @author Shadrack Kibet
#' @export
#'
#' @examples
#' x <- c(1:10, NA, NA, NA, 2, 5, 6, 7)
#' summary_mode(x)
summary_mode <- function(x, na.rm = FALSE, na_type = "", ...){
if (na.rm && na_type != "" && !na_check(x, na_type = na_type, ...)) {
return(NA)
} else {
if (anyNA(x)&!na.rm) return(NA)
x <- x[!is.na(x)]
ux <- unique(x)
return(ux[which.max(tabulate(match(x, ux)))])
}
}
#' Standard Deviation function
#'
#' @param x A numeric vector.
#' @param na.rm A vector of strings that defines the type of missing value criteria e.g "n", "con", "prop", "FUN".
#' @param na_type A vector of strings that defines the type of missing value criteria e.g "n", "con", "prop", "FUN".
#' @param ... Additional parameters that could be passed internally. for example "na_consecutive_n = 3" to define the maximum allowed number of consecutive missing values. Other parameters include "na_max_n", "na_min_n","n_non_miss", "na_max_prop"` for maximum, minimum and proportion of missing values.
#'
#' @return Returns the standard deviation value.
#' @author Shadrack Kibet
#' @export
#'
#' @examples
#' x <- c(1:10, NA, NA, NA, 2, 5, 6, 7)
#' summary_sd(x, na.rm = TRUE, na_type = "n", na_max_n = 2)
#' summary_sd(x, na.rm = TRUE, na_type = "n_non_miss", na_min_n = 2)
#' summary_sd(x, na.rm = TRUE, na_type = "con", na_consecutive_n = 2)
summary_sd <- function(x, na.rm = FALSE, na_type = "", ...) {
if (na.rm && na_type != "" && !na_check(x, na_type = na_type, ...)) {
return(NA)
} else {
return(sd(x, na.rm = na.rm))
}
}
#' Variance function
#'
#' @param x A numeric vector.
#' @param na.rm A vector of strings that defines the type of missing value criteria e.g "n", "con", "prop", "FUN".
#' @param na_type A vector of strings that defines the type of missing value criteria e.g "n", "con", "prop", "FUN".
#' @param ... Additional parameters that could be passed internally. for example "na_consecutive_n = 3" to define the maximum allowed number of consecutive missing values. Other parameters include "na_max_n", "na_min_n", "n_non_miss", "na_max_prop" for maximum, minimum and proportion of missing values.
#'
#' @return Returns the variance value.
#' @author Shadrack Kibet
#' @export
#'
#' @examples
#' x <- c(1:10, NA, NA, NA, 2, 5, 6, 7)
#' summary_var(x, na.rm = TRUE, na_type = "n", na_max_n = 2)
#' summary_var(x, na.rm = TRUE, na_type = "n_non_miss", na_min_n = 2)
#' summary_var(x, na.rm = TRUE, na_type = "con", na_consecutive_n = 2)
summary_var <- function(x, na.rm = FALSE, na_type = "", ...) {
if (na.rm && na_type != "" && !na_check(x, na_type = na_type, ...)) {
return(NA)
} else {
return(var(x, na.rm = na.rm))
}
}
#' Quantile function
#'
#' This function calculates the respective quantile value depending on the probability value defined. It also allows flexible missing values definition.
#' @param x A numeric vector.
#' @param na.rm A logical value indicating whether missing values should be stripped before the computation proceeds.
#' @param probs A numeric vector of probability values between `0` and `1`.
#' @param na_type A vector of strings that defines the type of missing value criteria e.g "n", "con", "prop", "FUN".
#' @param ... Additional parameters that could be passed internally. for example `na_consecutive_n = 3` to define the maximum allowed number of consecutive missing values. Other parameters include "na_max_n", "na_min_n", "n_non_miss", "na_max_prop" for maximum, minimum and proportion of missing values.
#'
#' @return Returns the quantile value.
#' @author Shadrack Kibet
#' @export
#'
#' @examples
#' x <- c(1:10, NA, NA, NA, 2, 5, 6, 7)
#' summary_quantile(x, na.rm = TRUE, na_type = "n", na_max_n = 3, probs = 0.25) # lower quartile
#' summary_quantile(x, na.rm = TRUE, na_type = "con", na_consecutive_n = 3, probs = 0.75) # upper quartile
summary_quantile <- function(x, na.rm = FALSE, probs, na_type = "", ...) {
if (!na.rm && anyNA(x)) {
return(NA)
}
# This prevents multiple values being returned
if (na.rm && na_type != "" && !na_check(x, na_type = na_type, ...)) {
return(NA)
} else {
return(quantile(x, na.rm = na.rm, probs = probs)[[1]])
}
}
#' Counts function
#'
#' This function returns the counts based on a defined threshold, it allows use of inequality signs and flexible definition of missng values.
#' @param x A numeric vector.
#' @param count_test The inequlity to be applied in the test i.e `>, <, >=, <=, ==`.
#' @param count_value The count value threshold. This is used with `count_test`.
#' @param na.rm A logical value indicating whether missing values should be stripped before the computation proceeds.
#' @param na_type A vector of strings that defines the type of missing value criteria e.g "n", "con", "prop", "FUN".
#' @param ... Additional parameters that could be passed internally. for example "na_consecutive_n = 3" to define the maximum allowed number of consecutive missing values. Other parameters include "na_max_n", "na_min_n","n_non_miss", "na_max_prop" for maximum, minimum and proportion of missing values.
#'
#' @return The count value.
#' @author Shadrack Kibet
#' @export
#'
#' @examples
#' x <- c(1:10, NA, NA, NA, 2, 5, 6, 7)
#' summary_counts(x, count_test = ">", count_value = 2, na.rm = TRUE, na_type = "n", na_max_n = 3, probs = 0.25)
#' summary_counts(x, count_test = "<", count_value = 5, na.rm = TRUE, na_type = "con", na_consecutive_n = 3, probs = 0.75)
summary_counts <- function(x, count_test = "==", count_value, na.rm = FALSE, na_type = "", ...) {
if (na.rm && na_type != "" && !na_check(x, na_type = na_type, ...)) {
return(NA)
} else {
if (!na.rm) {
if (sum(is.na(x)) > 0) {
return(NA)
}
return(length(x[eval(parse(text = paste("x", count_value, sep = count_test)))]))
}
else {
y <- na.omit(x)
return(length(y[eval(parse(text = paste("y", count_value, sep = count_test)))]))
}
}
}
#' Proportions function
#'
#' This function returns the proportion based on a threshold and inequality defined. It also allow flexible definition of missing values.
#'
#' @param x A numeric vector.
#' @param prop_test The inequlity to be applied in the test i.e `>, <, >=, <=, ==`.
#' @param prop_value The proportion value threshold. This is used with `prop_test`.
#' @param as_percentage Boolean value set to TRUE if percentages are required.
#' @param na.rm A logical value indicating whether missing values should be stripped before the computation proceeds.
#' @param na_type A vector of strings that defines the type of missing value criteria e.g "n", "con", "prop", "FUN".
#' @param ... Additional parameters that could be passed internally. for example "na_consecutive_n = 3" to define the maximum allowed number of consecutive missing values. Other parameters include "na_max_n", "na_min_n", "n_non_miss", "na_max_prop" for maximum, minimum and proportion of missing values.
#' @param dp Integer indicating the number of decimal places (round) to be used. By default "dp=2".
#'
#' @return The proportion value.
#' @author Shadrack Kibet
#' @export
#'
#' @examples
#' x <- c(1:10, NA, NA, NA, 2, 5, 6, 7)
#' summary_proportions(x, prop_test = ">", prop_value = 2, na.rm = TRUE, na_type = "n", na_max_n = 3, probs = 0.25)
#' summary_proportions(x, prop_test = "<", prop_value = 5, na.rm = TRUE, na_type = "con", na_consecutive_n = 3, probs = 0.75)
#' summary_proportions(x, prop_test = "<", prop_value = 5, as_percentage = TRUE, na.rm = TRUE, na_type = "con", na_consecutive_n = 3, probs = 0.75)
summary_proportions <- function(x, prop_test = "==", prop_value, as_percentage = FALSE, na.rm = FALSE, na_type = "", dp = 2, ...) {
if (na.rm && na_type != "" && !na_check(x, na_type = na_type, ...)) {
return(NA)
} else {
if (!na.rm) {
if (sum(is.na(x)) > 0) {
return(NA)
}
y <- x[eval(parse(text = paste("x", prop_value, sep = prop_test)))]
if (!as_percentage) {
return(round(length(y) / length(x), digits = dp))
}
else {
return(round((length(y) / length(x) * 100), digits = dp))
}
}
else {
remove.na <- na.omit(x)
y <- remove.na[eval(parse(text = paste("remove.na", prop_value, sep = prop_test)))]
if (!as_percentage) {
return(round(length(y) / length(remove.na), digits = dp))
}
else {
return(noquote(paste0(round(length(y) / length(remove.na) * 100, digits = dp), sep = "%")))
}
}
}
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.