#' Convert vectors (including factors) to numeric.
#'
#' @param var A vector. Can be a factor.
#' @return Returns \code{var} as a numeric vector.
#' @examples
#' as_numeric(c("1", "2", "3"))
#' as_numeric(factor(c("1", "2", "3")))
#' @return Numeric vector.
#' @export
as_numeric <- function(var) {
var <- as.numeric(as.character(var))
return(var)
}
#' Create assertions with custom error messages.
#'
#' @param condition A condition statement that returns a logical value.
#' @param message A string. A custom error message to return. This is optional.
#' @examples
#' var <- 2
#' assert(is.numeric(var), "This variable is not a numeric!")
#' @export
assert <- function(condition,
message = NULL) {
if(!condition) {
if(is.null(message)) {
stop(FALSE, call. = FALSE)
} else {
stop(message, call. = FALSE)
}
}
}
#' Pull the LHS of a formula.
#'
#' @param form A formula as text or formula.
#' @export
formula_lhs <- function(form) {
assert(purrr::is_formula(form),
"Object doesn't appear to be a valid formula.")
lhs <- trimws(form[2])
return(lhs)
}
#' Pull the RHS of a formula including '~'.
#'
#' @param form A formula as text or formula.
#' @export
formula_rhs <- function(form) {
assert(purrr::is_formula(form),
"Object doesn't appear to be a valid formula.")
rhs <- paste("~", form[3])
return(rhs)
}
#' Initialize parallel processing
#'
#' @export
initialize_parallel <- function(){
future::plan(future::multiprocess)
}
#' Check if vector-like object is binary.
#'
#' @param var A vector-like object. This can also be a factor.
#' @return Logical value indicating if input is binary or not.
#' @examples
#' var <- c(1, 1, 0, 0, 1)
#' is_binary(var)
#' @export
is_binary <- function(var) {
ux <- unique(var)
if (length(ux) != 2) {
return(FALSE)
} else {
if (!all(var %in% c(0, 1))) {
message(
"The input has only two unique values, but they are not in {0, 1}"
)
return(FALSE)
} else {
return(TRUE)
}
}
}
#' Check if object is a Sparse Matrix
#'
#' @param dat Data object.
#' @return Logical indicating if input is sparse Matrix.
#' @export
is_sparseMatrix <- function(dat) {
return(methods::is(dat, "sparseMatrix"))
}
#' Find the modal value of a vector.
#'
#' @param x A vector-like object.
#' @param na.rm A logical. Whether to remove \code{NA}s when computing.
#' @param all.modes A logical. Indicate whether to return all modes.
#' @return Single mode or vector of all modal values.
#' @examples
#' var <- c(1, 1, 2, 2, 3)
#' mode(var)
#' mode(var, all.modes = TRUE)
#' @export
mode <- function(x,
na.rm = TRUE,
all.modes = FALSE) {
if (any(!is.na(x))) {
if(na.rm == TRUE) {
ux <- unique(x)[!is.na(unique(x))]
} else {
ux <- unique(x)
}
} else {
ux <- NA
}
modes <- which(tabulate(match(x, ux)) == max(tabulate(match(x, ux))))
if (all.modes == FALSE) {
return(ux[modes[[1]]])
} else {
return(ux[modes])
}
}
#' Convert vector-like object to a factor with NA as reference level.
#'
#' @param var Vector-like object.
#' @return Factor with NA as reference level if appropriate.
#' @examples
#' var <- c(4, 5, NA, 6, 7)
#' na_ref(var)
#' @export
na_ref <- function(var) {
var <- factor(var,
exclude = NULL,
levels = c(NA,
unique(var)[!is.na(unique(var))]))
var <- droplevels(var)
return(var)
}
#' Remove duplicate coumns from matrices or dataframes
#'
#' @param dat Matrix, dataframe, or sparse matrix.
#' @param return.sparse Logical. Return object as a sparse matrix? Default is F.
#' @return Input object minus duplicate columns.
#' @export
remove_duplicates <- function(dat,
return.sparse = FALSE){
is_df <- is.data.frame(dat)
is_sM <- is_sparseMatrix(dat)
if(!is_sM) dat <- as.matrix(dat)
dup.cols <- as.vector(duplicated.matrix(t(dat)))
dat <- dat[, !dup.cols]
if(return.sparse == FALSE & !is_sM) {
if(is_df) {
return(tibble::as_tibble(dat))
} else {
return(dat)
}
} else {
return(Matrix::Matrix(dat, sparse = TRUE))
}
}
#' Shuffles standard data objects.
#'
#' @param dat Matrix, dataframe, or vector.
#' @return Input object randomly re-ordered.
#' @export
shuffle <- function(dat) {
is_df <- is.data.frame(dat)
is_mat <- is.matrix(dat)
if(is_df | is_mat) {
return(dat[sample(1:nrow(dat)), ])
} else {
return(dat[sample(1:length(dat))])
}
}
#' Set sorted factor levels. Specify specific base level if desired.
#'
#' @param var Input Vector.
#' @param base.level String indicating base level. Argument is optional.
#' @return Factor with ascending sorted levels and user-specified base level.
#' @export
sort_factor <- function(var,
base.level = NULL) {
if(!is.null(base.level)) {
var <- factor(var,
levels = c(
base.level,
sort(unique(var))[which(sort(unique(var)) != base.level)]
))
return(var)
} else {
var <- factor(var,
levels = sort(unique(var)))
return(var)
}
}
#' Checks the sparsity of each column of a dataframe, matrix, or sparse matrix.
#'
#' @param input Dataframe, matrix, or sparse matrix.
#' @param count.na.zero Logical. Should NAs be counted as zeros?
#' @return Dataframe with the respective column names and sparsity.
#' @examples
#' df <- data.frame(a = c(0, 0, 1), b = c(1, 1, 0), c = c(0, 0, 0))
#' sparsity(df)
#' @export
sparsity <- function(input,
count.na.zero = FALSE) {
sparsity <- dplyr::rename(
tibble::enframe(
apply(input, 2, function(i) {
Matrix::nnzero(i, na.counted = count.na.zero)/nrow(input)
})
),
"VARIABLE" = "name",
"PERC_SPARSE" = "value"
)
return(sparsity)
}
#' Standardizes vector, dataframe, or matrix.
#'
#' @param var Vector, dataframe, or matrix. Will not standardize factors,
#' characters, or binary (0,1) vectors or columns.
#' @return Standardized input object.
#' @export
standardize <- function(var) {
# Function that scales any non-factor/character vector
stdz <- function(x){
if(!is.factor(x) & !is.character(x)) {
x <- as.vector(scale(x))
}
return(x)
}
# Checks for data.frame and matrix classes
if(is.data.frame(var)) {
var <- dplyr::bind_cols(lapply(var, stdz))
} else if (is.matrix(var)) {
var <- apply(var, 2, stdz)
} else {
var <- stdz(var)
}
return(var)
}
strat_sample <- function(df, strat) {
# Append a column of ids
df <- dplyr::select(dplyr::mutate(df, id = 1:nrow(df)), `strat`, "id")
# Get stratified bootstrap sample
ids <- sort(
dplyr::pull(
dplyr::ungroup(
dplyr::sample_n(
dplyr::mutate(
dplyr::group_by(df, get(strat)),
"N" = dplyr::n()
),
size = get("N"),
replace = TRUE
)
),
"id"
)
)
return(ids)
}
#' Returns N stratified bootstrap samples of a dataframe.
#'
#' @param df Dataframe to sample from.
#' @param strat String specifying strata column.
#' @param nboot Numeric value specifying number of bootstrap samples.
#' @param parallel Logical value. Helpful when \code{nboot} is large.
#' @return List of N vectors containing bootstrap indices.
#' @export
strat_sample_boot <- function(df,
strat,
nboot,
parallel = TRUE) {
# Create a named vector for each bootstrap sample
boot_list <- paste0("Sample", 1:nboot)
# Create samples in parallel or not
if(parallel == FALSE) {
# Create n samples using strat_sample function
samples <- lapply(boot_list, function(i) {
return(strat_sample(df = df, strat = strat))
})
} else {
# Initialize parallel processing
future::plan(future::multiprocess)
# Create n samples using strat_sample function
samples <- future.apply::future_lapply(boot_list, function(i) {
return(strat_sample(df = df, strat = strat))
})
}
# Return list of n bootstrap samples with names
names(samples) <- boot_list
return(samples)
}
#' Create a train, test, and validation split of a dataset.
#'
#' @param y Outcome vector.
#' @param train.p Fraction of observations in the training set.
#' @param test.p Fraction of observations in the test set
#' @return A list with train, test, and validation indices.
#' @export
train_test_validate <- function(y,
train.p,
test.p) {
rand_idx <- shuffle(1:length(y))
train_idx <- floor(train.p*length(y))
test_idx <- floor(test.p*length(y)) + train_idx
train <- rand_idx[1:train_idx]
test <- rand_idx[(train_idx + 1):test_idx]
validate <- rand_idx[(test_idx + 1):length(y)]
assert(sum(c(train, test, validate) == rand_idx) == length(y))
return(list(train = sort(train),
test = sort(test),
validate = sort(validate)))
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.