R/helpers.R

Defines functions replacement return_if clean_strings underscore expand_df split_col

Documented in clean_strings expand_df replacement split_col underscore

#' Split a column into multiple columns.
#'
#' @param .data a dataframe
#' @param .col_name a character string representing a column name found in
#' \code{.data}.
#' @param .sep a character string used to split the column (\code{.col_name}).
#' @param .new_names_vec a character vector of names to name the new
#'  split columns.
#' @return A data frame.
#' @examples
#' @export

split_col <- function(.data, .col_name, .sep, .new_names_vec) {
  if (length(.sep) != 1) stop(paste(".sep must be length 1.",
                                    "You supplied length:",
                                    length(.sep)))
  split.df <- data.frame(
    do.call(rbind, strsplit(x = .data[[.col_name]], split = .sep)),
    stringsAsFactors = FALSE)

  if (ncol(split.df) != length(.new_names_vec)) {
    stop(paste(".new_names_vec must be the same length as the number of",
               "columns produced during the split.",
               ".new_names_vec:", length(.new_names_vec),
               "Number of split columns:", ncol(split.df)))
  }

  names(split.df) <- .new_names_vec

  final.df <- cbind(.data, split.df)

  return(final.df)

}

#' Split a column into multiple columns.
#'
#' @param .data a dataframe
#' @param .key_col a quoted column name that can be found in \code{.data},
#' which will be used to join the expanded data frame back to the original
#' data frame.
#' @param .expand_vec a vector of unique values that will be used to expand
#' the data frame.
#' @return A data frame.
#' @examples
#' @export

expand_df <- function(.data, .key_col, .expand_vec) {
  if (!identical(unique(.expand_vec), .expand_vec)) {
    stop(".expand_vec must represent unique values.")
  }
  expanded.df <- expand.grid(seg_id = unique(.data[[.key_col]]),
                             use = .expand_vec,
                             stringsAsFactors = FALSE)

  final.df <- merge(expanded.df, .data, by = .key_col, all = TRUE)
  return(final.df)
}

#' Replace non-numeric or non-character values with "_".
#'
#' @param .vec a character vector.
#' @return A character vector.
#' @examples
#' @export

underscore <- function(.vec) {
  gsub("[^A-Za-z0-9]", "_",.vec)
}

#' Standardized string formatting.
#'
#' @param .vec a character vector.
#' @return A character vector.
#' @examples
#' @export

clean_strings <- function(.vec) {
  underscore(trimws(tolower(.vec)))
}

return_if <- function(.arg_vec, logical_fun, ..., invert = FALSE) {
  logical.vec <- vapply(.arg_vec, logical_fun, ...,  FUN.VALUE = NA)

  if (invert == TRUE) logical.vec <- !logical.vec

  if (any(logical.vec)) {
    return(NA_real_)
  }

}

#' Replace values in a column based on values specified in another data frame.
#'
#' @param .x data frame to be joined.
#' @param .y data frame to be joined.
#' @param .by the column name(s) to be used merge .x and .y
#' @param .replace_col the name of the column that requires replacement values.
#' @return A data frame.
#' @examples
#' @export

replacement <- function(.x, .y, .by, .replace_col) {
  merged.df <- merge(x = .x,
                     y = .y,
                     by = .by,
                     all = TRUE)

  x_col <- paste0(.replace_col, ".x")
  y_col <- paste0(.replace_col, ".y")

  merged.df[.replace_col] <- ifelse(!is.na(merged.df[[y_col]]),
                                    merged.df[[y_col]],
                                    merged.df[[x_col]])
  final.df <- subset(x = merged.df,
                     select = !names(merged.df) %in% c(x_col, y_col))

  return(final.df)
}
BWAM/stayCALM documentation built on May 21, 2020, 3:24 p.m.