R/splitstackshape_helper_functions.R

#' Extract All Names From a Dataset Other Than the Ones Listed
#' 
#' A convenience function for `setdiff(names(data), -some_vector_of_names-)`.
#' 
#' @param data The input `data.frame`.
#' @param toremove The `names` you want to exclude.
#' @return A character vector of the remaining names.
#' @author Ananda Mahto
#' @seealso [base::setdiff()]
#' @examples
#' 
#' mydf <- data.frame(a = 1:2, b = 3:4, c = 5:6)
#' splitstackshape:::othernames(mydf, "a")
#' 
othernames <- function(data, toremove) {
  setdiff(names(data), Names(data, toremove))
}
NULL



#' Dataset Names as a Character Vector, Always
#' 
#' A convenience function using either character vectors or numeric vectors to
#' specify a subset of `names` of a `data.frame`.
#' 
#' 
#' @param data The input `data.frame`.
#' @param invec The `names` you want.
#' @return A character vector of the desired names.
#' @author Ananda Mahto
#' @examples
#' 
#' mydf <- data.frame(a = 1:2, b = 3:4, c = 5:6)
#' splitstackshape:::Names(mydf, c("a", "c"))
#' splitstackshape:::Names(mydf, c(1, 3))
#' 
Names <- function(data, invec) {
  if (!is.numeric(invec)) invec <- match(invec, names(data))
  names(data)[invec]
}
NULL



#' Read Concatenated Character Vectors Into a data.frame
#' 
#' Originally a helper function for the [concat.split.compact()] function. This 
#' function has now been effectively replaced by [cSplit()].
#' 
#' 
#' @param data The input data.
#' @param col.prefix The desired column prefix for the output `data.frame`.
#' @param sep The character that acts as a delimiter.
#' @param \dots Other arguments to pass to [utils::read.table()].
#' @return A `data.frame`.
#' @author Ananda Mahto
#' @seealso [utils::read.table()]
#' @examples
#' 
#' vec <- c("a,b", "c,d,e", "f, g", "h, i, j,k")
#' splitstackshape:::read.concat(vec, "var", ",")
#' 
#' ## More than 5 lines the same
#' ## `read.table` would fail with this
#' vec <- c("12,51,34,17", "84,28,17,10", "11,43,28,15",
#' "80,26,17,91", "10,41,25,13", "97,35,23,12,13")
#' splitstackshape:::read.concat(vec, "var", ",")
#' 
read.concat <- function(data, col.prefix, sep, ...) {
  if (!is.character(data)) data <- as.character(data)
  zz <- textConnection(data)
  x <- count.fields(zz, sep = sep)
  close(zz)
  t1 <- read.table(text = data, sep = sep, fill = TRUE,
                   row.names = NULL, header = FALSE,
                   strip.white = TRUE,
                   col.names = paste("v", sequence(max(x))),
                   ...)
  names(t1) <- paste(col.prefix, seq(ncol(t1)), sep = "_")
  t1
}
NULL



#' Create a Numeric Matrix from a List of Values
#' 
#' Create a numeric matrix from a list of values
#' 
#' This is primarily a helper function for the [concat.split()] function when 
#' creating the "expanded" structure. The input is anticipated to be a `list` of 
#' values obtained using [base::strsplit()].
#' 
#' @param listOfValues A `list` of input values to be inserted in a matrix.
#' @param fill The initializing fill value for the empty matrix.
#' @param mode Either `"binary"` or `"value"`. Defaults to `"binary"`.
#' @return A `matrix`.
#' @author Ananda Mahto
#' @seealso [base::strsplit()], [charMat()].
#' @examples
#' 
#' invec <- c("1,2,4,5,6", "1,2,4,5,6", "1,2,4,5,6",
#'            "1,2,4,5,6", "-1,1,2,5,6", "1,2,5,6")
#' A <- strsplit(invec, ",")
#' splitstackshape:::numMat(A)
#' splitstackshape:::numMat(A, fill = 0)
#' splitstackshape:::numMat(A, mode = "value")
#' 
numMat <- function(listOfValues, fill = NA, mode = "binary") {
  listOfValues <- lapply(listOfValues, as.integer)
  len  <- length(listOfValues)
  vec  <- unlist(listOfValues, use.names = FALSE)
  slvl <- seq(min(vec, na.rm = TRUE), max(vec, na.rm = TRUE))
  out  <- matrix(fill, nrow = len, ncol = length(slvl), dimnames = list(NULL, slvl))
  i.idx <- rep(seq_len(len), vapply(listOfValues, length, integer(1L)))
  j.idx <- match(vec, slvl)
  out[na.omit(cbind(i.idx, j.idx))] <- switch(mode, binary = 1L, value = na.omit(vec), 
                                     stop("'mode' must be 'binary' or 'value'"))
  out
}
NULL


#' Create a Binary Matrix from a List of Character Values
#' 
#' Create a binary matrix from a list of character values
#' 
#' This is primarily a helper function for the [concat.split()] function when 
#' creating the "expanded" structure. The input is anticipated to be a `list` of 
#' values obtained using [base::strsplit()].
#' 
#' @param listOfValues A `list` of input values to be inserted in a matrix.
#' @param fill The initializing fill value for the empty matrix.
#' @param mode Either `"binary"` or `"value"`. Defaults to `"binary"`.
#' @return A \code{matrix}.
#' @author Ananda Mahto
#' @seealso [base::strsplit()], [numMat()].
#' @examples
#' 
#' invec <- c("rock,electro","electro","rock,jazz")
#' A <- strsplit(invec, ",")
#' splitstackshape:::charMat(A)
#' splitstackshape:::charMat(A, 0)
#' splitstackshape:::charMat(A, mode = "value")
#' 
charMat <- function(listOfValues, fill = NA, mode = "binary") {
  len   <- length(listOfValues)
  vec   <- unlist(listOfValues, use.names = FALSE)
  lvl   <- sort(unique(vec))
  out   <- matrix(fill, nrow = len, ncol = length(lvl), 
                  dimnames = list(NULL, lvl))
  i.idx <- rep(seq.int(len), vapply(listOfValues, length, integer(1L)))
  j.idx <- match(vec, lvl)
  out[cbind(i.idx, j.idx)] <- switch(mode, binary = 1L, value = vec, 
                                     stop("'mode' must be 'binary' or 'value'"))
  out
}
NULL



#' Split Basic Alphanumeric Strings Which Have No Separators
#' 
#' Used to split strings like "Abc8" into "Abc" and "8".
#' 
#' 
#' @param data The vector of strings to be split.
#' @param charfirst Is the string constructed with characters at the start or
#' numbers? Defaults to `TRUE`.
#' @return A `data.frame` with two columns, `.var` and `.time_1`.
#' @note This is a helper function for the [Stacked()] and [Reshape()] functions.
#' @author Ananda Mahto
#' @seealso [base::strsplit()]
#' @examples
#' 
#' x <- paste0("Var", LETTERS[1:3], 1:3)
#' splitstackshape:::NoSep(x)
#' 
#' y <- paste0(1:3, "Var", LETTERS[1:3])
#' splitstackshape:::NoSep(y, charfirst = FALSE)
#' 
NoSep <- function(data, charfirst = TRUE) {
  if (isTRUE(charfirst)) {
    Pattern <- "([[:alpha:]]+)([[:digit:]]+)"
    Names <- c(".var", ".time_1")
  } else {
    Pattern <- "([[:digit:]]+)([[:alpha:]]+)"
    Names <- c(".time_1", ".var")
  }
  setNames(data.frame(gsub(Pattern, "\\1", data), 
                      gsub(Pattern, "\\2", data)), Names)
}
NULL



#' Convert All Factor Columns to Character Columns
#' 
#' Sometimes, we forget to use the `stringsAsFactors` argument when using
#' [utils::read.table()] and related functions. By default, R converts character 
#' columns to factors. Instead of re-reading the data, the `FacsToChars` 
#' function will identify which columns are currently factors, and convert them 
#' all to characters.
#' 
#' 
#' @param mydf The name of your `data.frame`
#' @author Ananda Mahto
#' @seealso [utils::read.table()]
#' @examples
#' 
#' ## Some example data
#' dat <- data.frame(title = c("title1", "title2", "title3"),
#'          author = c("author1", "author2", "author3"),
#'          customerID = c(1, 2, 1))
#' 
#' str(dat) # current structure
#' dat2 <- splitstackshape:::FacsToChars(dat)
#' str(dat2) # Your new object
#' str(dat)  # Original object is unaffected
#' 
FacsToChars <- function(mydf) {
  mydf[sapply(mydf, is.factor)] <- 
    lapply(mydf[sapply(mydf, is.factor)], as.character)
  mydf
}
NULL

trim <- function(x) gsub("^\\s+|\\s+$", "", x)
NULL

.collapseMe <- function(invec, atStart = TRUE) {
  if (isTRUE(atStart)) paste(sprintf("^%s", invec), collapse = "|")
  else paste(sprintf("%s$", invec), collapse = "|")
}
NULL

.stripWhite <- function(invec, delim = ",") {
  gsub("^\\s+|\\s+$", "",
       gsub(sprintf("\\s+[%s]\\s+|\\s+[%s]|[%s]\\s+",
                    delim, delim, delim), delim, invec))
}
NULL

vGrep <- Vectorize(grep, "pattern", SIMPLIFY = FALSE)
NULL

.noEmpty <- function(invec) {
  invec[invec != ""]
}
NULL

.pad <- function(invec) {
  nchars <- max(nchar(invec))
  sprintf(paste0("%0", nchars, "d"), invec)
}
NULL
mrdwab/splitstackshape documentation built on May 23, 2019, 7:16 a.m.