R/operators.R

Defines functions dvf simp mod pf cf convert_unix insPack

Documented in cf convert_unix dvf insPack mod pf simp

#' Divide and Format
#'
#' @description dvf is a shorthand for division and formatting with specified number of decimal points
#'
#' @param a numerator
#' @param b denominator
#' @param decimals number of decimal points to display
#'
#' @return formatted a/b computation. Division by zero yields Inf,
#'     division by Inf yields zero
#'
#' @export
#'
#' @seealso \code{round} in base package
#'
#' @examples
#' dvf(1,3)
#' dvf(1,3,decimals = 1)
#' dvf(1,3,decimals = 8)
dvf <- function(a, b, decimals = 3) round(a/b, decimals)


#' Simple Imputation
#'
#' @description simp is a useful function to quickly impute missing values with naive methods.
#'
#' @param x vector
#' @param method method of imputation, possible values are "mean", "median", "zero" or "flag".
#'
#' @export
#'
#' @seealso to apply more sophisticated imputations see packages mice, missForest, Hmisc, mi and norm
#'
#' @examples
#' # impute airquaity ozone data
#' anyNA(airquality$Ozone)
#' ozone <- simp(airquality$Ozone)
#' anyNA(ozone)
#'
#' # impute the entire airquality dataset with mean (default method)
#' airquality2 <- apply(airquality, 2, simp)
#' anyNA(airquality2)
#'
#' # impute with the median
#' airquality3 <- apply(airquality, 2, function(x) simp(x, "median"))
#' anyNA(airquality3)
simp <- function(x, method = 'mean') {
    switch(method,
           mean   =  x[is.na(x)] <- mean(x, na.rm = TRUE),
           median =  x[is.na(x)] <- stats::median(x, na.rm = TRUE),
           flag   =  x[is.na(x)] <- "NA",
           zero   =  x[is.na(x)] <- 0)
    return(x)
  }


#' Mode
#'
#' @description find the most frequently occurring value in a vector.
#'    If the vector contains more than a single mode, then the first one to encounter is returned.
#'
#' @param x vector, not limited to numerical.
#'
#' @return the mode
#'
#' @export
#'
#' @examples
#' table(mtcars$cyl)
#' mod(mtcars$cyl)
#' mod(c("John", "Kevin", "Terry", "John", "Sven"))
mod <- function(x) {
  ux <- unique(x)
  ux[which.max(tabulate(match(x, ux)))]
}


#' Percent Format
#'
#' @description shorthand for scales::percent_format, formats fraction to percent.
#'
#' @return percent formatted vector
#'
#' @param x numeric value or numeric vector
#'
#' @export
#'
#' @seealso \code{percent} in scales package
#'
#' @examples
#' # useful for evaluation metrics
#' accuracy = 92/100
#' recall = 45/51
#' pf(accuracy)
#' pf(recall)
#'
#' # can be applied to vectors
#' pf(seq(0.1, 0.2, 0.05))
pf <- function(x) {

  # make sure there are only finite numbers
  for (val in x)
    if (is.infinite(val)| is.nan(val))
      stop("Infinite/NaN value in input")

  scales::percent(x)
}



#' Comma Format
#'
#' @description shorthand for scales::comma, print value in comma format
#'
#' @return comma formatted vector
#'
#' @param x numeric value or numeric vector
#'
#' @export
#'
#' @seealso \code{comma} in scales package
#'
#' @examples
#' # useful for evaluation metrics
#' big_num = 1000000L
#' cf(big_num)
#'
#' # vectorized version
#' big_vec = seq(1e+10, 5e+10, 1e+10)
#' cf(big_num)
cf <- function(x) {

  # make sure there are only finite numbers
  for (val in x)
    if (is.infinite(val)| is.nan(val))
      stop("Infinite/NaN value in input")
  scales::comma(x)

}


#' Unix to datetime conversion
#'
#' @param x numeric vector of unix formatted dates
#' @param ms unix time includes miliseconds?
#' @param timezone default is UTC
#'
#' @return vector of datetimes
#'
#' @export
#'
#' @examples
#' # useful for evaluation metrics
#' unix_origin <- 0L
#' convert_unix(unix_origin)
#'
#' # 2 days after origin, disregarding miliseconds
#' two_days <-(2 * 24 * 60 * 60)
#' convert_unix(two_days, ms = FALSE)
convert_unix <- function(x , ms = TRUE, timezone = "UTC") {

  # do not proceed unless timestamp is numerical
  stopifnot(is.numeric(x))

  # use miliseconds
  mil <- if (ms == TRUE) 1000 else 1

  # convert timestamp to dtm
  as.POSIXct(x / mil, origin = "1970-01-01", tz = timezone)
}


#' Safely install a package
#'
#' @param pkg package name
#'
#' @return installs package if necessary
#' @export
#'
#' @examples
#' # package magrittr installation
#' insPack("magrittr")
#'
#' # existing packages will not be installed
#' insPack("base")
insPack <- function(pkg) {
  if (!pkg %in% installed.packages()) {
    print(paste("Installing", pkg))
    install.packages(pkg)
  } else {
    print(paste(pkg, "already installed!"))
  }
}
ShaulAb/typeless documentation built on May 28, 2019, 3:15 p.m.