R/scale.R

Defines functions yjScale

Documented in yjScale

#' Scale a data frame or matrix
#'
#' @description an improvement of the base R scale function. Unlike R's standard scale function, this
#' allows for factor columns or character columns to be present in the data, and will simply
#' leave those untouched without throwing an error.
#' @param data a data frame or vector
#' @param wts weights for each row. optional and defaults to NULL.
#' @param center should the data be centered? Defaults to TRUE.
#' @param scale should the data be scaled? Defaults to TRUE.
#' @export
#' @return A data frame
#' @examples
#' Scale(data)
#'
Scale = function (data, wts = NULL, center = TRUE, scale = TRUE) {

  if (isTRUE(is.vector(data))) {
    Vector = "YES"
    data = cbind.data.frame(x = data)
  } else {
    Vector = "NO"
    data = as.data.frame(data)
  }


  if (is.null(wts)){
    Scalefun = function(x) {
      Mean = mean(x)
      Sd = sd(x)
      x = x - Mean
      x = x/Sd
      return(x)
    }

    ind <- sapply(data, is.numeric)
    scaled.data = data
    scaled.data[ind] <- lapply(scaled.data[ind], Scalefun)

    if (!scale){
      Sds = apply(data[ind], 2, sd)
      scaled.data[ind] <- scaled.data[ind] * Sds
    }

    if (!center){
      Means = apply(data[ind], 2, mean)
      scaled.data[ind] <- scaled.data[ind] + Means
    }
  }
  else if (!is.null(wts)){

      wScalefun = function(x, w) {

        if (length(unique(x)) == 1){
          return(x)
        } else {
          w = w / sum(w)
          h1 = 1/(1 - sum(w * w))
          Mean = sum(x * w)
          Sd = sqrt(h1 * (sum(w * x^2) - Mean^2))
          x = x - Mean
          x = x/Sd
          return(x)
        }
      }

      ind <- sapply(data, is.numeric)
      scaled.data = data
      scaled.data[ind] <- lapply(scaled.data[ind], function(x) wScalefun(x, wts))

      if (!scale){
        w = wts / sum(wts)
        h1 = 1/(1 - sum(w * w))
        Means = colSums(scaled.data[ind] * w)
        Sds = sqrt(h1 * (colSums(w * scaled.data[ind]^2) - Means^2))
        scaled.data[ind] <- scaled.data[ind] * Sds
      }

      if (!center && !scale){
        scaled.data[ind] <- scaled.data[ind] + Means
      }

      if (!center && scale){
        w = w / sum(w)
        h1 = 1/(1 - sum(w * w))
        Means = colSums(scaled.data[ind] * w)
        scaled.data[ind] <- scaled.data[ind] + Means
      }

    }

    if (Vector=="YES") {
      return(as.vector(as.matrix(scaled.data)))
    }
    else {
      return(as.data.frame(scaled.data))
    }
}



#' Apply Yeo-Johnson transformation to data frame or matrix
#'
#' @description This will apply the bestNormalize package's yeo-johnson transform function to the numeric
#' columns of a data frame. It leaves factor variables alone. The returned numeric variables are also centered
#' to zero and unit scaled.
#' @param data a data frame or vector
#' @export
#' @return A data frame
#' @examples
#' yjScale(data)
#'
yjScale = function(data){

  if (isTRUE(is.vector(data))) {
    Vector = "YES"
    data = cbind.data.frame(x = data)
  } else {
    Vector = "NO"
    data = as.data.frame(data)
  }


  Scalefun = function(x) {
    bestNormalize::yeojohnson(x, standardize = TRUE)$x.t
  }

  ind <- sapply(data, is.numeric)
  scaled.data = data
  scaled.data[ind] <- lapply(scaled.data[ind], Scalefun)

  if (Vector=="YES") {
    return(as.vector(as.matrix(scaled.data)))
  }
  else {
    return(as.data.frame(scaled.data))
  }
}




#' Scale a data frame or matrix with custom scaling and centering functions
#'
#' @description an improvement of the base R scale function. Unlike R's standard scale function, this
#' allows for factor columns or character columns to be present in the data, and will simply
#' leave those untouched without throwing an error. This function takes a center function and
#' scale function, allowing the user to customize the type of scaling. However, each must be a univariate
#' function. For example, 'colMeans' will not work, but 'mean' will.
#'
#' @param data a data frame or vector
#' @param center a function for estimating the center of each column. defaults to mean.
#' @param scale a function for estimating the scale of each column. defaults to sd.
#' @export
#' @return A data frame
#' @examples
#' Scale2(data, median, mad)
#'
Scale2 = function (data, center = mean, scale = sd) {

  if (isTRUE(is.vector(data))) {
    Vector = "YES"
    data = cbind.data.frame(x = data)
  } else {
    Vector = "NO"
    data = as.data.frame(data)
  }

    if (is.function(center)){
      centerfun <- center
    }
    if (!is.function(center) && !is.null(center)){
      stop("center must be a function or NULL")
    }

    if (is.function(scale)){
      scalefun <- scale
    }
    if (!is.function(scale) && !is.null(scale)){
      stop("scale must be a function or NULL")
    }

    if (is.null(scale)){
      make.scalefun <- function(centerfun){
        function(x) {
          Mean = centerfun(x)
          x = x - Mean
          return(x)
        }
      }
      Scalefun <- make.scalefun(centerfun)
    }
    else if (is.null(center)){
      make.scalefun <- function(scalefun){
        function(x) {
          Sd = scalefun(x)
          x = x/Sd
          return(x)
        }
      }
      Scalefun <- make.scalefun(scalefun)
    }
    else{
      make.scalefun <- function(centerfun, scalefun){
        function(x) {
          Mean = centerfun(x)
          Sd = scalefun(x)
          x = x - Mean
          x = x/Sd
          return(x)
        }
      }
      Scalefun <- make.scalefun(centerfun, scalefun)
    }
    ind <- sapply(data, is.numeric)
    scaled.data = data
    scaled.data[ind] <- lapply(scaled.data[ind], Scalefun)

    if (Vector=="YES") {
      return(as.vector(as.matrix(scaled.data)))
    }
    else {
      return(as.data.frame(scaled.data))
    }
  }
abnormally-distributed/cvreg documentation built on May 3, 2020, 3:45 p.m.