#' Scale a data frame or matrix
#'
#' @description an improvement of the base R scale function. Unlike R's standard scale function, this
#' allows for factor columns or character columns to be present in the data, and will simply
#' leave those untouched without throwing an error.
#' @param data a data frame or vector
#' @param wts weights for each row. optional and defaults to NULL.
#' @param center should the data be centered? Defaults to TRUE.
#' @param scale should the data be scaled? Defaults to TRUE.
#' @export
#' @return A data frame
#' @examples
#' Scale(data)
#'
Scale = function (data, wts = NULL, center = TRUE, scale = TRUE) {
if (isTRUE(is.vector(data))) {
Vector = "YES"
data = cbind.data.frame(x = data)
} else {
Vector = "NO"
data = as.data.frame(data)
}
if (is.null(wts)){
Scalefun = function(x) {
Mean = mean(x)
Sd = sd(x)
x = x - Mean
x = x/Sd
return(x)
}
ind <- sapply(data, is.numeric)
scaled.data = data
scaled.data[ind] <- lapply(scaled.data[ind], Scalefun)
if (!scale){
Sds = apply(data[ind], 2, sd)
scaled.data[ind] <- scaled.data[ind] * Sds
}
if (!center){
Means = apply(data[ind], 2, mean)
scaled.data[ind] <- scaled.data[ind] + Means
}
}
else if (!is.null(wts)){
wScalefun = function(x, w) {
if (length(unique(x)) == 1){
return(x)
} else {
w = w / sum(w)
h1 = 1/(1 - sum(w * w))
Mean = sum(x * w)
Sd = sqrt(h1 * (sum(w * x^2) - Mean^2))
x = x - Mean
x = x/Sd
return(x)
}
}
ind <- sapply(data, is.numeric)
scaled.data = data
scaled.data[ind] <- lapply(scaled.data[ind], function(x) wScalefun(x, wts))
if (!scale){
w = wts / sum(wts)
h1 = 1/(1 - sum(w * w))
Means = colSums(scaled.data[ind] * w)
Sds = sqrt(h1 * (colSums(w * scaled.data[ind]^2) - Means^2))
scaled.data[ind] <- scaled.data[ind] * Sds
}
if (!center && !scale){
scaled.data[ind] <- scaled.data[ind] + Means
}
if (!center && scale){
w = w / sum(w)
h1 = 1/(1 - sum(w * w))
Means = colSums(scaled.data[ind] * w)
scaled.data[ind] <- scaled.data[ind] + Means
}
}
if (Vector=="YES") {
return(as.vector(as.matrix(scaled.data)))
}
else {
return(as.data.frame(scaled.data))
}
}
#' Apply Yeo-Johnson transformation to data frame or matrix
#'
#' @description This will apply the bestNormalize package's yeo-johnson transform function to the numeric
#' columns of a data frame. It leaves factor variables alone. The returned numeric variables are also centered
#' to zero and unit scaled.
#' @param data a data frame or vector
#' @export
#' @return A data frame
#' @examples
#' yjScale(data)
#'
yjScale = function(data){
if (isTRUE(is.vector(data))) {
Vector = "YES"
data = cbind.data.frame(x = data)
} else {
Vector = "NO"
data = as.data.frame(data)
}
Scalefun = function(x) {
bestNormalize::yeojohnson(x, standardize = TRUE)$x.t
}
ind <- sapply(data, is.numeric)
scaled.data = data
scaled.data[ind] <- lapply(scaled.data[ind], Scalefun)
if (Vector=="YES") {
return(as.vector(as.matrix(scaled.data)))
}
else {
return(as.data.frame(scaled.data))
}
}
#' Scale a data frame or matrix with custom scaling and centering functions
#'
#' @description an improvement of the base R scale function. Unlike R's standard scale function, this
#' allows for factor columns or character columns to be present in the data, and will simply
#' leave those untouched without throwing an error. This function takes a center function and
#' scale function, allowing the user to customize the type of scaling. However, each must be a univariate
#' function. For example, 'colMeans' will not work, but 'mean' will.
#'
#' @param data a data frame or vector
#' @param center a function for estimating the center of each column. defaults to mean.
#' @param scale a function for estimating the scale of each column. defaults to sd.
#' @export
#' @return A data frame
#' @examples
#' Scale2(data, median, mad)
#'
Scale2 = function (data, center = mean, scale = sd) {
if (isTRUE(is.vector(data))) {
Vector = "YES"
data = cbind.data.frame(x = data)
} else {
Vector = "NO"
data = as.data.frame(data)
}
if (is.function(center)){
centerfun <- center
}
if (!is.function(center) && !is.null(center)){
stop("center must be a function or NULL")
}
if (is.function(scale)){
scalefun <- scale
}
if (!is.function(scale) && !is.null(scale)){
stop("scale must be a function or NULL")
}
if (is.null(scale)){
make.scalefun <- function(centerfun){
function(x) {
Mean = centerfun(x)
x = x - Mean
return(x)
}
}
Scalefun <- make.scalefun(centerfun)
}
else if (is.null(center)){
make.scalefun <- function(scalefun){
function(x) {
Sd = scalefun(x)
x = x/Sd
return(x)
}
}
Scalefun <- make.scalefun(scalefun)
}
else{
make.scalefun <- function(centerfun, scalefun){
function(x) {
Mean = centerfun(x)
Sd = scalefun(x)
x = x - Mean
x = x/Sd
return(x)
}
}
Scalefun <- make.scalefun(centerfun, scalefun)
}
ind <- sapply(data, is.numeric)
scaled.data = data
scaled.data[ind] <- lapply(scaled.data[ind], Scalefun)
if (Vector=="YES") {
return(as.vector(as.matrix(scaled.data)))
}
else {
return(as.data.frame(scaled.data))
}
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.