#' @title Transforms the Numeric Variables of a given Dataset based on a skewness criterion
#'
#' @description This function transforms the numeric variables of a given
#' dataset, optionally restricted to those variables whose skewness falls
#' inside a given range.
#' There are two transformation methods; log and power.
#' Log transformation replaces a numeric variable x with log(x + 1).
#' Power transformation replaces a numeric variable x with (x + 1)^p, so for
#' example p = 0.5 gives a square root and p = -1 an inverse of the shifted
#' variable.
#' Non-numeric variables are always returned unchanged.
#' Without the skew bounds every numeric variable is transformed.
#'
#' @param dataset A dataset to be transformed, the dataset can have mixed types.
#'
#' @param skew_bound A numeric vector of length two representing the lower and
#' upper skewness bounds (both inclusive). Only numeric variables whose
#' skewness lies within the bounds are transformed; variables whose skewness
#' cannot be computed (for example constant variables) are left unchanged.
#' Default is NULL, in which case every numeric variable is transformed.
#'
#' @param method A character object denoting the method of transformation used.
#' One of two possible options; "log" (the default) or "power".
#'
#' @param p The power used by the power transformation, a single number.
#' Required when \code{method = "power"}, ignored otherwise. Default is NULL.
#'
#' @return Outputs the transformed dataset as data frame.
#'
#' @import moments
#'
#' @export
#'
#' @seealso \code{\link{remove_variables}}, \code{\link{derive_variables}}, \code{\link{extract_variables}}, \code{\link{impute_variables}}, \code{\link{standardise_variables}}
#'
#' @examples
#' # Example Data
#' x1 <- rnorm(n = 60, mean = 50, sd = 10)
#' x2 <- rpois(n = 60, lambda = 50)
#' x3 <- sample(x = 1:10, size = 60, replace = TRUE)
#' x4 <- rep(x = c("yes", "no"), times = 30)
#' x5 <- rep(x = c("high", "medium", "low"), times = 20)
#' x6 <- sample(x = c("yes", "no"), size = 60, replace = TRUE)
#' # Save as a data frame (data.frame() keeps x1-x3 numeric, cbind() would not)
#' data <- data.frame(x1, x2, x3, x4, x5, x6)
#' # Log transform all the Numeric Variables
#' transform_variables(data)
#' # Square root transform only the Numeric Variables with skewness in [-1, 1]
#' transform_variables(data, skew_bound = c(-1, 1), method = "power", p = 0.5)
#'
transform_variables <- function(dataset,
                                skew_bound = NULL,
                                method = c("log", "power"),
                                p = NULL) {
  # Match the specified method argument with the possible options
  method <- match.arg(method)

  # x^NULL is numeric(0), which would otherwise surface as an obscure
  # replacement-length error deep inside the loop; fail early instead.
  if (method == "power" &&
      (!is.numeric(p) || length(p) != 1L || is.na(p))) {
    stop("`p` must be a single non-missing number when method = \"power\".",
         call. = FALSE)
  }

  use_bounds <- !is.null(skew_bound)
  if (use_bounds) {
    if (!is.numeric(skew_bound) || length(skew_bound) == 0L ||
        anyNA(skew_bound)) {
      stop("`skew_bound` must be NULL or a numeric vector without missing values.",
           call. = FALSE)
    }
    # Hoisted out of the loop: the bounds do not depend on the column.
    lower <- min(skew_bound)
    upper <- max(skew_bound)
  }

  # Both transformations shift by one first, so they are guaranteed to work
  # for non-negative data (log(0) and 0^p with p < 0 are avoided).
  transform_column <- switch(method,
    log = function(x) log(x + 1),
    power = function(x) (x + 1)^p
  )

  # seq_len() yields an empty sequence for a zero-column dataset, whereas
  # 1:ncol(dataset) would iterate over c(1, 0) and fail.
  for (i in seq_len(ncol(dataset))) {
    column <- dataset[, i]
    if (!is.numeric(column)) {
      next
    }
    if (use_bounds) {
      skew <- skewness(column, na.rm = TRUE)
      # Skewness is NaN/NA for constant or all-missing columns; such columns
      # cannot satisfy the criterion, so they are skipped rather than letting
      # `if (NA)` abort the whole call.
      if (is.na(skew) || skew < lower || skew > upper) {
        next
      }
    }
    dataset[, i] <- transform_column(column)
  }

  dataset
}
# Add the following code to your website.
# For more information on customizing the embed code, read Embedding Snippets.