#' @title Standardise the Numeric Variables of a given Dataset
#'
#' @description This function standardises the numeric variables of a given
#' dataset. Three methods are available: range standardisation, normalisation
#' standardisation, and median absolute deviation (MAD) standardisation.
#' Range standardisation rescales the numeric variables to a specified range,
#' the default is [0, 1].
#' Normalisation standardisation rescales the numeric variables to have mean 0
#' and standard deviation 1.
#' Median absolute deviation standardisation rescales the numeric variables to
#' have median 0 and median absolute deviation 1.
#'
#' @details Missing values are ignored when the summary statistics (minimum,
#' maximum, mean, standard deviation, median) are computed and stay missing in
#' the output. A variable with zero spread (for example a constant column) is
#' divided by zero, so its standardised values are \code{NaN} or \code{Inf};
#' no error is raised. Non-numeric columns of a data frame are returned
#' unchanged.
#'
#' @param dataset A dataset to be standardised. Either a data frame, which may
#' have mixed column types, or a numeric (or logical) vector.
#'
#' @param method A character object denoting the method of standardisation
#' used. One of three possible options; "range", "norm", "MAD".
#'
#' @param lower_bound The lower bound of the range standardisation, default is
#' 0. Only used when \code{method = "range"}.
#'
#' @param upper_bound The upper bound of the range standardisation, default is
#' 1. Only used when \code{method = "range"}.
#'
#' @param file_name A character object indicating the file name when saving the
#' data frame. The default is NULL. The name must include the .csv suffix.
#' Required when \code{directory} is supplied.
#'
#' @param directory A character object specifying the directory where the data
#' frame is to be saved as a .csv file. The default is NULL, in which case
#' nothing is written to disk.
#'
#' @return Outputs the standardised dataset: a data frame when \code{dataset}
#' is a data frame, otherwise a numeric vector of the same length as
#' \code{dataset}.
#'
#' @export
#'
#' @seealso \code{\link{remove_variables}}, \code{\link{derive_variables}},
#' \code{\link{extract_variables}}, \code{\link{impute_variables}},
#' \code{\link{transform_variables}}
#'
#' @examples
#' # Example Data
#' x1 <- rnorm(n = 60, mean = 50, sd = 10)
#' x2 <- rpois(n = 60, lambda = 50)
#' x3 <- sample(x = 1:10, size = 60, replace = TRUE)
#' # Standardise the Numeric Variables
#' standardise_variables(dataset = x1, method = "range")
#' standardise_variables(dataset = iris, method = "range",
#'                       lower_bound = 10, upper_bound = 100)
#' standardise_variables(dataset = x2, method = "norm")
#' standardise_variables(dataset = x3, method = "MAD")
#'
standardise_variables <- function(dataset,
                                  method = c("range", "norm", "MAD"),
                                  lower_bound = 0,
                                  upper_bound = 1,
                                  file_name = NULL,
                                  directory = NULL) {
  # Match the specified method argument with the possible options
  method <- match.arg(method)
  # Fail early: a directory without a file name cannot produce a valid path
  if (!is.null(directory) && is.null(file_name)) {
    stop("`file_name` must be supplied when `directory` is specified.",
         call. = FALSE)
  }
  #---------------------------------------------------------------------------#
  # Single implementation of the three methods, shared by both input types    #
  #---------------------------------------------------------------------------#
  standardise_vector <- function(x) {
    # Nothing to standardise (empty or entirely missing); `+ 0` coerces to
    # double while keeping names, and avoids min()/max() warnings
    if (all(is.na(x))) {
      return(x + 0)
    }
    switch(
      method,
      range = {
        x_min <- min(x, na.rm = TRUE)
        x_max <- max(x, na.rm = TRUE)
        ((x - x_min) / (x_max - x_min)) * (upper_bound - lower_bound) +
          lower_bound
      },
      norm = (x - mean(x, na.rm = TRUE)) / sd(x, na.rm = TRUE),
      MAD = {
        x_median <- median(x, na.rm = TRUE)
        (x - x_median) / median(abs(x - x_median), na.rm = TRUE)
      }
    )
  }
  if (is.data.frame(dataset)) {
    #-------------------------------------------------------------------------#
    # If dataset is a data frame                                              #
    #-------------------------------------------------------------------------#
    # Drop any data frame subclass so column extraction behaves uniformly
    standardised_dataset <- as.data.frame(dataset)
    # Indices of the numeric columns; empty for a zero-column data frame
    numeric_columns <- which(
      vapply(standardised_dataset, is.numeric, logical(1))
    )
    for (column in numeric_columns) {
      standardised_dataset[[column]] <-
        standardise_vector(standardised_dataset[[column]])
    }
  } else if (is.vector(dataset) &&
               (is.numeric(dataset) || is.logical(dataset))) {
    #-------------------------------------------------------------------------#
    # If dataset is a vector                                                  #
    #-------------------------------------------------------------------------#
    standardised_dataset <- standardise_vector(dataset)
  } else {
    stop("`dataset` must be a data frame or a numeric vector.", call. = FALSE)
  }
  # Optionally save the standardised dataset as a .csv file
  if (!is.null(directory)) {
    write.csv(x = standardised_dataset,
              file = file.path(directory, file_name),
              row.names = FALSE)
  }
  # return the standardised dataset
  standardised_dataset
}
# Add the following code to your website.
# For more information on customizing the embed code, read Embedding Snippets.