R/createCategorical.R

#' This function allows the user to create new variables derived from actual input or output data stored in a NONMEM object or a data.frame.
#' The function "bins" the data in a user-chosen manner.  The new "binned" variable will be added to the "additionalVars" data frame of the object, or as a new column if the 
#' object is a data.frame. The methods for creating the categorical variables are:
#' \itemize{
#'  \item{unique:}{Variable is coerced into a factor with no "binning" at all}
#'  \item{explicitcuts:}{The user supplies "cut points" for partitioning the data}
#'  \item{range:}{The user specifies a number of cut points, and they are the generated by cutting the range of the values of the variable into the given number}
#'  \item{logrange:}{As above, but done by range of the log of the variable}
#'  \item{counts:}{User specifies number of cut points, and the bins are then generated so that the bins have equal amounts of data}
#' }
#' @title Add binned variable to NONMEM object 
#' @param obj An object of class NMBasicModel, NMRun, NMSimModel, NMSimDataGen or data.frame
#' @param varName The variable to "bin"
#' @param newVar The name of the new variable
#' @param breaks The number of breaks
#' @param binType The type of "binning"
#' @param labels The labels for the "bins"
#' @param ... Additional arguments that apply to different classes. These are dataType which specifies what data to use, i.e input or 
#'   		  output, for NMBasicModel, NMRun, NMSimModel and NMSimDataGen, and problemNum which specifies the required run for an NMRun object
#' @note If the \code{binType} is \code{counts} and calculation of quantiles generates duplicates, a warning will be eliminated and the duplicates
#' will be discarded.
#' @return A new object with a user-specified "binned" variable added
#' @keywords datagen
#' @examples
#' x <- addDerivedCategorical(mtcars, "mpg", breaks = 6, binType = "counts" )
#' show(x$mpg.CUT)
#' @author Mango Solutions


addDerivedCategorical <- function(obj, varName, newVar = paste(varName, ".CUT", sep = ""), breaks = 5, binType = "range", labels = NULL, ...)
{
	NULL
}
setGeneric("addDerivedCategorical")


addDerivedCategorical.NMBasicModel <- function(obj, varName, newVar = paste(varName, ".CUT", sep = ""), breaks = 5, binType = "range", labels = NULL, dataType = "output")
{
	#Get input or output dataframe
	df <- switch(dataType, 
			output = obj@outputData, 
			input = obj@inputData,
			RNMImportStop(msg = "Data type must be input or output!\n"))
	
	#Various logical conditions that must pass
	if(class(df) == "list")
		RNMImportStop(msg = "Slot output is a list!\n")
	if(is.na(match(varName, names(df))))
		RNMImportStop(msg = "Variable name not in data frame!\n")
	if(!(is.numeric(df[[varName]])))
		RNMImportStop(msg = "Variable not numeric!\n")
	if(binType == "explicitcuts" && length(breaks) == 1)
		RNMImportWarning(msg = "Breaks not specified for explicit cuts. Cutting by range.\n")
	if(binType == "counts" && ((nrow(df) %% breaks) != 0))
		RNMImportWarning(msg = "Data cannot be cut so that bins have equal amounts!\n")
	
	#Perform cut on data
	facData <- switch(binType, 		
			   unique = factor(df[[varName]]), 
			   explicitcuts = cut(df[[varName]], breaks, labels, include.lowest = TRUE),
			   range = cut(df[[varName]], breaks, labels),	
			   logrange = cut(log(df[[varName]]), breaks, labels),
			   counts = {
				   createQuants <- quantile(df[[varName]], seq(0, 1, len = breaks + 1))
				   if(any(duplicated(createQuants))) RNMImportWarning("Duplicated breakpoints found - taking uniques")
				   cut(df[[varName]], unique(createQuants), labels, include.lowest = TRUE) 
			   },
	 		   RNMImportStop(msg = "Bin type not defined!\n"))
	#Create column in data frame object
	obj@additionalVars[[newVar]] <- facData
	invisible(obj)
}
setMethod("addDerivedCategorical", signature(obj = "NMBasicModel"), addDerivedCategorical.NMBasicModel)
setMethod("addDerivedCategorical", signature(obj = "NMBasicModelNM7"), addDerivedCategorical.NMBasicModel)

addDerivedCategorical.NMRun <- function(obj, varName, newVar = paste(varName, ".CUT", sep = ""), breaks = 5, binType = "range", labels = NULL, problemNum = 1, dataType = "output")
{
	specObj <- getProblem(obj, problemNum)
	newObj <- addDerivedCategorical(specObj, varName, newVar, breaks, binType, labels, dataType)
	invisible(newObj)
}
setMethod("addDerivedCategorical", signature(obj = "NMRun"), addDerivedCategorical.NMRun)

setOldClass("data.frame")
addDerivedCategorical.data.frame <- function(obj, varName, newVar = paste(varName, ".CUT", sep = ""), breaks = 5, binType = "range", labels = NULL)
{
	#Various logical conditions that must pass
	if(is.na(match(varName, names(obj))))
		RNMImportStop(msg = "Variable name not in data frame!\n")
	if(!(is.numeric(obj[[varName]])))
		RNMImportStop(msg = "Variable not numeric!\n")  
	if(binType == "explicitcuts" && length(breaks) == 1)
		RNMImportWarning(msg = "Breaks not specified for explicit cuts. Cutting by range.\n")
	if(binType == "counts" && ((nrow(obj) %% breaks) != 0))
		RNMImportWarning(msg = "Data cannot be cut so that bins have equal amounts!\n")
	
	#Perform cut on data
	facData <- switch(binType, 		
			unique = factor(obj[[varName]]), 
			explicitcuts = cut(obj[[varName]], breaks, labels, include.lowest = TRUE),
			range = cut(obj[[varName]], breaks, labels),	
			logrange = cut(log(obj[[varName]]), breaks, labels),
			counts = {
				createQuants <- quantile(obj[[varName]], seq(0, 1, len = breaks + 1))
				if(any(duplicated(createQuants))) warning("Duplicated breakpoints found - taking uniques")
				cut(obj[[varName]], unique(createQuants), labels, include.lowest = TRUE) 
			},
			RNMImportStop(msg = "Bin type not defined!\n"))
	#Create column in data frame object
	obj[[newVar]] <- facData
	invisible(obj)
}
setMethod("addDerivedCategorical", signature(obj = "data.frame"), addDerivedCategorical.data.frame)

addDerivedCategorical.NMSimModel <- function(obj, varName, newVar = paste(varName, ".CUT", sep = ""), breaks = 5, binType = "range", labels = NULL, dataType = "output")
{
	#Get input or output dataframe
	df <- switch(dataType, 
			output = obj@outputData, 
			input = obj@inputData,
			RNMImportStop(msg = "Data type must be input or output!\n"))
	#Various logical conditions that must pass
	if(is.na(match(varName, names(df))))
		RNMImportStop(msg = "Variable name not in data frame!\n")
	if(!(is.numeric(df[[varName]])))
		RNMImportStop(msg = "Variable not numeric!\n")  
	if(binType == "explicitcuts" && length(breaks) == 1)
		RNMImportWarning(msg = "Breaks not specified for explicit cuts. Cutting by range.\n")
	if(binType == "counts" && ((nrow(df) %% breaks) != 0))
		RNMImportWarning(msg = "Data cannot be cut so that bins have equal amounts!\n")
	
	#Perform cut on data
	facData <- switch(binType, 		
			unique = factor(df[[varName]]), 
			explicitcuts = cut(df[[varName]], breaks, labels, include.lowest = TRUE),
			range = cut(df[[varName]], breaks, labels),	
			logrange = cut(log(df[[varName]]), breaks, labels),
			counts = {
				createQuants <- quantile(df[[varName]], seq(0, 1, len = breaks + 1))
				if(any(duplicated(createQuants))) warning("Duplicated breakpoints found - taking uniques")
				cut(df[[varName]], unique(createQuants), labels, include.lowest = TRUE) 
			},
			RNMImportStop(msg = "Bin type not defined!\n"))
	#Create column in data frame object
	obj@additionalVars[[newVar]] <- facData
	invisible(obj)
}
setMethod("addDerivedCategorical", signature(obj = "NMSimModel"), addDerivedCategorical.NMSimModel)
setMethod("addDerivedCategorical", signature(obj = "NMSimModelNM7"), addDerivedCategorical.NMSimModel)

addDerivedCategorical.NMSimDataGen <- function(obj, varName, newVar = paste(varName, ".CUT", sep = ""), breaks = 5, binType = "range", labels = NULL, dataType = "output")
{
	#Get input or output dataframe
	df <- switch(dataType, 
			output = obj@outputData, 
			input = obj@inputData,
			RNMImportStop(msg = "Data type must be input or output!\n"))
	
	#Various logical conditions that must pass
	if(is.na(match(varName, names(df))))
		RNMImportStop(msg = "Variable name not in data frame!\n")
	if(!(is.numeric(df[[varName]])))
		RNMImportStop(msg = "Variable not numeric!\n")  
	if(binType == "explicitcuts" && length(breaks) == 1)
		RNMImportWarning(msg = "Breaks not specified for explicit cuts. Cutting by range.\n")
	if(binType == "counts" && ((nrow(df) %% breaks) != 0))
		RNMImportWarning(msg = "Data cannot be cut so that bins have equal amounts!\n")
	
	#Perform cut on data
	facData <- switch(binType, 		
			unique = factor(df[[varName]]), 
			explicitcuts = cut(df[[varName]], breaks, labels, include.lowest = TRUE),
			range = cut(df[[varName]], breaks, labels),	
			logrange = cut(log(df[[varName]]), breaks, labels),
			counts = {
				createQuants <- quantile(df[[varName]], seq(0, 1, len = breaks + 1))
				if(any(duplicated(createQuants))) warning("Duplicated breakpoints found - taking uniques")
				cut(df[[varName]], unique(createQuants), labels, include.lowest = TRUE) 
			},
			RNMImportStop(msg = "Bin type not defined!\n"))
	#Create column in data frame object
	obj@additionalVars[[newVar]] <- facData
	invisible(obj)
}
setMethod("addDerivedCategorical", signature(obj = "NMSimDataGen"), addDerivedCategorical.NMSimDataGen)
MangoTheCat/RNMImport documentation built on May 8, 2019, 4:36 p.m.