#' This function allows the user to create new variables derived from actual input or output data stored in a NONMEM object or a data.frame.
#' The function "bins" the data in a user-chosen manner. The new "binned" variable will be added to the "additionalVars" data frame of the object, or as a new column if the
#' object is a data.frame. The methods for creating the categorical variables are:
#' \itemize{
#' \item{unique:}{Variable is coerced into a factor with no "binning" at all}
#' \item{explicitcuts:}{The user supplies "cut points" for partitioning the data}
#' \item{range:}{The user specifies a number of cut points, and they are the generated by cutting the range of the values of the variable into the given number}
#' \item{logrange:}{As above, but done by range of the log of the variable}
#' \item{counts:}{User specifies number of cut points, and the bins are then generated so that the bins have equal amounts of data}
#' }
#' @title Add binned variable to NONMEM object
#' @param obj An object of class NMBasicModel, NMRun, NMSimModel, NMSimDataGen or data.frame
#' @param varName The variable to "bin"
#' @param newVar The name of the new variable
#' @param breaks The number of breaks
#' @param binType The type of "binning"
#' @param labels The labels for the "bins"
#' @param ... Additional arguments that apply to different classes. These are dataType which specifies what data to use, i.e input or
#' output, for NMBasicModel, NMRun, NMSimModel and NMSimDataGen, and problemNum which specifies the required run for an NMRun object
#' @note If the \code{binType} is \code{counts} and calculation of quantiles generates duplicates, a warning will be eliminated and the duplicates
#' will be discarded.
#' @return A new object with a user-specified "binned" variable added
#' @keywords datagen
#' @examples
#' x <- addDerivedCategorical(mtcars, "mpg", breaks = 6, binType = "counts" )
#' show(x$mpg.CUT)
#' @author Mango Solutions
addDerivedCategorical <- function(obj, varName, newVar = paste(varName, ".CUT", sep = ""), breaks = 5, binType = "range", labels = NULL, ...)
{
NULL
}
setGeneric("addDerivedCategorical")
addDerivedCategorical.NMBasicModel <- function(obj, varName, newVar = paste(varName, ".CUT", sep = ""), breaks = 5, binType = "range", labels = NULL, dataType = "output")
{
#Get input or output dataframe
df <- switch(dataType,
output = obj@outputData,
input = obj@inputData,
RNMImportStop(msg = "Data type must be input or output!\n"))
#Various logical conditions that must pass
if(class(df) == "list")
RNMImportStop(msg = "Slot output is a list!\n")
if(is.na(match(varName, names(df))))
RNMImportStop(msg = "Variable name not in data frame!\n")
if(!(is.numeric(df[[varName]])))
RNMImportStop(msg = "Variable not numeric!\n")
if(binType == "explicitcuts" && length(breaks) == 1)
RNMImportWarning(msg = "Breaks not specified for explicit cuts. Cutting by range.\n")
if(binType == "counts" && ((nrow(df) %% breaks) != 0))
RNMImportWarning(msg = "Data cannot be cut so that bins have equal amounts!\n")
#Perform cut on data
facData <- switch(binType,
unique = factor(df[[varName]]),
explicitcuts = cut(df[[varName]], breaks, labels, include.lowest = TRUE),
range = cut(df[[varName]], breaks, labels),
logrange = cut(log(df[[varName]]), breaks, labels),
counts = {
createQuants <- quantile(df[[varName]], seq(0, 1, len = breaks + 1))
if(any(duplicated(createQuants))) RNMImportWarning("Duplicated breakpoints found - taking uniques")
cut(df[[varName]], unique(createQuants), labels, include.lowest = TRUE)
},
RNMImportStop(msg = "Bin type not defined!\n"))
#Create column in data frame object
obj@additionalVars[[newVar]] <- facData
invisible(obj)
}
setMethod("addDerivedCategorical", signature(obj = "NMBasicModel"), addDerivedCategorical.NMBasicModel)
setMethod("addDerivedCategorical", signature(obj = "NMBasicModelNM7"), addDerivedCategorical.NMBasicModel)
addDerivedCategorical.NMRun <- function(obj, varName, newVar = paste(varName, ".CUT", sep = ""), breaks = 5, binType = "range", labels = NULL, problemNum = 1, dataType = "output")
{
specObj <- getProblem(obj, problemNum)
newObj <- addDerivedCategorical(specObj, varName, newVar, breaks, binType, labels, dataType)
invisible(newObj)
}
setMethod("addDerivedCategorical", signature(obj = "NMRun"), addDerivedCategorical.NMRun)
setOldClass("data.frame")
addDerivedCategorical.data.frame <- function(obj, varName, newVar = paste(varName, ".CUT", sep = ""), breaks = 5, binType = "range", labels = NULL)
{
#Various logical conditions that must pass
if(is.na(match(varName, names(obj))))
RNMImportStop(msg = "Variable name not in data frame!\n")
if(!(is.numeric(obj[[varName]])))
RNMImportStop(msg = "Variable not numeric!\n")
if(binType == "explicitcuts" && length(breaks) == 1)
RNMImportWarning(msg = "Breaks not specified for explicit cuts. Cutting by range.\n")
if(binType == "counts" && ((nrow(obj) %% breaks) != 0))
RNMImportWarning(msg = "Data cannot be cut so that bins have equal amounts!\n")
#Perform cut on data
facData <- switch(binType,
unique = factor(obj[[varName]]),
explicitcuts = cut(obj[[varName]], breaks, labels, include.lowest = TRUE),
range = cut(obj[[varName]], breaks, labels),
logrange = cut(log(obj[[varName]]), breaks, labels),
counts = {
createQuants <- quantile(obj[[varName]], seq(0, 1, len = breaks + 1))
if(any(duplicated(createQuants))) warning("Duplicated breakpoints found - taking uniques")
cut(obj[[varName]], unique(createQuants), labels, include.lowest = TRUE)
},
RNMImportStop(msg = "Bin type not defined!\n"))
#Create column in data frame object
obj[[newVar]] <- facData
invisible(obj)
}
setMethod("addDerivedCategorical", signature(obj = "data.frame"), addDerivedCategorical.data.frame)
addDerivedCategorical.NMSimModel <- function(obj, varName, newVar = paste(varName, ".CUT", sep = ""), breaks = 5, binType = "range", labels = NULL, dataType = "output")
{
#Get input or output dataframe
df <- switch(dataType,
output = obj@outputData,
input = obj@inputData,
RNMImportStop(msg = "Data type must be input or output!\n"))
#Various logical conditions that must pass
if(is.na(match(varName, names(df))))
RNMImportStop(msg = "Variable name not in data frame!\n")
if(!(is.numeric(df[[varName]])))
RNMImportStop(msg = "Variable not numeric!\n")
if(binType == "explicitcuts" && length(breaks) == 1)
RNMImportWarning(msg = "Breaks not specified for explicit cuts. Cutting by range.\n")
if(binType == "counts" && ((nrow(df) %% breaks) != 0))
RNMImportWarning(msg = "Data cannot be cut so that bins have equal amounts!\n")
#Perform cut on data
facData <- switch(binType,
unique = factor(df[[varName]]),
explicitcuts = cut(df[[varName]], breaks, labels, include.lowest = TRUE),
range = cut(df[[varName]], breaks, labels),
logrange = cut(log(df[[varName]]), breaks, labels),
counts = {
createQuants <- quantile(df[[varName]], seq(0, 1, len = breaks + 1))
if(any(duplicated(createQuants))) warning("Duplicated breakpoints found - taking uniques")
cut(df[[varName]], unique(createQuants), labels, include.lowest = TRUE)
},
RNMImportStop(msg = "Bin type not defined!\n"))
#Create column in data frame object
obj@additionalVars[[newVar]] <- facData
invisible(obj)
}
setMethod("addDerivedCategorical", signature(obj = "NMSimModel"), addDerivedCategorical.NMSimModel)
setMethod("addDerivedCategorical", signature(obj = "NMSimModelNM7"), addDerivedCategorical.NMSimModel)
addDerivedCategorical.NMSimDataGen <- function(obj, varName, newVar = paste(varName, ".CUT", sep = ""), breaks = 5, binType = "range", labels = NULL, dataType = "output")
{
#Get input or output dataframe
df <- switch(dataType,
output = obj@outputData,
input = obj@inputData,
RNMImportStop(msg = "Data type must be input or output!\n"))
#Various logical conditions that must pass
if(is.na(match(varName, names(df))))
RNMImportStop(msg = "Variable name not in data frame!\n")
if(!(is.numeric(df[[varName]])))
RNMImportStop(msg = "Variable not numeric!\n")
if(binType == "explicitcuts" && length(breaks) == 1)
RNMImportWarning(msg = "Breaks not specified for explicit cuts. Cutting by range.\n")
if(binType == "counts" && ((nrow(df) %% breaks) != 0))
RNMImportWarning(msg = "Data cannot be cut so that bins have equal amounts!\n")
#Perform cut on data
facData <- switch(binType,
unique = factor(df[[varName]]),
explicitcuts = cut(df[[varName]], breaks, labels, include.lowest = TRUE),
range = cut(df[[varName]], breaks, labels),
logrange = cut(log(df[[varName]]), breaks, labels),
counts = {
createQuants <- quantile(df[[varName]], seq(0, 1, len = breaks + 1))
if(any(duplicated(createQuants))) warning("Duplicated breakpoints found - taking uniques")
cut(df[[varName]], unique(createQuants), labels, include.lowest = TRUE)
},
RNMImportStop(msg = "Bin type not defined!\n"))
#Create column in data frame object
obj@additionalVars[[newVar]] <- facData
invisible(obj)
}
setMethod("addDerivedCategorical", signature(obj = "NMSimDataGen"), addDerivedCategorical.NMSimDataGen)
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.