Nothing
#' ODT Internal Functions
#'
#' Internal functions used by `trainTree` in the various
#' steps of the algorithm. These functions are not intended for
#' direct use by end users and are primarily for internal logic
#' and calculations within the package.
#'
#' @keywords internal
#' @name InternalFunctions
#' @return A list of internal outputs generated by these functions.
#'
#' @import matrixStats
#' @import partykit
#'
NULL
#' @rdname InternalFunctions
#' @title Get Optimal Treatment
#' @name getTreatment
#' @description This internal function calculates the optimal treatment based on patient sensitivity data.
#' @param PatientSensitivity A matrix representing drug response values (e.g., IC50), where rows correspond to patients/samples and columns correspond to drugs.
#' @param weights A numeric vector indicating which samples are considered; only samples with weights equal to 1 are included in the analysis.
#' @return An integer indicating the index of the optimal treatment based on the minimum sum of sensitivity values.
#'
#' @keywords internal
getTreatment <- function(PatientSensitivity, weights) {
# Filter PatientSensitivity based on weights, retaining only samples marked with weight 1
PatientSensitivity <- PatientSensitivity[weights == 1, , drop = FALSE]
# Calculate the sum of treatment responses across samples
sumtreat <- colSums2(PatientSensitivity)
# Identify the index of the treatment with the minimum sum of responses
biomk <- which.min(sumtreat)
names(biomk) <- colnames(PatientSensitivity)[biomk] # Assign name to the optimal treatment index
return(biomk) # Return the index of the optimal treatment
}
#' @rdname InternalFunctions
#' @title Get Split Information
#' @description This internal function calculates the optimal split point based on a specified gene in the expression data.
#' @param gene The index of the gene used for splitting the data.
#' @param PatientData A matrix representing patient features, where rows correspond to patients/samples and columns correspond to genes/features.
#' @param PatientSensitivity A matrix representing drug response values (e.g., IC50), where rows correspond to patients/samples and columns correspond to drugs.
#' @return A list containing:
#' - `sumic50`: The minimum summed IC50 values for the two groups.
#' - `Treatment1`: The treatment associated with the first group.
#' - `Treatment2`: The treatment associated with the second group.
#' - `split`: The index of the optimal split point.
#' - `expressionSplit`: The expression value at the split point adjusted by a small epsilon.
#'
#' @keywords internal
getSplit <- function(gene, PatientData, PatientSensitivity) {
# Order samples based on the specified gene for ascending and descending order
I <- order(PatientData[, gene])
Id <- order(PatientData[, gene], decreasing = TRUE)
# Calculate cumulative sums of PatientSensitivity for both orderings
M1 <- rbind(0, colCumsums(PatientSensitivity[I, ]))
M2 <- rbind(0, colCumsums(PatientSensitivity[Id, ]))
M2 <- M2[c(nrow(M1):1), , drop = FALSE]
# Determine the optimal split point by minimizing the sum of row minimums
th <- which.min(rowMins(M1) + rowMins(M2))
sumic50 <- min(rowMins(M1) + rowMins(M2))
# Determine the treatments for the two groups based on the split point
Treatment1 = max.col(-M1)[th]
names(Treatment1) = colnames(PatientSensitivity)[Treatment1]
Treatment2 = max.col(-M2)[th]
names(Treatment2) = colnames(PatientSensitivity)[Treatment2]
# Return a list with the relevant split information
return(list(
sumic50 = sumic50,
Treatment1 = Treatment1,
Treatment2 = Treatment2,
split = th,
expressionSplit = PatientData[I[th], gene] - 1e-6
)) # Adjust expression value slightly
}
#' @rdname InternalFunctions
#' @title Calculate Sum IC50 Value for Gene Patient Response
#' @description This internal function calculates the summed IC50 value based on the response of patients to a specific gene.
#' @param genePatientResponse A numeric vector representing the response of patients to a particular gene.
#' @param PatientSensitivity A matrix representing drug response values (e.g., IC50), where rows correspond to patients/samples and columns correspond to drugs.
#' @return A numeric value representing the minimum summed IC50 value derived from the patient responses.
#'
#' @keywords internal
getsumic50v3 <- function(genePatientResponse, PatientSensitivity) {
# Order the patient responses based on the gene response
I <- order(genePatientResponse) # Ascending order
Id <- I[length(I):1] # Descending order
# Calculate cumulative sums for both orderings
M1 <- colCumsums(PatientSensitivity, rows = I)
M2 <- colCumsums(PatientSensitivity, rows = Id)
# Combine row minimums from both cumulative sums to find the summed IC50
dummy <- c(0, rowMins(M1)) + c(rowMins(M2, rows = (nrow(M2):1)), 0)
sumic50 <- min(dummy) # Find the minimum summed IC50 value
return(sumic50) # Return the minimum summed IC50 value
}
#' @rdname InternalFunctions
#' @title Find Optimal Split for Expression Data
#' @description This internal function identifies the optimal split point in expression data based on patient sensitivity (IC50).
#' @param PatientSensitivity A matrix representing drug response values (e.g., IC50), where rows correspond to patients/samples and columns correspond to drugs.
#' @param PatientData A matrix representing patient features, where rows correspond to patients/samples and columns correspond to genes/features.
#' @param minimum An integer specifying the minimum number of samples required for a valid split (default is 1).
#' @param weights A numeric vector indicating which samples are under study; defaults to NULL, meaning all samples are considered.
#' @param verbose A logical value indicating whether to print additional information during execution (default is FALSE).
#' @return A `partysplit` object representing the optimal split based on the specified patient sensitivity and expression data.
#'
#' @keywords internal
findsplitExp <- function(PatientSensitivity, PatientData, minimum = 1, weights = NULL, verbose = FALSE) {
# PatientSensitivity: IC50
# PatientData: gene expression data
if (verbose)
print("Split!")
# Apply weights if provided
if (!is.null(weights)) {
PatientSensitivity <- PatientSensitivity[weights == 1, ]
PatientData <- PatientData[weights == 1, ]
}
# Check if PatientSensitivity is a matrix and has sufficient rows
if (is.matrix(PatientSensitivity) == TRUE) {
if (nrow(PatientSensitivity) <= minimum)
return(NULL)
} else {
return(NULL) # If not a matrix, return NULL
}
# Find the gene that minimizes the IC50 values
Gene <- which.min(apply(PatientData, 2, getsumic50v3, PatientSensitivity)) # Find optimal split
names(Gene) <- colnames(PatientData[, Gene]) # Assign gene name to the optimal gene index
# Get the split information
Output <- getSplit(Gene, PatientData, PatientSensitivity)
# Return a party split object with the relevant information
return(
partysplit(
varid = as.integer(Gene),
breaks = Output$expressionSplit,
index = c(1L, 2L),
info = list(treatments = c(names(Output$Treatment1), names(Output$Treatment2)))
)
)
}
#' @rdname InternalFunctions
#' @title Grow Decision Tree for Expression Data
#' @description This internal function recursively builds a decision tree based on expression data and patient sensitivity (IC50).
#' @param id A unique identifier for the node in the decision tree (default is 1L).
#' @param PatientSensitivity A matrix representing drug response values (e.g., IC50), where rows correspond to patients/samples and columns correspond to drugs.
#' @param PatientData A matrix representing patient features, where rows correspond to patients/samples and columns correspond to genes/features.
#' @param minbucket An integer specifying the minimum number of samples required in a child node for further splitting (default is 10).
#' @param weights A numeric vector indicating which samples are under study; defaults to NULL, meaning all samples are considered.
#' @return A `partynode` object representing the current node and its children in the decision tree.
#'
#' @keywords internal
#'
growtreeExp <- function(id = 1L,
PatientSensitivity,
PatientData,
minbucket = 10,
weights = NULL) {
# Initialize weights if not provided
if (is.null(weights))
weights <- rep(1L, nrow(PatientData))
# Check if there are enough observations; if not, stop here and return a leaf node
if (sum(weights) < minbucket)
return(partynode(id = id, info = names(
getTreatment(PatientSensitivity, weights)
)))
# Find the best split using the findsplitExp function
sp <- findsplitExp(PatientSensitivity, PatientData, minimum = minbucket, weights)
# Convert PatientData to a data frame for easier handling
datadf <- as.data.frame(PatientData)
# No split found, stop here and return a leaf node
if (is.null(sp)) {
return(partynode(id = id, info = names(
getTreatment(PatientSensitivity, weights)
)))
}
# Split the data based on the identified split
kidids <- kidids_split(sp, data = datadf)
# Check if all observations belong to the same child; if so, return a leaf node
if (length(unique(kidids[weights == 1])) == 1) {
return(partynode(id = id, info = names(
getTreatment(PatientSensitivity, weights)
)))
}
# Check if the minimum number of samples in any child node is less than minbucket; if so, stop here
if (min(table(kidids[weights == 1])) < minbucket) {
return(partynode(id = id, info = names(
getTreatment(PatientSensitivity, weights)
)))
}
# Set up all daughter nodes
kids <- vector(mode = "list", length = max(kidids, na.rm = TRUE))
for (kidid in 1:length(kids)) {
# Select observations for the current node
w <- weights
w[kidids != kidid] <- 0
# Get the next node ID
if (kidid > 1) {
myid <- max(nodeids(kids[[kidid - 1]]))
} else {
myid <- id
}
# Start recursion on this daughter node
kids[[kidid]] <- growtreeExp(
id = as.integer(myid + 1),
PatientSensitivity,
PatientData,
weights = w,
minbucket = minbucket
)
}
# Return a party node object representing the current node and its children
return(partynode(
id = as.integer(id),
split = sp,
kids = kids,
info = ""
))
}
#' @rdname InternalFunctions
#' @title Find Optimal Split in Mutation Data
#' @description This internal function identifies the optimal split point in a mutation matrix based on
#' patient sensitivity data (IC50) and the presence of specific mutations.
#' @param PatientSensitivity A matrix representing drug response values (e.g., IC50), where rows correspond to patients/samples and columns correspond to drugs.
#' @param X (PatientData) A matrix representing patient features, where rows correspond to patients/samples and columns correspond to genes/features.
#' @param minimum An integer specifying the minimum number of samples required for a valid split (default is 1).
#' @param weights A numeric vector indicating which samples are included in the analysis; only samples with weights equal to 1 are considered.
#' @return A `partysplit` object if a valid split is found, or `NULL` if no valid split can be determined.
#'
#' @keywords internal
#'
findsplitMut <- function(PatientSensitivity, X, minimum = 1, weights) {
# Filter PatientSensitivity and mutation matrix X by weights
PatientSensitivity <- PatientSensitivity[weights == 1, ]
X <- X[weights == 1, ]
# Check if PatientSensitivity is a matrix and has enough rows
if (is.matrix(PatientSensitivity) == TRUE) {
if (nrow(PatientSensitivity) <= minimum)
return(NULL) # Not enough samples for split
} else {
return(NULL) # Return NULL if PatientSensitivity is not a matrix
}
# Identify the maximum value in the mutation matrix (mutation) and minimum value (wildtype)
mut <- max(X) # Mutation is the largest value of X
WT <- min(X) # Wildtype is the smallest value of X
# Calculate treatment outcomes based on mutation presence
tA <- t(PatientSensitivity) %*% (X == mut) # Treatment with mutation
tB <- t(PatientSensitivity) %*% (X == WT) # Treatment for wildtype
# tA Sum of the IC50 of the mutated samples treated with each treatment
# tB Sum of the IC50 of the wildtype samples treated with each treatment
# The biomarker mutation
optimal <- colMins(tA) + colMins(tB) # Optimal selection of treatments for each mutation
biomk <- which.min(optimal) # Identify the optimal mutation
names(biomk) <- colnames(tA)[biomk] # Assign name to the optimal mutation
# If both mutated and WT samples return the same treatment, don't split
if (length(unique(X[, biomk])) == 1)
return(NULL)
# Ensure there are patients with and without the mutation to justify a split
# Identify the treatments for the mutated and wildtype samples
Treatment1Mut <- which.min(tA[, biomk]) # Treatment for mutated samples
Treatment1WT <- which.min(tB[, biomk]) # Treatment for wildtype samples
# Check if the same treatment is suggested for both groups; if so, don't split
if (Treatment1Mut == Treatment1WT)
return(NULL) # Ensure that the treatments differ
# Create an index for output with names indicating mutation and wildtype
index = c(1L, 2L)
names(index) <- c(paste0(names(biomk), "mut"), paste0(names(biomk), "WT"))
# Return a party split object with the variable ID, index, and treatment information
return(partysplit(
varid = as.integer(biomk),
index = index,
info = list(treatments = c(names(Treatment1Mut), names(Treatment1WT)))
))
}
#' @rdname InternalFunctions
#' @title Grow Decision Tree for Mutation Data
#' @description This internal function recursively builds a decision tree based on patient data and
#' drug sensitivity responses, specifically designed for mutation data.
#' @param id A unique identifier for the node in the decision tree (default is 1L).
#' @param PatientSensitivity A matrix representing drug response values (e.g., IC50), where rows correspond to patients/samples and columns correspond to drugs.
#' @param PatientData A matrix representing patient features, where rows correspond to patients/samples and columns correspond to genes/features.
#' @param minbucket An integer specifying the minimum number of samples required in a child node for further splitting (default is 10).
#' @param weights A numeric vector indicating which samples are under study; defaults to NULL, meaning all samples are considered.
#' @param findsplit A function used to find the best split; defaults to `findsplitMut`.
#' @return A `partynode` object representing the current node and its children in the decision tree.
#'
#' @keywords internal
#'
growtreeMut <- function(id = 1L,
PatientSensitivity,
PatientData,
minbucket = 10,
weights = NULL,
findsplit = findsplitMut) {
# nolint: line_length_linter.
# Set default weights if none are provided
if (is.null(weights)) {
weights <- rep(1L, nrow(PatientData)) # Each observation gets equal weight
}
# Check if the total weight of observations is less than "minbucket"
# If so, stop recursion and return a leaf node with treatment info
if (sum(weights) < minbucket) {
return(partynode(id = id, info = names(
getTreatment(PatientSensitivity, weights)
))) # nolint: line_length_linter.
}
# Find the best split using the provided function
sp <- findsplitMut(PatientSensitivity, PatientData, minimum = minbucket, weights)
# If no valid split is found, stop here and return a node with the current treatment info
if (is.null(sp)) {
return(partynode(id = id, info = names(
getTreatment(PatientSensitivity, weights)
)))
}
# Convert PatientData to a data frame for processing
datadf <- as.data.frame(PatientData)
# Get the child node identifiers based on the split
kidids <- kidids_split(sp, data = datadf)
# If there's only one child node type, return a leaf node with treatment info
if (length(unique(kidids)) == 1) {
return(partynode(id = id, info = names(
getTreatment(PatientSensitivity, weights)
)))
}
# If any of the splits result in fewer observations than minbucket, return the current node
if (min(table(kidids[weights == 1, drop = FALSE])) < minbucket) {
return(partynode(id = id, info = names(
getTreatment(PatientSensitivity, weights)
)))
}
# Initialize a list to hold the child nodes
kids <- vector(mode = "list", length = max(kidids, na.rm = TRUE)) # na.rm=TRUE removes any missing values
# Iterate through each child node
for (kidid in 1:length(kids)) {
# Select observations for the current child node
w <- weights
w[kidids != kidid] <- 0 # Set weights to 0 for observations not selected in this split
# Determine the next node ID
if (kidid > 1) {
myid <- max(nodeids(kids[[kidid - 1]])) # Get the last used ID from the previous child
} else {
myid <- id # For the first child, use the current node ID
}
# Recursively call growtreeMut to create the child node
kids[[kidid]] <- growtreeMut(
id = as.integer(myid + 1),
PatientSensitivity,
PatientData,
weights = w,
minbucket = minbucket,
findsplit = findsplitMut
)
}
# Return the current node with its children
return(partynode(
id = as.integer(id),
split = sp,
kids = kids,
info = ""
))
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.