#' Run backpropagation for a single layer. In backpropagation, a gradient is
#' computed by taking partial derivatives with respect to each of the model weights.
#' Given a learning rate, the weights are then adjusted according to the gradient.
#' @param modelResults An object of the ModelResults class.
#' @param prunedModels The models that remain after pruning.
#' @param Y.pred The predicted phenotype value.
#' @param minCutoff Minimum cutoff for the prediction.
#' @param maxCutoff Maximum cutoff for the prediction.
#' @param useCutoff Whether or not to use the cutoff for prediction. Default is FALSE.
#' @param weights The current values of the weights.
#' @param averaging If TRUE, then averaging is used to combine predictors rather than
#' retaining the same functional form for both the input and the output.
Backpropagate <- function(modelResults, prunedModels, Y.pred, minCutoff, maxCutoff, useCutoff = FALSE,
weights, averaging = FALSE){
# Calculate gradient.
gradient <- NULL
if(averaging){
gradient <- computeGradientForAveraging(modelResults = modelResults, prunedModels = prunedModels,
minCutoff = minCutoff,
maxCutoff = maxCutoff,
useCutoff = useCutoff,
weights = weights)
}else{
gradient <- computeGradient(modelResults = modelResults, prunedModels = prunedModels,
minCutoff = minCutoff,
maxCutoff = maxCutoff,
useCutoff = useCutoff,
weights = weights)
}
# Set previous weights.
prevWeights <- modelResults@current.metaFeature.weights
# Update gradient.
modelResults@current.gradient <- as.matrix(gradient)
modelResults@iteration.tracking[modelResults@current.iteration+1,
which(grepl("Gradient",
colnames(modelResults@iteration.tracking)))] <- gradient
# Set current weights, previous weights, and gradient.
# Reference for the optimization algorithms: https://arxiv.org/abs/1609.04747
if(modelResults@optimization.type == "SGD"){
# Stochastic Gradient Descent
modelResults@current.metaFeature.weights <- prevWeights -
modelResults@learning.rate * modelResults@current.gradient
# Set the new weights and gradient.
}else if(modelResults@optimization.type == "momentum"){
# Momentum
gamma <- 0.9
update.vec <- gamma * modelResults@previous.update.vector +
modelResults@learning.rate * modelResults@current.gradient
if(modelResults@current.iteration == 1){
update.vec <- modelResults@learning.rate * modelResults@current.gradient
}
modelResults@current.metaFeature.weights <- prevWeights - update.vec
modelResults@previous.update.vector <- update.vec
}else if(modelResults@optimization.type == "adagrad"){
# Adagrad
G <- modelResults@sum.square.gradients
epsilon <- 1e-8
update.vec <- (modelResults@learning.rate / sqrt(G+epsilon)) * modelResults@current.gradient
if(modelResults@current.iteration == 1){
update.vec <- modelResults@learning.rate * modelResults@current.gradient
}
modelResults@current.metaFeature.weights <- prevWeights - update.vec
modelResults@sum.square.gradients <- modelResults@sum.square.gradients +
(modelResults@current.gradient^2)
}else if(modelResults@optimization.type == "adam"){
# ADAM
beta1 <- 0.9
beta2 <- 0.999
epsilon <- 1e-8
m <- beta1 * modelResults@previous.momentum +
(1-beta1) * modelResults@current.gradient
v <- beta2 * modelResults@previous.update.vector +
(1-beta2) * (modelResults@current.gradient^2)
m.hat <- m / (1 - (beta1)^modelResults@current.iteration)
v.hat <- v / (1 - (beta2)^modelResults@current.iteration)
update.vec <- (modelResults@learning.rate * m.hat) / (sqrt(v.hat)+epsilon)
modelResults@current.metaFeature.weights <- prevWeights - update.vec
modelResults@previous.momentum <- m
modelResults@previous.update.vector <- v
}
# Update the weights for this iteration.
modelResults@iteration.tracking[modelResults@current.iteration+1,
which(grepl("Weight",
colnames(modelResults@iteration.tracking)))] <- modelResults@current.metaFeature.weights
# Return.
return(modelResults)
}
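# The four update rules above follow Ruder (2016). They can be sanity-checked in
# isolation; the function below is an illustrative sketch (not package API) that
# minimizes f(w) = (w - 3)^2 with each optimizer, using the same hyperparameters
# as Backpropagate (gamma = 0.9, beta1 = 0.9, beta2 = 0.999, epsilon = 1e-8).
demoOptimizers <- function(lr = 0.1, iters = 100){
  grad <- function(w) 2 * (w - 3)
  w <- c(SGD = 0, momentum = 0, adagrad = 0, adam = 0)
  u <- 0; G <- 0; m <- 0; v <- 0
  for(t in 1:iters){
    # SGD: step along the negative gradient.
    w["SGD"] <- w["SGD"] - lr * grad(w["SGD"])
    # Momentum: accumulate a decaying sum of past updates.
    u <- 0.9 * u + lr * grad(w["momentum"])
    w["momentum"] <- w["momentum"] - u
    # Adagrad: divide the rate by the root of accumulated squared gradients.
    g <- grad(w["adagrad"])
    G <- G + g^2
    w["adagrad"] <- w["adagrad"] - (lr / sqrt(G + 1e-8)) * g
    # Adam: bias-corrected first- and second-moment estimates.
    g <- grad(w["adam"])
    m <- 0.9 * m + 0.1 * g
    v <- 0.999 * v + 0.001 * g^2
    w["adam"] <- w["adam"] - lr * (m / (1 - 0.9^t)) / (sqrt(v / (1 - 0.999^t)) + 1e-8)
  }
  # SGD, momentum, and Adam approach the minimizer 3 quickly; Adagrad moves
  # more slowly because its accumulated squared gradients shrink the step size.
  return(w)
}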
#' Compute the gradient for a single-layer neural network.
#' @param modelResults An object of the ModelResults class.
#' @param prunedModels The models that remain after pruning.
#' @param minCutoff Minimum cutoff for the prediction.
#' @param maxCutoff Maximum cutoff for the prediction.
#' @param useCutoff Whether or not to use the cutoff for prediction. Default is FALSE.
#' @param weights Current weights for the model.
computeGradient <- function(modelResults, prunedModels, minCutoff, maxCutoff, useCutoff = FALSE,
weights){
# Extract analyte types
S_start <- unlist(prunedModels@sources)
S_end <- unlist(prunedModels@targets)
S <- unlist(prunedModels@pairs)
analyteTypeOut <- modelResults@model.input@input.data@analyteType2
analyteTypeIn <- modelResults@model.input@input.data@analyteType1
if(modelResults@model.input@independent.var.type == 2){
analyteTypeIn <- modelResults@model.input@input.data@analyteType2
}
if(modelResults@model.input@outcome == 1){
analyteTypeOut <- modelResults@model.input@input.data@analyteType1
}
Aind <- data.frame(analyteTypeIn)
Aout <- data.frame(analyteTypeOut)
if(!is.null(ncol(analyteTypeIn))){
Aind <- data.frame(analyteTypeIn[S_start,])
Aout <- data.frame(analyteTypeOut[S_end,])
}
if(nrow(Aind) == 1){
Aind <- t(Aind)
Aout <- t(Aout)
}
# Extract covariates. drop = FALSE keeps C a data frame even with one covariate.
C <- NA
if(length(modelResults@model.input@covariates) > 0 && modelResults@model.input@covariates[1] != ""){
C <- modelResults@model.input@input.data@sampleMetaData[,modelResults@model.input@covariates, drop = FALSE]
}
# Extract other components for gradient.
P_hat <- CompositePrediction(pairs = S, modelResults = modelResults,
minCutoff = minCutoff,
maxCutoff = maxCutoff,
useCutoff = useCutoff,
weights = weights,
sources = S_start,
targets = S_end)
P <- modelResults@model.input@true.phenotypes
Beta <- modelResults@model.input@model.properties[S,]
subsetMeta <- lapply(modelResults@model.input@metaFeatures, function(metafeature){
return(as.data.frame(metafeature[,S]))
})
M <- t(as.data.frame(do.call(cbind, subsetMeta)))
rownames(M) <- names(modelResults@model.input@metaFeatures)
phi <- modelResults@current.metaFeature.weights
# Compute the gradient for each sample.
sampGradients <- lapply(1:length(P), function(k){
# Compute the derivative of P-hat.
PhatDeriv <- 2 * (P[k] - P_hat[k])
# Extract the independent, outcome, and covariate values.
AindLocal <- data.frame(Aind[,k])
if(length(AindLocal) == ncol(Aind) && ncol(Aind) > 1){
AindLocal <- t(AindLocal)
colnames(AindLocal) <- colnames(Aind)
}
AoutLocal <- data.frame(Aout[,k])
if(length(AoutLocal) == ncol(Aout) && ncol(Aout) > 1){
AoutLocal <- t(AoutLocal)
colnames(AoutLocal) <- colnames(Aout)
}
CovarLocal <- NA
if(length(modelResults@model.input@covariates) > 0 && modelResults@model.input@covariates[1] != ""){
CovarLocal <- data.frame(C[k,])
colnames(CovarLocal) <- colnames(C)
}
MLocal <- data.frame(M[,k])
if(length(MLocal) == ncol(M) && ncol(M) > 1){
MLocal <- t(MLocal)
colnames(MLocal) <- colnames(M)
}
# Compute the denominator.
dhat <- Dhat(Aind = as.matrix(AindLocal),
phi = phi,
M = MLocal,
Beta2 = Beta[,"type"],
Beta3 = Beta[,"a:type"])
# Compute the numerator.
nhat <- Nhat(Aind = as.matrix(AindLocal),
Aout = as.matrix(AoutLocal),
C = CovarLocal,
phi = phi,
M = MLocal,
Beta0 = Beta[,"(Intercept)"],
Beta1 = Beta[,"a"],
BetaC = Beta[,unlist(modelResults@model.input@covariates)])
# Compute the derivative for each value of phi.
phiGradients <- lapply(1:length(phi), function(gamma){
# Extract the relevant component of M.
Mgamma <- MLocal[gamma,]
# Compute the derivative of P-hat with respect to the denominator.
dhatprime <- DhatPrime(Aind = as.matrix(AindLocal),
M = Mgamma,
Beta2 = Beta[,"type"],
Beta3 = Beta[,"a:type"])
# Compute the derivative of P-hat with respect to the numerator.
nhatprime <- NhatPrime(Aind = as.matrix(AindLocal),
Aout = as.matrix(AoutLocal),
C = CovarLocal,
M = Mgamma,
Beta0 = Beta[,"(Intercept)"],
Beta1 = Beta[,"a"],
BetaC = Beta[,unlist(modelResults@model.input@covariates)])
return((dhat * nhatprime - nhat * dhatprime) / (dhat ^ 2))
})
# Obtain the subgraph derivative.
subgraphDerivDF <- as.data.frame(phiGradients)
colnames(subgraphDerivDF) <- names(modelResults@model.input@metaFeatures)
return(PhatDeriv * subgraphDerivDF)
})
sampGradientsDF <- do.call(rbind, sampGradients)
gradients <- colSums(sampGradientsDF)
# Return the derivative.
return(-1 * gradients)
}
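# computeGradient applies the quotient rule, d/dphi (N/D) = (D*N' - N*D')/D^2,
# to each subgraph's prediction N(phi)/D(phi). A self-contained check of that
# identity on toy scalar functions (illustrative only, no package objects):
quotientRuleCheck <- function(phi = 0.7, h = 1e-6){
  N <- function(p) 2 * p + 1;  Nprime <- function(p) 2
  D <- function(p) p^2 + 3;    Dprime <- function(p) 2 * p
  analytic <- (D(phi) * Nprime(phi) - N(phi) * Dprime(phi)) / D(phi)^2
  numeric <- (N(phi + h) / D(phi + h) - N(phi - h) / D(phi - h)) / (2 * h)
  # The two values should agree to roughly 1e-9.
  return(c(analytic = analytic, numeric = numeric))
}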
#' Compute the gradient for a single-layer neural network when averaging is
#' used to combine predictors.
#' @param modelResults An object of the ModelResults class.
#' @param prunedModels The models that remain after pruning.
#' @param minCutoff Minimum cutoff for the prediction.
#' @param maxCutoff Maximum cutoff for the prediction.
#' @param useCutoff Whether or not to use the cutoff for prediction. Default is FALSE.
#' @param weights Current weights for the model.
computeGradientForAveraging <- function(modelResults, prunedModels, minCutoff, maxCutoff, useCutoff = FALSE,
weights){
# Extract analyte types
S_start <- unlist(prunedModels@sources)
S_end <- unlist(prunedModels@targets)
S <- unlist(prunedModels@pairs)
analyteTypeOut <- modelResults@model.input@input.data@analyteType2
analyteTypeIn <- modelResults@model.input@input.data@analyteType1
if(modelResults@model.input@independent.var.type == 2){
analyteTypeIn <- modelResults@model.input@input.data@analyteType2
}
if(modelResults@model.input@outcome == 1){
analyteTypeOut <- modelResults@model.input@input.data@analyteType1
}
Aind <- data.frame(analyteTypeIn)
Aout <- data.frame(analyteTypeOut)
if(!is.null(ncol(analyteTypeIn))){
Aind <- data.frame(analyteTypeIn[S_start,])
Aout <- data.frame(analyteTypeOut[S_end,])
}
# Extract covariates. drop = FALSE keeps C a data frame even with one covariate.
C <- NA
if(length(modelResults@model.input@covariates) > 0 && modelResults@model.input@covariates[1] != ""){
C <- modelResults@model.input@input.data@sampleMetaData[,modelResults@model.input@covariates, drop = FALSE]
}
# Extract other components for gradient.
P_hat <- CompositePrediction(pairs = S, modelResults = modelResults,
minCutoff = minCutoff,
maxCutoff = maxCutoff,
useCutoff = useCutoff,
weights = weights,
sources = S_start,
targets = S_end,
averaging = TRUE)
P <- modelResults@model.input@true.phenotypes
Beta <- modelResults@model.input@model.properties[S,]
M <- as.data.frame(do.call(rbind, modelResults@model.input@metaFeatures)[,S])
rownames(M) <- names(modelResults@model.input@metaFeatures)
phi <- modelResults@current.metaFeature.weights
# Compute the gradient for each sample.
sampGradients <- lapply(1:length(P), function(k){
# Compute the derivative of P-hat.
PhatDeriv <- 2 * (P[k] - P_hat[k])
# Extract the independent, outcome, and covariate values.
AindLocal <- data.frame(Aind[,k])
if(length(AindLocal) == ncol(Aind) && ncol(Aind) > 1){
AindLocal <- t(AindLocal)
colnames(AindLocal) <- colnames(Aind)
}
AoutLocal <- data.frame(Aout[,k])
if(length(AoutLocal) == ncol(Aout) && ncol(Aout) > 1){
AoutLocal <- t(AoutLocal)
colnames(AoutLocal) <- colnames(Aout)
}
CovarLocal <- NA
if(length(modelResults@model.input@covariates) > 0 && modelResults@model.input@covariates[1] != ""){
CovarLocal <- data.frame(C[k,])
colnames(CovarLocal) <- colnames(C)
}
# Compute the denominator: Beta2 + Beta3 * Aind for each pair.
term1 <- as.matrix(Beta[,"type"])
term2 <- AindLocal * as.matrix(Beta[,"a:type"])
dhat <- term1 + term2
# Compute the numerator: Aout - (Beta0 + Beta1 * Aind + BetaC * C) for each pair.
C <- CovarLocal
term1 <- as.matrix(AoutLocal)
term2 <- as.matrix(Beta[,"(Intercept)"])
term3 <- AindLocal * as.matrix(Beta[,"a"])
term4 <- 0
if(is.data.frame(C)){
for(covariate in colnames(C)){
BetaC <- as.matrix(Beta[,covariate])
covarMat <- matrix(rep(unlist(C[covariate]), nrow(BetaC)), nrow = nrow(BetaC))
term4 <- term4 + BetaC * covarMat
}
}
nhat <- term1 - (term2 + term3 + term4)
# Compute the final value.
predVec <- do.call(cbind, lapply(1:nrow(M), function(i){
return(as.data.frame(nhat / dhat))
}))
PijDeriv <- colSums(-1 * t(M) * predVec)
# Return
return(PijDeriv * PhatDeriv)
})
sampGradientsDF <- do.call(rbind, sampGradients)
gradients <- colSums(sampGradientsDF)
# Return the derivative.
return(-1 * gradients)
}
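# In the averaging case, each pair's nhat / dhat ratio comes from solving the
# fitted pairwise model Aout = b0 + b1*A + b2*t + b3*A*t (+ covariates) for the
# phenotype t: t = (Aout - b0 - b1*A - bC*C) / (b2 + b3*A). A toy illustration
# with made-up coefficients (hypothetical values, for intuition only):
demoAveragingAlgebra <- function(){
  b0 <- 0.5; b1 <- 1.2; b2 <- 0.8; b3 <- 0.3
  A <- 2; t.true <- 1
  Aout <- b0 + b1 * A + b2 * t.true + b3 * A * t.true
  # Solving back for the phenotype recovers t.true = 1.
  return((Aout - b0 - b1 * A) / (b2 + b3 * A))
}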
#' Compute the D-hat value, which is the denominator value of the composite prediction
#' for a sample k and a subgraph lambda.
#' @param Aind The matrix containing the values of the independent analyte type.
#' @param phi The weights of the meta-features.
#' @param M The meta-features.
#' @param Beta2 The phenotype term coefficients.
#' @param Beta3 The interaction term coefficients.
Dhat <- function(Aind, phi, M, Beta2, Beta3){
# Build one copy of phi per column of M (i.e., per pair) so that each
# meta-feature row of M is scaled by its weight before summing within each pair.
M <- as.matrix(M)
phiMat <- matrix(rep(phi, ncol(M)), nrow = length(phi))
sumWeights <- colSums(phiMat * M)
term1 <- sum(Beta2 * sumWeights)
term2 <- sum(Aind * Beta3 * sumWeights)
return(term1 + term2)
}
#' Compute the D-hat-prime value, which is the derivative of the denominator of the
#' composite prediction for a sample k and a subgraph lambda with respect to a
#' single meta-feature metric gamma.
#' @param Aind The matrix containing the values of the independent analyte type.
#' @param M The meta-features.
#' @param Beta2 The phenotype term coefficients.
#' @param Beta3 The interaction term coefficients.
DhatPrime <- function(Aind, M, Beta2, Beta3){
term1 <- sum(Beta2 * M)
term2 <- sum(Aind * Beta3 * M)
return(term1 + term2)
}
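# Because Dhat is linear in phi, DhatPrime is exactly the change in Dhat per
# unit change in one weight phi[gamma], with M restricted to row gamma. A toy
# consistency check using the functions above (made-up shapes: 2 meta-features,
# 3 pairs; illustrative only):
checkDhatPrime <- function(gamma = 2, h = 1e-4){
  set.seed(1)
  phi <- c(0.4, 0.6)
  M <- matrix(runif(6), nrow = 2)          # meta-features x pairs
  Aind <- matrix(runif(3), ncol = 1)       # one sample, 3 pairs
  Beta2 <- runif(3); Beta3 <- runif(3)
  phi2 <- phi; phi2[gamma] <- phi2[gamma] + h
  finiteDiff <- (Dhat(Aind, phi2, M, Beta2, Beta3) -
                 Dhat(Aind, phi, M, Beta2, Beta3)) / h
  # Linearity in phi makes the finite difference exact.
  return(c(finiteDiff = finiteDiff,
           analytic = DhatPrime(Aind, M[gamma, ], Beta2, Beta3)))
}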
#' Compute the N-hat value, which is the numerator value of the composite prediction
#' for a sample k and a subgraph lambda.
#' @param Aind The matrix containing the values of the independent analyte type.
#' @param Aout The matrix containing the values of the outcome analyte type.
#' @param phi The weights of the meta-features.
#' @param M The meta-features.
#' @param Beta0 The intercept coefficients.
#' @param Beta1 The analyte coefficients.
#' @param BetaC The covariate coefficients.
#' @param C The covariates for the predictors of interest. If no covariates,
#' this value should be NA.
Nhat <- function(Aind, Aout, phi, M, Beta0, Beta1, BetaC, C){
# Build one copy of phi per column of M so that each meta-feature row of M is
# scaled by its weight before summing within each pair.
M <- as.matrix(M)
phiMat <- matrix(rep(phi, ncol(M)), nrow = length(phi))
sumWeights <- colSums(phiMat * M)
term1 <- sum(Aout * sumWeights)
term2 <- sum(Beta0 * sumWeights)
term3 <- sum(Aind * Beta1 * sumWeights)
# Find term 4 if applicable, adding each covariate's contribution once.
term4 <- 0
if(is.data.frame(C)){
BetaC <- as.matrix(BetaC)
colnames(BetaC) <- colnames(C)
for(covariate in colnames(C)){
term4 <- term4 + sum(BetaC[,covariate] * unlist(C[[covariate]]) * sumWeights)
}
}
return(term1 - (term2 + term3 + term4))
}
#' Compute the N-hat-prime value, which is the derivative of the numerator of the
#' composite prediction for a sample k and a subgraph lambda with respect to a
#' single meta-feature metric gamma.
#' @param Aind The matrix containing the values of the independent analyte type.
#' @param Aout The matrix containing the values of the outcome analyte type.
#' @param M The meta-feature values.
#' @param Beta0 The intercept coefficients.
#' @param Beta1 The analyte coefficients.
#' @param BetaC The covariate coefficients.
#' @param C The covariates for the predictors of interest. If no covariates,
#' this value should be NA.
NhatPrime <- function(Aind, Aout, M, Beta0, Beta1, BetaC, C){
term1 <- sum(Aout * M)
term2 <- sum(Beta0 * M)
term3 <- sum(Aind * Beta1 * M)
# Find term 4 if applicable, adding each covariate's contribution once.
term4 <- 0
if(is.data.frame(C)){
BetaC <- as.matrix(BetaC)
colnames(BetaC) <- colnames(C)
for(covariate in colnames(C)){
term4 <- term4 + sum(BetaC[,covariate] * unlist(C[[covariate]]) * M)
}
}
return(term1 - (term2 + term3 + term4))
}
#' Compute the derivative of the sigmoid activation with respect to the predictor.
#' @param modelResults The results of training (so far).
#' @param pred The predicted value of a single sample for a single parameter
#' of interest.
DerivativeOfActivation <- function(modelResults, pred){
# Take the derivative of the sigmoid function.
sigmoid <- 1 / (1 + exp(-1 * pred))
d.act.d.pred <- sigmoid * (1 - sigmoid)
# Return
return(d.act.d.pred)
}
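# A quick finite-difference check of the identity used above,
# d/dx sigmoid(x) = sigmoid(x) * (1 - sigmoid(x)). DerivativeOfActivation does
# not touch modelResults, so NULL suffices for that argument here
# (illustrative sketch only):
checkSigmoidDerivative <- function(x = 0.3, h = 1e-6){
  sigmoid <- function(z) 1 / (1 + exp(-z))
  return(c(analytic = DerivativeOfActivation(NULL, x),
           numeric = (sigmoid(x + h) - sigmoid(x - h)) / (2 * h)))
}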
#' Predict Y given current weights.
#' @param modelResults An object of the ModelResults class.
#' @param prunedModels The models that remain after pruning. There should only
#' be one model at the end.
#' @param minCutoff Minimum cutoff for the prediction.
#' @param maxCutoff Maximum cutoff for the prediction.
#' @param useCutoff Whether or not to use the cutoff for prediction. Default is FALSE.
#' @param weights Current metafeature weights.
#' @param averaging If TRUE, then averaging is used to combine predictors rather than
#' retaining the same functional form for both the input and the output.
DoPrediction <- function(modelResults, prunedModels, minCutoff, maxCutoff, useCutoff = FALSE,
weights, averaging = FALSE){
# Obtain the final prediction.
flatPairs <- unlist(prunedModels@pairs)
flatTargets <- unlist(prunedModels@targets)
flatSources <- unlist(prunedModels@sources)
predictions <- CompositePrediction(pairs = flatPairs, modelResults = modelResults,
minCutoff = minCutoff,
maxCutoff = maxCutoff,
useCutoff = useCutoff,
weights = weights,
targets = flatTargets,
sources = flatSources,
averaging = averaging)
Y.pred <- as.data.frame(predictions)
colnames(Y.pred) <- 1
rownames(Y.pred) <- names(predictions)
# Return the prediction.
return(Y.pred)
}
#' Calculate minimum and maximum cutoffs for prediction.
#' @param modelResults An object of the ModelResults class.
CalculatePredictionCutoffs <- function(modelResults){
trueVals <- as.matrix(modelResults@model.input@input.data@sampleMetaData[,modelResults@model.input@stype])
if(!is.numeric(trueVals)){
catNames <- sort(unique(trueVals))
trueValuesChar <- trueVals
trueVals <- matrix(rep(0, length(trueValuesChar)), ncol = ncol(trueValuesChar),
nrow = nrow(trueValuesChar))
trueVals[which(trueValuesChar == catNames[2])] <- 1
}
fudgeFactor <- 0.1
maxTrue <- max(trueVals)
minTrue <- min(trueVals)
marginMax <- fudgeFactor * abs(maxTrue)
marginMin <- fudgeFactor * abs(minTrue)
minCutoff <- minTrue - marginMin
maxCutoff <- maxTrue + marginMax
return(list(min = minCutoff, max = maxCutoff))
}
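# The cutoffs widen the observed phenotype range by 10% of each endpoint's
# magnitude; for binary phenotypes recoded to 0/1 this yields [0, 1.1]. A
# standalone sketch of the same margin arithmetic (toy values):
demoCutoffMargins <- function(trueVals = c(-2, 0.5, 4)){
  fudgeFactor <- 0.1
  return(c(min = min(trueVals) - fudgeFactor * abs(min(trueVals)),
           max = max(trueVals) + fudgeFactor * abs(max(trueVals))))  # -2.2, 4.4
}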
#' Train the graph learning model, using the specifications in the ModelResults
#' class and storing the results in the ModelResults class.
#' @param modelResults An object of the ModelResults class.
#' @param prunedModels The models that remain after pruning.
#' @param minCutoff Minimum cutoff for the prediction.
#' @param maxCutoff Maximum cutoff for the prediction.
#' @param useCutoff Whether or not to use the cutoff for prediction. Default is FALSE.
#' @param weights Current weights.
#' @param averaging If TRUE, then averaging is used to combine predictors rather than
#' retaining the same functional form for both the input and the output.
DoSingleTrainingIteration <- function(modelResults, prunedModels, minCutoff, maxCutoff, useCutoff = FALSE,
weights, averaging = FALSE){
# Predict Y.
Y.pred <- DoPrediction(modelResults, prunedModels,
minCutoff = minCutoff,
maxCutoff = maxCutoff,
useCutoff = useCutoff,
weights = weights,
averaging = averaging)
# Backpropagate and calculate the error.
modelResults <- Backpropagate(modelResults = modelResults,
prunedModels = prunedModels,
Y.pred = Y.pred,
minCutoff = minCutoff,
maxCutoff = maxCutoff,
useCutoff = useCutoff,
weights = weights,
averaging = averaging)
# Modify the model results and return.
return(modelResults)
}
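# A typical caller repeats DoSingleTrainingIteration until the tracked error
# plateaus. The loop below sketches the expected call pattern; `results` (a
# ModelResults object), `pruned`, and `maxIterations` are assumed to exist and
# are not guaranteed package API, so the sketch is left commented out:
# cutoffs <- CalculatePredictionCutoffs(results)
# for(iter in seq_len(maxIterations)){
#   results@current.iteration <- iter
#   results <- DoSingleTrainingIteration(results, pruned,
#     minCutoff = cutoffs$min, maxCutoff = cutoffs$max,
#     weights = results@current.metaFeature.weights)
# }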