R/trainingfunctions.R

#' Run backpropagation for a single layer. In backpropagation, a gradient
#' is computed by taking partial derivatives with respect to each of the model
#' weights. Given a learning rate, the weights are adjusted according to the gradient.
#' @param modelResults An object of the ModelResults class.
#' @param prunedModels The models that remain after pruning.
#' @param Y.pred The predicted phenotype value.
#' @param minCutoff Minimum cutoff for the prediction.
#' @param maxCutoff Maximum cutoff for the prediction.
#' @param useCutoff Whether or not to use the cutoff for prediction. Default is FALSE.
#' @param weights The current value of the weights.
#' @param averaging If TRUE, then averaging is used to combine predictors rather than
#' retaining the same functional form for both the input and the output.
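#' @examples
#' # A standalone sketch of the optimizer update rules used below
#' # (see https://arxiv.org/abs/1609.04747), applied to a toy quadratic
#' # loss; it does not construct any package objects.
#' grad <- function(w) 2 * (w - 3)  # gradient of the loss (w - 3)^2
#' lr <- 0.1
#' # SGD: step against the gradient.
#' w <- 0
#' for(t in 1:200) w <- w - lr * grad(w)
#' w  # approaches 3
#' # Adam: bias-corrected first- and second-moment estimates.
#' w <- 0; m <- 0; v <- 0
#' for(t in 1:200){
#'   g <- grad(w)
#'   m <- 0.9 * m + 0.1 * g
#'   v <- 0.999 * v + 0.001 * g^2
#'   m.hat <- m / (1 - 0.9^t)
#'   v.hat <- v / (1 - 0.999^t)
#'   w <- w - lr * m.hat / (sqrt(v.hat) + 1e-8)
#' }
#' w  # also approaches 3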
Backpropagate <- function(modelResults, prunedModels, Y.pred, minCutoff, maxCutoff, useCutoff = FALSE,
                          weights, averaging = FALSE){
  # Calculate gradient.
  gradient <- NULL
  if(averaging == TRUE){
    gradient <- computeGradientForAveraging(modelResults = modelResults, prunedModels = prunedModels,
                                minCutoff = minCutoff,
                                maxCutoff = maxCutoff,
                                useCutoff = useCutoff,
                                weights = weights)
  }else{
    gradient <- computeGradient(modelResults = modelResults, prunedModels = prunedModels,
                                minCutoff = minCutoff,
                                maxCutoff = maxCutoff,
                                useCutoff = useCutoff,
                                weights = weights)
  }

  # Save the current weights; they become the previous weights after the update.
  prevWeights <- modelResults@current.metaFeature.weights
  
  # Update gradient.
  modelResults@current.gradient <- as.matrix(gradient)
  modelResults@iteration.tracking[modelResults@current.iteration+1,
                                  which(grepl("Gradient", 
                                              colnames(modelResults@iteration.tracking)))] <- gradient

  # Set current weights, previous weights, and gradient.
  # Reference for the optimization algorithms: https://arxiv.org/abs/1609.04747
  if(modelResults@optimization.type == "SGD"){
    
    # Stochastic Gradient Descent
    modelResults@current.metaFeature.weights <- prevWeights - 
      modelResults@learning.rate * modelResults@current.gradient
    
  }else if(modelResults@optimization.type == "momentum"){
    # Momentum
    gamma <- 0.9
    update.vec <- gamma * modelResults@previous.update.vector + 
      modelResults@learning.rate * modelResults@current.gradient
    if(modelResults@current.iteration == 1){
      update.vec <- modelResults@learning.rate * modelResults@current.gradient
    }
    modelResults@current.metaFeature.weights <- prevWeights - update.vec
    modelResults@previous.update.vector <- update.vec
  }else if(modelResults@optimization.type == "adagrad"){
    # Adagrad
    G <- modelResults@sum.square.gradients
    epsilon <- 0.00000001
    update.vec <- (modelResults@learning.rate / sqrt(G+epsilon)) * modelResults@current.gradient
    if(modelResults@current.iteration == 1){
      update.vec <- modelResults@learning.rate * modelResults@current.gradient
    }
    modelResults@current.metaFeature.weights <- prevWeights - update.vec
    modelResults@sum.square.gradients <- modelResults@sum.square.gradients +
      (modelResults@current.gradient^2)
  }else if(modelResults@optimization.type == "adam"){
    # ADAM
    beta1 <- 0.9
    beta2 <- 0.999
    epsilon <- 0.00000001
    m <- beta1 * modelResults@previous.momentum + 
      (1-beta1) * modelResults@current.gradient
    v <- beta2 * modelResults@previous.update.vector +
      (1-beta2) * (modelResults@current.gradient^2)
    m.hat <- m / (1 - (beta1)^modelResults@current.iteration)
    v.hat <- v / (1 - (beta2)^modelResults@current.iteration)
    update.vec <- (modelResults@learning.rate * m.hat) / (sqrt(v.hat)+epsilon)
    modelResults@current.metaFeature.weights <- prevWeights - update.vec
    modelResults@previous.momentum <- m
    modelResults@previous.update.vector <- v
  }

  # Update the weights for this iteration.
  modelResults@iteration.tracking[modelResults@current.iteration+1,
                                  which(grepl("Weight", 
                                              colnames(modelResults@iteration.tracking)))] <- modelResults@current.metaFeature.weights
  
  # Return.
  return(modelResults)
}

#' Compute the gradient for a single layer neural network.
#' @param modelResults An object of the ModelResults class.
#' @param prunedModels The models that remain after pruning.
#' @param minCutoff Minimum cutoff for the prediction.
#' @param maxCutoff Maximum cutoff for the prediction.
#' @param useCutoff Whether or not to use the cutoff for prediction. Default is FALSE.
#' @param weights Current weights for the model.
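#' @examples
#' # A minimal numeric check of the quotient rule that this gradient applies
#' # to each meta-feature weight: d/dphi (N/D) = (D*N' - N*D') / D^2.
#' # The toy N and D below are hypothetical stand-ins for Nhat and Dhat.
#' N <- function(phi) 2 + 3 * phi; Nprime <- 3
#' D <- function(phi) 1 + phi; Dprime <- 1
#' phi <- 0.5
#' analytic <- (D(phi) * Nprime - N(phi) * Dprime) / D(phi)^2
#' numeric <- (N(phi + 1e-6) / D(phi + 1e-6) - N(phi) / D(phi)) / 1e-6
#' all.equal(analytic, numeric, tolerance = 1e-4)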
computeGradient <- function(modelResults, prunedModels, minCutoff, maxCutoff, useCutoff = FALSE,
                            weights){

  # Extract analyte types
  S_start <- unlist(prunedModels@sources)
  S_end <- unlist(prunedModels@targets)
  S <- unlist(prunedModels@pairs)
  analyteTypeOut <- modelResults@model.input@input.data@analyteType2
  analyteTypeIn <- modelResults@model.input@input.data@analyteType1
  if(modelResults@model.input@independent.var.type == 2){
    analyteTypeIn <- modelResults@model.input@input.data@analyteType2
  }
  if(modelResults@model.input@outcome == 1){
    analyteTypeOut <- modelResults@model.input@input.data@analyteType1
  }
  Aind <- data.frame(analyteTypeIn)
  Aout <- data.frame(analyteTypeOut)
  if(!is.null(ncol(analyteTypeIn))){
    Aind <- data.frame(analyteTypeIn[S_start,])
    Aout <- data.frame(analyteTypeOut[S_end,])
  }
  if(nrow(Aind) == 1){
    Aind <- t(Aind)
    Aout <- t(Aout)
  }

  # Extract covariates
  C <- NA
  if(length(modelResults@model.input@covariates) > 0 && modelResults@model.input@covariates[1] != ""){
    C <- modelResults@model.input@input.data@sampleMetaData[,modelResults@model.input@covariates]
  }

  # Extract other components for gradient.
  P_hat <- CompositePrediction(pairs = S, modelResults = modelResults,
                               minCutoff = minCutoff,
                               maxCutoff = maxCutoff,
                               useCutoff = useCutoff,
                               weights = weights,
                               sources = S_start,
                               targets = S_end)
  P <- modelResults@model.input@true.phenotypes
  Beta <- modelResults@model.input@model.properties[S,]
  subsetMeta <- lapply(modelResults@model.input@metaFeatures, function(metafeature){
     return(as.data.frame(metafeature[,S]))
  })
  M <- t(as.data.frame(do.call(cbind, subsetMeta)))
  
  rownames(M) <- names(modelResults@model.input@metaFeatures)
  phi <- modelResults@current.metaFeature.weights

  # Compute the gradient for each sample.
  sampGradients <- lapply(1:length(P), function(k){

    # Compute the derivative of P-hat.
    PhatDeriv <- 2 * (P[k] - P_hat[k])

    # Extract the independent, outcome, and covariate values.
    AindLocal <- data.frame(Aind[,k])
    if(length(AindLocal) == ncol(Aind) && ncol(Aind) > 1){
      AindLocal <- t(AindLocal)
      colnames(AindLocal) <- colnames(Aind)
    }
    AoutLocal <- data.frame(Aout[,k])
    if(length(AoutLocal) == ncol(Aout) && ncol(Aout) > 1){
      AoutLocal <- t(AoutLocal)
      colnames(AoutLocal) <- colnames(Aout)
    }
    CovarLocal <- NA
    if(length(modelResults@model.input@covariates) > 0 && modelResults@model.input@covariates[1] != ""){
      CovarLocal <- data.frame(C[k,])
      colnames(CovarLocal) <- colnames(C)
    }
    MLocal <- data.frame(M[,k])
    if(length(MLocal) == ncol(M) && ncol(M) > 1){
      MLocal <- t(MLocal)
      colnames(MLocal) <- colnames(M)
    }

    # Compute the denominator.
    dhat <- Dhat(Aind = as.matrix(AindLocal), 
                 phi = phi, 
                 M = MLocal, 
                 Beta2 = Beta[,"type"],
                 Beta3 = Beta[,"a:type"])

    # Compute the numerator.
    nhat <- Nhat(Aind = as.matrix(AindLocal),
                 Aout = as.matrix(AoutLocal),
                 C = CovarLocal,
                 phi = phi, 
                 M = MLocal, 
                 Beta0 = Beta[,"(Intercept)"],
                 Beta1 = Beta[,"a"],
                 BetaC = Beta[,unlist(modelResults@model.input@covariates)])

    # Compute the derivative for each value of phi.
    phiGradients <- lapply(1:length(phi), function(gamma){
      # Extract the relevant component of M.
      Mgamma <- MLocal[gamma,]

      # Compute the derivative of P-hat with respect to the denominator.
      dhatprime <- DhatPrime(Aind = as.matrix(AindLocal), 
                             M = Mgamma, 
                             Beta2 = Beta[,"type"],
                             Beta3 = Beta[,"a:type"])

      # Compute the derivative of P-hat with respect to the numerator.
      nhatprime <- NhatPrime(Aind = as.matrix(AindLocal),
                             Aout = as.matrix(AoutLocal),
                             C = CovarLocal,
                             M = Mgamma, 
                             Beta0 = Beta[,"(Intercept)"],
                             Beta1 = Beta[,"a"],
                             BetaC = Beta[,unlist(modelResults@model.input@covariates)])
      return((dhat * nhatprime - nhat * dhatprime) / (dhat ^ 2))
    })

    # Obtain the subgraph derivative.
    subgraphDerivDF <- as.data.frame(phiGradients)
    colnames(subgraphDerivDF) <- names(modelResults@model.input@metaFeatures)

    return(PhatDeriv * subgraphDerivDF)
  })
  sampGradientsDF <- do.call(rbind, sampGradients)
  gradients <- colSums(sampGradientsDF)

  # Return the derivative.
  return(-1 * gradients)
}

#' Compute the gradient for a single layer neural network, using averaging
#' to combine the per-pair predictors.
#' @param modelResults An object of the ModelResults class.
#' @param prunedModels The models that remain after pruning.
#' @param minCutoff Minimum cutoff for the prediction.
#' @param maxCutoff Maximum cutoff for the prediction.
#' @param useCutoff Whether or not to use the cutoff for prediction. Default is FALSE.
#' @param weights Current weights for the model.
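#' @examples
#' # A minimal sketch of the per-pair prediction form that this gradient
#' # appears to assume: solving Aout = B0 + B1*Aind + B2*phen + B3*Aind*phen
#' # for the phenotype gives phen = (Aout - B0 - B1*Aind) / (B2 + B3*Aind).
#' # Coefficient values here are arbitrary.
#' B0 <- 0.1; B1 <- 0.5; B2 <- 1.2; B3 <- -0.3
#' Aind <- 2; phen <- 1
#' Aout <- B0 + B1 * Aind + B2 * phen + B3 * Aind * phen
#' (Aout - B0 - B1 * Aind) / (B2 + B3 * Aind)  # recovers phen = 1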
computeGradientForAveraging <- function(modelResults, prunedModels, minCutoff, maxCutoff, useCutoff = FALSE,
                            weights){
  
  # Extract analyte types
  S_start <- unlist(prunedModels@sources)
  S_end <- unlist(prunedModels@targets)
  S <- unlist(prunedModels@pairs)
  analyteTypeOut <- modelResults@model.input@input.data@analyteType2
  analyteTypeIn <- modelResults@model.input@input.data@analyteType1
  if(modelResults@model.input@independent.var.type == 2){
    analyteTypeIn <- modelResults@model.input@input.data@analyteType2
  }
  if(modelResults@model.input@outcome == 1){
    analyteTypeOut <- modelResults@model.input@input.data@analyteType1
  }
  Aind <- data.frame(analyteTypeIn)
  Aout <- data.frame(analyteTypeOut)
  if(!is.null(ncol(analyteTypeIn))){
    Aind <- data.frame(analyteTypeIn[S_start,])
    Aout <- data.frame(analyteTypeOut[S_end,])
  }
  
  # Extract covariates
  C <- NA
  if(length(modelResults@model.input@covariates) > 0 && modelResults@model.input@covariates[1] != ""){
    C <- modelResults@model.input@input.data@sampleMetaData[,modelResults@model.input@covariates]
  }
  
  # Extract other components for gradient.
  P_hat <- CompositePrediction(pairs = S, modelResults = modelResults,
                               minCutoff = minCutoff,
                               maxCutoff = maxCutoff,
                               useCutoff = useCutoff,
                               weights = weights,
                               sources = S_start,
                               targets = S_end,
                               averaging = TRUE)
  P <- modelResults@model.input@true.phenotypes
  Beta <- modelResults@model.input@model.properties[S,]
  M <- as.data.frame(do.call(rbind, modelResults@model.input@metaFeatures)[,S])
  rownames(M) <- names(modelResults@model.input@metaFeatures)
  phi <- modelResults@current.metaFeature.weights
  
  # Compute the gradient for each sample.
  sampGradients <- lapply(1:length(P), function(k){
    
    # Compute the derivative of P-hat.
    PhatDeriv <- 2 * (P[k] - P_hat[k])
    
    # Extract the independent, outcome, and covariate values.
    AindLocal <- data.frame(Aind[,k])
    if(length(AindLocal) == ncol(Aind) && ncol(Aind) > 1){
      AindLocal <- t(AindLocal)
      colnames(AindLocal) <- colnames(Aind)
    }
    AoutLocal <- data.frame(Aout[,k])
    if(length(AoutLocal) == ncol(Aout) && ncol(Aout) > 1){
      AoutLocal <- t(AoutLocal)
      colnames(AoutLocal) <- colnames(Aout)
    }
    CovarLocal <- NA
    if(length(modelResults@model.input@covariates) > 0 && modelResults@model.input@covariates[1] != ""){
      CovarLocal <- data.frame(C[k,])
      colnames(CovarLocal) <- colnames(C)
    }

    # Compute the denominator.
    term1 <- as.matrix(Beta[,"type"])
    term2 <- AindLocal * as.matrix(Beta[,"a:type"])
    dhat <- term1 + term2

    # Compute the numerator.
    C <- CovarLocal
    term2 <- as.matrix(Beta[,"(Intercept)"])
    term3 <- AindLocal * as.matrix(Beta[,"a"])
    term4 <- 0
    if(length(C) > 0 && !all(is.na(C))){
      for(covariate in colnames(C)){
        BetaC <- as.matrix(Beta[,covariate])
        covarMat <- matrix(rep(unlist(C[covariate]), nrow(BetaC)), nrow = nrow(BetaC))
        term4 <- term4 + BetaC * covarMat
      }
    }
    # The numerator subtracts the intercept, analyte, and covariate terms
    # from the outcome analyte values (cf. Nhat below).
    nhat <- as.matrix(AoutLocal) - (term2 + term3 + term4)
    
    # Compute the final value.
    predVec <- do.call(cbind, lapply(1:nrow(M), function(i){
       return(as.data.frame(nhat / dhat))
    }))
    PijDeriv <- colSums(-1 * t(M) * predVec)
    
    # Return
    return(PijDeriv * PhatDeriv)
  })
  sampGradientsDF <- do.call(rbind, sampGradients)
  gradients <- colSums(sampGradientsDF)

  # Return the derivative.
  return(-1 * gradients)
}

#' Compute the D-hat value, which is the denominator value of the composite prediction
#' for a sample k and a subgraph lambda.
#' @param Aind The matrix containing the values of the independent analyte type.
#' @param phi The weights of the meta-features.
#' @param M The meta-features.
#' @param Beta2 The phenotype term coefficients.
#' @param Beta3 The interaction term coefficients.
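#' @examples
#' # Toy evaluation with one meta-feature and two analyte pairs; all values
#' # are arbitrary and for illustration only.
#' Aind <- matrix(c(1, 2), ncol = 1)   # independent analyte values, one per pair
#' M <- matrix(c(0.4, 0.6), nrow = 1)  # one meta-feature across two pairs
#' Dhat(Aind = Aind, phi = 0.5, M = M, Beta2 = c(1, 1), Beta3 = c(0.1, 0.2))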
Dhat <- function(Aind, phi, M, Beta2, Beta3){
  phiMat <- t(matrix(rep(phi, nrow(Aind)), nrow = nrow(Aind)))
  sumWeights <- colSums(phiMat * M)
  term1 <- sum(Beta2 * sumWeights)
  term2 <- sum(Aind * Beta3 * sumWeights)
  return(term1 + term2)
}

#' Compute the derivative of the D-hat value (the denominator of the composite
#' prediction) with respect to a meta-feature weight, for a sample k, a subgraph
#' lambda, and a meta-feature metric gamma.
#' @param Aind The matrix containing the values of the independent analyte type.
#' @param M The meta-features.
#' @param Beta2 The phenotype term coefficients.
#' @param Beta3 The interaction term coefficients.
DhatPrime <- function(Aind, M, Beta2, Beta3){
  term1 <- sum(Beta2 * M)
  term2 <- sum(Aind * Beta3 * M)
  return(term1 + term2)
}

#' Compute the N-hat value, which is the numerator value of the composite prediction
#' for a sample k and a subgraph lambda.
#' @param Aind The matrix containing the values of the independent analyte type.
#' @param Aout The matrix containing the values of the outcome analyte type.
#' @param phi The weights of the meta-features.
#' @param M The meta-features.
#' @param Beta0 The intercept coefficients.
#' @param Beta1 The analyte coefficients.
#' @param BetaC The covariate coefficients.
#' @param C The covariates for the predictors of interest. If no covariates,
#' this value should be NA.
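#' @examples
#' # Toy evaluation with one meta-feature, two analyte pairs, and no
#' # covariates (C = NA); all values are arbitrary.
#' Aind <- matrix(c(1, 2), ncol = 1)
#' Aout <- matrix(c(1.5, 2.5), ncol = 1)
#' M <- matrix(c(0.4, 0.6), nrow = 1)
#' Nhat(Aind = Aind, Aout = Aout, phi = 0.5, M = M,
#'      Beta0 = c(0.1, 0.1), Beta1 = c(0.5, 0.4), BetaC = NULL, C = NA)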
Nhat <- function(Aind, Aout, phi, M, Beta0, Beta1, BetaC, C){
  phiMat <- t(matrix(rep(phi, nrow(Aind)), nrow = nrow(Aind)))
  sumWeights <- colSums(phiMat * M)
  term1 <- sum(Aout * sumWeights)
  term2 <- sum(Beta0 * sumWeights)
  term3 <- sum(Aind * Beta1 * sumWeights)
  
  # Find term 4 if applicable.
  term4 <- 0
  if(length(C) > 0 && !all(is.na(C))){
    BetaC <- as.matrix(BetaC)
    colnames(BetaC) <- colnames(C)
    for(covariate in colnames(C)){
      # Use the coefficient column and covariate value for this covariate only.
      term4 <- term4 + sum(BetaC[,covariate] * unlist(C[covariate]) * sumWeights)
    }
  }
  return(term1 - (term2 + term3 + term4))
}

#' Compute the derivative of the N-hat value (the numerator of the composite
#' prediction) with respect to a meta-feature weight, for a sample k, a subgraph
#' lambda, and a meta-feature metric gamma.
#' @param Aind The matrix containing the values of the independent analyte type.
#' @param Aout The matrix containing the values of the outcome analyte type.
#' @param M The meta-feature values.
#' @param Beta0 The intercept coefficients.
#' @param Beta1 The analyte coefficients.
#' @param BetaC The covariate coefficients.
#' @param C The covariates for the predictors of interest. If no covariates,
#' this value should be NA.
NhatPrime <- function(Aind, Aout, M, Beta0, Beta1, BetaC, C){

  term1 <- sum(Aout * M)
  term2 <- sum(Beta0 * M)
  term3 <- sum(Aind * Beta1 * M)
  
  # Find term 4 if applicable.
  term4 <- 0
  if(length(C) > 0 && !all(is.na(C))){
    BetaC <- as.matrix(BetaC)
    colnames(BetaC) <- colnames(C)
    for(covariate in colnames(C)){
      # Use the coefficient column and covariate value for this covariate only.
      term4 <- term4 + sum(BetaC[,covariate] * unlist(C[covariate]) * unlist(M))
    }
  }
  
  return(term1 - (term2 + term3 + term4))
}

#' Compute the derivative of the sigmoid activation with respect to the
#' predicted value.
#' @param modelResults The results of training (so far). Not used in the computation.
#' @param pred The predicted value of a single sample for a single parameter
#' of interest.
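#' @examples
#' # Numerical check of the sigmoid derivative returned by this function.
#' sigmoid <- function(x) 1 / (1 + exp(-x))
#' pred <- 0.7
#' analytic <- sigmoid(pred) * (1 - sigmoid(pred))
#' numeric <- (sigmoid(pred + 1e-6) - sigmoid(pred)) / 1e-6
#' all.equal(analytic, numeric, tolerance = 1e-4)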
DerivativeOfActivation <- function(modelResults, pred){
  # Take the derivative of the sigmoid function.
  sigmoid <-  1 / (1 + exp(-1 * pred))
  d.act.d.pred <- sigmoid * (1 - sigmoid)
  
  # Return
  return(d.act.d.pred)
}

#' Predict Y given current weights.
#' @param modelResults An object of the ModelResults class.
#' @param prunedModels The models that remain after pruning. There should only
#' be one model at the end.
#' @param minCutoff Minimum cutoff for the prediction.
#' @param maxCutoff Maximum cutoff for the prediction.
#' @param useCutoff Whether or not to use the cutoff for prediction. Default is FALSE.
#' @param weights Current metafeature weights.
#' @param averaging If TRUE, then averaging is used to combine predictors rather than
#' retaining the same functional form for both the input and the output.
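#' @examples
#' \dontrun{
#' # Hypothetical call; `modelResults` and `prunedModels` are placeholders
#' # for objects produced by the package's model-building pipeline.
#' cutoffs <- CalculatePredictionCutoffs(modelResults)
#' Y.pred <- DoPrediction(modelResults, prunedModels,
#'                        minCutoff = cutoffs$min, maxCutoff = cutoffs$max,
#'                        useCutoff = TRUE,
#'                        weights = modelResults@current.metaFeature.weights)
#' }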
DoPrediction <- function(modelResults, prunedModels, minCutoff, maxCutoff, useCutoff = FALSE,
                         weights, averaging = FALSE){
  
  # Obtain the final prediction.
  flatPairs <- unlist(prunedModels@pairs)
  flatTargets <- unlist(prunedModels@targets)
  flatSources <- unlist(prunedModels@sources)
  predictions <- CompositePrediction(pairs = flatPairs, modelResults = modelResults,
                                     minCutoff = minCutoff, 
                                     maxCutoff = maxCutoff, 
                                     useCutoff = useCutoff,
                                     weights = weights,
                                     targets = flatTargets,
                                     sources = flatSources,
                                     averaging = averaging)
  Y.pred <- as.data.frame(predictions)
  colnames(Y.pred) <- 1
  rownames(Y.pred) <- names(predictions)
  
  # Return the prediction.
  return(Y.pred)
}

#' Calculate minimum and maximum cutoffs for prediction.
#' @param modelResults An object of the ModelResults class.
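#' @examples
#' # Sketch of the margin logic below: the cutoffs pad the observed range of
#' # the true values by 10% of each extreme's magnitude.
#' trueVals <- c(0.2, 0.5, 1.0)
#' minCutoff <- min(trueVals) - 0.1 * abs(min(trueVals))  # 0.18
#' maxCutoff <- max(trueVals) + 0.1 * abs(max(trueVals))  # 1.1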
CalculatePredictionCutoffs <- function(modelResults){
  trueVals <- as.matrix(modelResults@model.input@input.data@sampleMetaData[,modelResults@model.input@stype])
  if(!is.numeric(trueVals)){
    catNames <- sort(unique(trueVals))
    trueValuesChar <- trueVals
    trueVals <- matrix(rep(0, length(trueValuesChar)), ncol = ncol(trueValuesChar),
                       nrow = nrow(trueValuesChar))
    trueVals[which(trueValuesChar == catNames[2])] <- 1
  }
  fudgeFactor <- 0.1
  maxTrue <- max(trueVals)
  minTrue <- min(trueVals)
  marginMax <- fudgeFactor * abs(maxTrue)
  marginMin <- fudgeFactor * abs(minTrue)
  minCutoff <- minTrue - marginMin
  maxCutoff <- maxTrue + marginMax
  return(list(min = minCutoff, max = maxCutoff))
}

#' Train the graph learning model, using the specifications in the ModelResults
#' class and storing the results in the ModelResults class.
#' @param modelResults An object of the ModelResults class.
#' @param prunedModels The models that remain after pruning.
#' @param minCutoff Minimum cutoff for the prediction.
#' @param maxCutoff Maximum cutoff for the prediction.
#' @param useCutoff Whether or not to use the cutoff for prediction. Default is FALSE.
#' @param weights The current metafeature weights.
#' @param averaging If TRUE, then averaging is used to combine predictors rather than
#' retaining the same functional form for both the input and the output.
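#' @examples
#' \dontrun{
#' # Hypothetical training loop; `modelResults` and `prunedModels` are
#' # placeholders, and iteration bookkeeping (current.iteration) is assumed
#' # to be handled elsewhere in the pipeline.
#' cutoffs <- CalculatePredictionCutoffs(modelResults)
#' for(i in seq_len(100)){
#'   modelResults <- DoSingleTrainingIteration(modelResults, prunedModels,
#'                                             minCutoff = cutoffs$min,
#'                                             maxCutoff = cutoffs$max,
#'                                             weights = modelResults@current.metaFeature.weights)
#' }
#' }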
DoSingleTrainingIteration <- function(modelResults, prunedModels, minCutoff, maxCutoff, useCutoff = FALSE,
                                      weights, averaging = FALSE){
  # Predict Y.
  Y.pred <- DoPrediction(modelResults, prunedModels,
                         minCutoff = minCutoff,
                         maxCutoff = maxCutoff,
                         useCutoff = useCutoff,
                         weights = weights,
                         averaging = averaging)

  # Backpropagate and calculate the error.
  modelResults <- Backpropagate(modelResults = modelResults,
                                prunedModels = prunedModels,
                                Y.pred = Y.pred,
                                minCutoff = minCutoff,
                                maxCutoff = maxCutoff,
                                useCutoff = useCutoff,
                                weights = weights,
                                averaging = averaging)

  # Modify the model results and return.
  return(modelResults)
}