MultiOmicsGraphPrediction: Network-Based Ensemble Prediction from IntLIM Predictors

Documented in CombineSubspaces CombineSubspacesTest ComputeAnalyteCoefMetafeature ComputeCosineSimilarity ComputeGlobalErrorMetafeature ComputeGlobalErrorMetafeatureTest ComputeInteractionCoefMetafeature ComputeLocalErrorMetafeature ComputeLocalErrorMetafeatureTest ComputePathwayMetafeature ComputePDFMetafeature ComputePvalMetafeature ComputeReactionMetafeature CosineSimilarityToTrainingOnSingleDataType CosineSimilarityToTrainingOnSubspace FindOptimalSubspaceClustering GetMetaFeatures GetPredictorIDs GetTestMetaFeatures

#' Computes the metafeatures for each sample and model.
#' @param predictions Prediction data frame, where rows are samples, and
#' columns are predictors.
#' @param metaFeatureList A list of the valid metrics to include. Valid metrics are
#' "pdf", "localerr", "globalerr", "pathway", "reaction", "interactionpval", 
#' "interactioncoef", and "analytecoef". Additionally, use "equality" to add equal
#' weight to all predictors.
#' @param k The number of nearest neighbors to consider in localerr.
#' @param inputData The input data read in using the function IntLIM function
#' ReadData.
#' @param eigStep The number of eigenvectors to step by during the evaluation
#' in localerr.
#' Note that this must be less than the number of samples in localerr. Default = 10.
#' @param alphaMin The lowest value of alpha to investigate in localerr. Default = 0.
#' @param alphaMax The highest value of alpha to investigate in localerr. Default = 1.
#' @param alphaStep The value of alpha to step by during the evaluation in localerr.
#' Default = 0.1.
#' @param modelStats A data frame that includes the interaction p-values and
#' interaction coefficients for each pair (such as the one output by IntLIM's
#' ProcessResults function)
#' @param stype Phenotype or outcome to use in models.
#' @param colIdInd The ID of the column that has the analyte ID's for the
#' independent variable. If blank, then the existing ID's are used.
#' @param colIdOut The ID of the column that has the analyte ID's for the
#' outcome variable. If blank, then the existing ID's are used.
#' @param modelsToConsider A list of models for which to calculate meta-features.
#' @return A list of data frames (one for each sample) with predictor importance
#' measured according to the listed criteria (one column per metric, one row
#' per predictor).
#' @export
GetMetaFeatures <- function(predictions, inputData,
                                       metaFeatureList = c("pdf", "localerr", "globalerr",
                                                      "pathway", "reaction",
                                                      "interactionpval", "interactioncoef",
                                                      "analytecoef", "equality"),
                                       modelStats = "",
                                       k = k,
                                       eigStep = 10, 
                                       alphaMin = 0,
                                       alphaMax = 1, 
                                       alphaStep = 0.1,
                                    stype = "",
                                    colIdInd = "",
                                    colIdOut = "",
                            modelsToConsider){
  
  # Compute metrics.
  metaFeatures <- list()
  if("pdf" %in% metaFeatureList){
    print("Computing PDF metafeatures")
    metaFeatures$pdf <- ComputePDFMetafeature(predictions = predictions[,modelsToConsider  ])
  }
  if("localerr" %in% metaFeatureList){
    print("Computing Local Error metafeatures")
    trueVals <- inputData@sampleMetaData[,stype]
    names(trueVals) <- rownames(inputData@sampleMetaData)
    metaFeatures$localerr <- ComputeLocalErrorMetafeature(predictions = predictions[,modelsToConsider], 
                                                    true = trueVals,
                                                    k = k,
                                                    inputData = inputData, 
                                                    eigStep = eigStep, 
                                                    alphaMin = alphaMin,
                                                    alphaMax = alphaMax, 
                                                    alphaStep = alphaStep)
  }
  if("globalerr" %in% metaFeatureList){  
    print("Computing Global Error metafeatures")
    metaFeatures$globalerr <- ComputeGlobalErrorMetafeature(predictions = predictions[,modelsToConsider], 
                                                      true = inputData@sampleMetaData[,stype])
  }
  if("pathway" %in% metaFeatureList){
    print("Computing Pathway metafeatures")
    metaFeatures$pathway <- ComputePathwayMetafeature(predictions = predictions[,modelsToConsider], 
                                                inputData = inputData,
                                                colIdInd = colIdInd,
                                                colIdOut = colIdOut)
  }
  if("reaction" %in% metaFeatureList){
    print("Computing Reaction metafeatures")
    metaFeatures$reaction <- ComputeReactionMetafeature(predictions = predictions[,modelsToConsider], 
                                                  inputData = inputData,
                                                  colIdInd = colIdInd,
                                                  colIdOut = colIdOut)
  }
  if("interactionpval" %in% metaFeatureList){
    print("Computing Interaction Term p-Value metafeatures")
    metaFeatures$interactionpval <- ComputePvalMetafeature(predictions = predictions[,modelsToConsider], 
                                                     modelStats = modelStats)
  }
  if("interactioncoef" %in% metaFeatureList){
    print("Computing Interaction Coefficient metafeatures")
    metaFeatures$interactioncoef <- ComputeInteractionCoefMetafeature(predictions = predictions[,modelsToConsider], 
                                                                modelStats = modelStats)
  }
  if("analytecoef" %in% metaFeatureList){
    print("Computing Analyte Coefficient metafeatures")
    metaFeatures$analytecoef <- ComputeAnalyteCoefMetafeature(predictions = predictions[,modelsToConsider], 
                                                        modelStats = modelStats)
  }
  if("equality" %in% metaFeatureList){
    print("Adding an equality metafeature")
    metaFeatures$equality <- t(matrix(rep(1, nrow(predictions) * length(modelsToConsider)),
                                      ncol = nrow(predictions)))
    rownames(metaFeatures$equality) <- rownames(predictions)
    colnames(metaFeatures$equality) <- modelsToConsider
  }
  
  return(metaFeatures)
}

#' Computes the metafeatures for each sample and model in the testing data.
#' @param predictionsTrain Prediction data frame for training data, where rows are samples, and
#' columns are predictors.
#' @param predictionsTest Prediction data frame for testing data, where rows are samples, and
#' columns are predictors.
#' @param k The number of nearest neighbors to consider in localerr.
#' @param inputDataTrain The training data (in IntLimData format).
#' @param inputDataTest The testing data (in IntLimData format).
#' @param eigStep The number of eigenvectors to step by during the evaluation
#' in localerr.
#' Note that this must be less than the number of samples in localerr. Default = 10.
#' @param alphaMin The lowest value of alpha to investigate in localerr. Default = 0.
#' @param alphaMax The highest value of alpha to investigate in localerr. Default = 1.
#' @param alphaStep The value of alpha to step by during the evaluation in localerr.
#' Default = 0.1.
#' @param modelStats A data frame that includes the interaction p-values and
#' interaction coefficients for each pair (such as the one output by IntLIM's
#' ProcessResults function)
#' @param stype Phenotype or outcome to use in models.
#' @param colIdInd The ID of the column that has the analyte ID's for the
#' independent variable. If blank, then the existing ID's are used.
#' @param colIdOut The ID of the column that has the analyte ID's for the
#' outcome variable. If blank, then the existing ID's are used.
#' @param metaFeatureList A list of metafeatures to compute.
#' @return A list of data frames (one for each sample) with predictor importance
#' measured according to the listed criteria (one column per metric, one row
#' per predictor).
#' @export
GetTestMetaFeatures <- function(predictionsTrain, predictionsTest, inputDataTrain, inputDataTest,
                                metaFeatureList = c("pdf", "localerr", "globalerr",
                                                   "pathway", "reaction",
                                                   "interactionpval", "interactioncoef",
                                                   "analytecoef"),
                                    modelStats = "",
                                    k = k,
                                    eigStep = 10, 
                                    alphaMin = 0,
                                    alphaMax = 1, 
                                    alphaStep = 0.1,
                                    stype = "",
                                    colIdInd = "",
                                    colIdOut = ""){
  # Compute metrics.
  metaFeatures <- list()
  if("pdf" %in% metaFeatureList){
    print("Computing PDF metafeatures")
    metaFeatures$pdf <- ComputePDFMetafeature(predictions = predictionsTest)
  }
  if("localerr" %in% metaFeatureList){
    print("Computing Local Error metafeatures")
    trueVals <- inputDataTrain@sampleMetaData[,stype]
    names(trueVals) <- rownames(inputDataTrain@sampleMetaData)
    metaFeatures$localerr <- ComputeLocalErrorMetafeatureTest(predictions = predictionsTrain, 
                                                    true = trueVals,
                                                    k = k,
                                                    inputDataTrain = inputDataTrain, 
                                                    inputDataTest = inputDataTest,
                                                    eigStep = eigStep, 
                                                    alphaMin = alphaMin,
                                                    alphaMax = alphaMax, 
                                                    alphaStep = alphaStep)
  }
  if("globalerr" %in% metaFeatureList){  
    print("Computing Global Error metafeatures")
    metaFeatures$globalerr <- ComputeGlobalErrorMetafeatureTest(predictions = predictionsTrain, 
                                                      true = inputDataTrain@sampleMetaData[,stype])
    # Adjust dimensionality for test data.
    metaFeatures$globalerr <- t(matrix(rep(metaFeatures$globalerr[1,], nrow(predictionsTest),
                                         ncol = nrow(predictionsTest))))
    rownames(metaFeatures$globalerr) <- rownames(predictionsTest)
    colnames(metaFeatures$globalerr) <- colnames(predictionsTest)
  }
  if("pathway" %in% metaFeatureList){
    print("Computing Pathway metafeatures")
    metaFeatures$pathway <- ComputePathwayMetafeature(predictions = predictionsTest, 
                                                inputData = inputDataTest,
                                                colIdInd = colIdInd,
                                                colIdOut = colIdOut)
    # Adjust dimensionality for test data.
    metaFeatures$pathway <- t(matrix(rep(metaFeatures$pathway[1,], nrow(predictionsTest)),
                                                 ncol = nrow(predictionsTest)))
    rownames(metaFeatures$pathway) <- rownames(predictionsTest)
    colnames(metaFeatures$pathway) <- colnames(predictionsTest)
  }
  if("reaction" %in% metaFeatureList){
    print("Computing Reaction metafeatures")
    metaFeatures$reaction <- ComputeReactionMetafeature(predictions = predictionsTest, 
                                                  inputData = inputDataTest,
                                                  colIdInd = colIdInd,
                                                  colIdOut = colIdOut)
    # Adjust dimensionality for test data.
    metaFeatures$reaction <- t(matrix(rep(metaFeatures$reaction[1,], nrow(predictionsTest)),
                                           ncol = nrow(predictionsTest)))
    rownames(metaFeatures$reaction) <- rownames(predictionsTest)
    colnames(metaFeatures$reaction) <- colnames(predictionsTest)
  }
  if("interactionpval" %in% metaFeatureList){
    print("Computing Interaction Term p-Value metafeatures")
    metaFeatures$interactionpval <- ComputePvalMetafeature(predictions = predictionsTrain,
                                                     modelStats = modelStats)
    metaFeatures$interactionpval <- t(matrix(rep(metaFeatures$interactionpval[1,], nrow(predictionsTest)),
                                       ncol = nrow(predictionsTest)))
    rownames(metaFeatures$interactionpval) <- rownames(predictionsTest)
    colnames(metaFeatures$interactionpval) <- colnames(predictionsTest)
  }
  if("interactioncoef" %in% metaFeatureList){
    print("Computing Interaction Coefficient metafeatures")
    metaFeatures$interactioncoef <- ComputeInteractionCoefMetafeature(predictions = predictionsTrain,
                                                                modelStats = modelStats)
    # Adjust dimensionality for test data.
    metaFeatures$interactioncoef <- t(matrix(rep(metaFeatures$interactioncoef[1,], nrow(predictionsTest)),
                                          ncol = nrow(predictionsTest)))
    rownames(metaFeatures$interactioncoef) <- rownames(predictionsTest)
    colnames(metaFeatures$interactioncoef) <- colnames(predictionsTest)
  }
  if("analytecoef" %in% metaFeatureList){
    print("Computing Analyte Coefficient metafeatures")
    metaFeatures$analytecoef <- ComputeAnalyteCoefMetafeature(predictions = predictionsTrain, 
                                                        modelStats = modelStats)
    
    # Adjust dimensionality for test data.
    metaFeatures$analytecoef <- t(matrix(rep(metaFeatures$analytecoef[1,], nrow(predictionsTest)),
                                                 ncol = nrow(predictionsTest)))
    rownames(metaFeatures$analytecoef) <- rownames(predictionsTest)
    colnames(metaFeatures$analytecoef) <- colnames(predictionsTest)
  }
  if("equality" %in% metaFeatureList){
    print("Adding an equality metafeature")
    metaFeatures$equality <- t(matrix(rep(1, nrow(predictionsTest) * ncol(predictionsTest)),
                                      ncol = nrow(predictionsTest)))
    rownames(metaFeatures$equality) <- rownames(predictionsTest)
    colnames(metaFeatures$equality) <- colnames(predictionsTest)
  }
  
  return(metaFeatures)
}

#' Computes the importance as the density in the probability density function,
#' normalized over all densities.
#' @param predictions Prediction data frame, where rows are samples, and
#' columns are predictors.
#' @return A data frame with the importance metric for each sample and each
#' predictor.
#' @export
ComputePDFMetafeature <- function(predictions){
  
  # Compute the importance for each sample.
  predictions <- as.matrix(predictions)
  importanceAll <- lapply(1:nrow(predictions), function(samp){
    # Prediction
    pred <- predictions[samp,]

    # Compute importance based on density.
    importance <- rep(0, length(pred))
    d <- stats::density(pred)
    importance <- unlist(lapply(pred, function(pred){
      return(d$y[which.min(abs(d$x - pred))])
    }))
    
    # Get the density percentile over all pairs.
    percentileVals <- seq(1, 100, by = 1) / 100
    quantiles <- stats::quantile(importance, percentileVals)
    percentiles <- unlist(lapply(importance, function(c){
      cvec <- rep(c, length(quantiles))
      which_match <- which.min(abs(cvec - quantiles))
      return(percentileVals[which_match])
    }))
    importance <- percentiles
    
    # Return.
    cat(".")
    return(importance)
  })
  importance <- t(do.call(cbind, importanceAll))
  rownames(importance) <- rownames(predictions)
  colnames(importance) <- colnames(predictions)
  return(importance)
}

#' Function for obtaining database identifiers for each predictor. Used
#' in functional analysis.
#' @param preds Prediction data frame, where rows are samples, and
#' columns are predictors.
#' @param inputData Input data, which may include database ID mappings for
#' the analytes.
#' @param colIdInd The ID of the column that has the analyte ID's for the
#' independent variable. If blank, then the existing ID's are used.
#' @param colIdOut The ID of the column that has the analyte ID's for the
#' outcome variable. If blank, then the existing ID's are used.
#' @return A data frame with two columns. The indId column contains the ID's
#' for the independent analyte, and the outId column contains the ID's for
#' the outcome analyte.
GetPredictorIDs <- function(preds, inputData, colIdInd, colIdOut){
  # Get prediction names.
  indAnalytes <- unlist(lapply(colnames(preds), function(pair){
    return(strsplit(pair, "__")[[1]][1])
  }))
  outAnalytes <- unlist(lapply(colnames(preds), function(pair){
    return(strsplit(pair, "__")[[1]][2])
  }))
  
  # Map to source ID.
  indId <- indAnalytes
  outId <- outAnalytes
  if(length(inputData@analyteType1MetaData) > 0 && colIdInd != "" && indAnalytes[[1]] %in% rownames(inputData@analyteType1MetaData)){
    indId <- inputData@analyteType1MetaData[indAnalytes, colIdInd]
  }else if(length(inputData@analyteType2MetaData) > 0 && colIdInd != "" && indAnalytes[[1]] %in% rownames(inputData@analyteType2MetaData)){
    indId <- inputData@analyteType2MetaData[indAnalytes, colIdInd]
  }
  if(length(inputData@analyteType1MetaData) > 0 && colIdOut != "" && outAnalytes[[1]] %in% rownames(inputData@analyteType1MetaData)){
    outId <- inputData@analyteType1MetaData[outAnalytes, colIdOut]
  }else if(length(inputData@analyteType2MetaData) > 0 && colIdOut != "" && outAnalytes[[1]] %in% rownames(inputData@analyteType2MetaData)){
    outId <- inputData@analyteType2MetaData[outAnalytes, colIdOut]
  }
  
  # Return data frame.
  return(data.frame(indId = indId, outId = outId))
}

#' Compute the importance using pathway membership. Predictor pairs that share
#' at least one pathway have an importance of 1, and all others have an 
#' importance of 0.
#' @param predictions Prediction data frame, where rows are samples, and
#' columns are predictors.
#' @param inputData Input data, which may include database ID mappings for
#' the analytes.
#' @param colIdInd The ID of the column that has the analyte ID's for the
#' independent variable. If blank, then the existing ID's are used.
#' @param colIdOut The ID of the column that has the analyte ID's for the
#' outcome variable. If blank, then the existing ID's are used.
#' @return A data frame with the importance metric for each sample and each
#' predictor. The metric will be the same for all samples in this case.
ComputePathwayMetafeature <- function(predictions, inputData, colIdInd,
                                     colIdOut){
  
  # Obtain names of predictors.
  predNames <- GetPredictorIDs(predictions, inputData, colIdInd, colIdOut)
  predNames[,"indId"] <- as.character(predNames[,"indId"])
  predNames[,"outId"] <- as.character(predNames[,"outId"])
  allPathways <- RaMP::getPathwayFromAnalyte(unique(c(predNames[,"indId"], 
                                                      predNames[,"outId"])))
  
  # Find the pathways associated with each analyte.
  namesInd <- unique(predNames[,"indId"])
  pwayResultInd <- lapply(namesInd, function(analyte){
    return(allPathways$pathwayId[which(allPathways$inputId == analyte)])
  })
  names(pwayResultInd) <- namesInd
  namesOut <- unique(predNames[,"outId"])
  pwayResultOut <- lapply(namesOut, function(analyte){
    return(allPathways$pathwayId[which(allPathways$inputId == analyte)])
  })
  names(pwayResultOut) <- namesOut
  
  # Find which pairs share pathways.
  sharesPathway <- unlist(lapply(1:nrow(predNames), function(i){
    if(i %% (ceiling(nrow(predNames) / 10)) == 0){
      cat(".")
    }
    shares <- FALSE
    if(length(intersect(pwayResultInd[[predNames[i,"indId"]]], pwayResultOut[[predNames[i,"outId"]]])) > 0){
      shares <- TRUE
    }
    return(shares)
  }))
  importance <- rep(0, length(sharesPathway))
  importance[which(sharesPathway == TRUE)] <- 1
  
  # Make copies for each sample.
  importanceAll <- lapply(rownames(predictions), function(samp){return(importance)})
  importanceFinal <- t(do.call(cbind, importanceAll))
  rownames(importanceFinal) <- rownames(predictions)
  colnames(importanceFinal) <- colnames(predictions)
  return(importanceFinal)
}

#' Compute the importance using reaction membership. Predictor pairs that share
#' at least one reaction have an importance of 1, and all others have an 
#' importance of 0.
#' @param predictions Prediction data frame, where rows are samples, and
#' columns are predictors.
#' @param inputData Input data, which may include database ID mappings for
#' the analytes.
#' @param colIdInd The ID of the column that has the analyte ID's for the
#' independent variable. If blank, then the existing ID's are used.
#' @param colIdOut The ID of the column that has the analyte ID's for the
#' outcome variable. If blank, then the existing ID's are used.
#' @return A data frame with the importance metric for each sample and each
#' predictor. The metric will be the same for all samples in this case.
ComputeReactionMetafeature <- function(predictions, inputData, colIdInd,
                                      colIdOut){
  # Obtain names of predictors.
  predNames <- GetPredictorIDs(predictions, inputData, colIdInd, colIdOut)
  predNames[,"indId"] <- as.character(predNames[,"indId"])
  predNames[,"outId"] <- as.character(predNames[,"outId"])
  uniqueOutIds <- unique(predNames[,"outId"])
  rxnAll <- RaMP::rampFastCata(uniqueOutIds)$rxn_partner_ids
  names(rxnAll) <- uniqueOutIds
  
  # Find which pairs share reactions.
  sharesRxn <- unlist(lapply(1:nrow(predNames), function(i){
    if(i %% (ceiling(nrow(predNames) / 10)) == 0){
      cat(".")
    }
    shares <- FALSE
    tryCatch({
      rxnResult <- rxnAll[[predNames[i, "outId"]]]
      rxnResultAll <- unlist(lapply(rxnResult, function(res){
        return(strsplit(res, "; ")[[1]])
      }))
      if(predNames[i,"indId"] %in% rxnResultAll){
        shares <- TRUE
      }
    }, error = function(cond){})
    return(shares)
  }))
  importance <- rep(0, length(sharesRxn))
  importance[which(sharesRxn == TRUE)] <- 1
  
  # Make copies for each sample.
  importanceAll <- lapply(rownames(predictions), function(samp){return(importance)})
  importanceFinal <- t(do.call(cbind, importanceAll))
  rownames(importanceFinal) <- rownames(predictions)
  colnames(importanceFinal) <- colnames(predictions)
  return(importanceFinal)
}

#' Compute the importance using p-value of model interaction term. Specifically,
#' the p-value percentile is calculated, and the weight is taken as 1 - the percentile,
#' so that the lowest p-values are weighted more heavily.
#' @param predictions Prediction data frame, where rows are samples, and
#' columns are predictors.
#' @param modelStats A data frame that includes the interaction p-values and
#' interaction coefficients for each pair (such as the one output by IntLIM's
#' ProcessResults function)
#' @return A data frame with the importance metric for each sample and each
#' predictor. The metric will be the same for all samples in this case.
ComputePvalMetafeature <- function(predictions, modelStats){
  
  # Measure importance for each predictor.
  pvals <- modelStats$FDRadjPval
  names(pvals) <- paste(modelStats$Analyte1, modelStats$Analyte2, sep = "__")
  pvals <- pvals[colnames(predictions)]
  
  # Get the p-value percentile over all pairs.
  percentileVals <- seq(1, 100, by = 1) / 100
  quantiles <- stats::quantile(pvals, percentileVals)
  closestQuantile <- rep(-1, length(pvals))
  smallestDifference <- rep(1, length(pvals))
  for(i in 1:length(quantiles)){
    diffs <- abs(pvals - quantiles[i])
    which_better <- which(diffs < smallestDifference)
    smallestDifference[which_better] <- diffs[which_better]
    closestQuantile[which_better] <- percentileVals[i]
  }
  importance <- 1 - closestQuantile
  
  # Make copies for each sample.
  importanceAll <- lapply(rownames(predictions), function(samp){return(importance)})
  importanceFinal <- t(do.call(cbind, importanceAll))
  rownames(importanceFinal) <- rownames(predictions)
  colnames(importanceFinal) <- colnames(predictions)
  return(importanceFinal)
}

#' Compute the importance using absolute value of interaction term coefficient. Specifically,
#' the coefficient percentile is calculated and taken as the weight.
#' @param predictions Prediction data frame, where rows are samples, and
#' columns are predictors.
#' @param modelStats A data frame that includes the interaction p-values and
#' interaction coefficients for each pair (such as the one output by IntLIM's
#' ProcessResults function)
#' @return A data frame with the importance metric for each sample and each
#' predictor. The metric will be the same for all samples in this case.
ComputeInteractionCoefMetafeature <- function(predictions, modelStats){
  
  # Measure importance for each predictor.
  coefs <- modelStats$interaction_coeff
  names(coefs) <- paste(modelStats$Analyte1, modelStats$Analyte2, sep = "__")
  coefs <- coefs[colnames(predictions)]
  
  # Get the p-value percentile over all pairs.
  percentileVals <- seq(1, 100, by = 1) / 100
  quantiles <- stats::quantile(abs(coefs), percentileVals)
  closestQuantile <- rep(-1, length(coefs))
  smallestDifference <- rep(max(abs(coefs)), length(coefs))
  for(i in 1:length(quantiles)){
    diffs <- abs(abs(coefs) - quantiles[i])
    which_better <- which(diffs < smallestDifference)
    smallestDifference[which_better] <- diffs[which_better]
    closestQuantile[which_better] <- percentileVals[i]
  }
  importance <- closestQuantile
  
  # Make copies for each sample.
  importanceAll <- lapply(rownames(predictions), function(samp){return(importance)})
  importanceFinal <- t(do.call(cbind, importanceAll))
  rownames(importanceFinal) <- rownames(predictions)
  colnames(importanceFinal) <- colnames(predictions)
  return(importanceFinal)
}

#' Compute the importance using absolute value of analyte term coefficient. Specifically,
#' the coefficient percentile is calculated and taken as the weight.
#' @param predictions Prediction data frame, where rows are samples, and
#' columns are predictors.
#' @param modelStats A data frame that includes the interaction p-values,
#' interaction coefficients, and other coefficients for each pair (such as the one output by IntLIM's
#' ProcessResults function)
#' @return A data frame with the importance metric for each sample and each
#' predictor. The metric will be the same for all samples in this case.
ComputeAnalyteCoefMetafeature <- function(predictions, modelStats){
  
  # Measure importance for each predictor.
  coefs <- modelStats$type
  names(coefs) <- paste(modelStats$Analyte1, modelStats$Analyte2, sep = "__")
  coefs <- coefs[colnames(predictions)]
  
  # Get the p-value percentile over all pairs.
  percentileVals <- seq(1, 100, by = 1) / 100
  quantiles <- stats::quantile(abs(coefs), percentileVals)
  closestQuantile <- rep(-1, length(coefs))
  smallestDifference <- rep(max(abs(coefs)), length(coefs))
  for(i in 1:length(quantiles)){
    diffs <- abs(abs(coefs) - quantiles[i])
    which_better <- which(diffs < smallestDifference)
    smallestDifference[which_better] <- diffs[which_better]
    closestQuantile[which_better] <- percentileVals[i]
  }
  importance <- closestQuantile
  
  # Make copies for each sample.
  importanceAll <- lapply(rownames(predictions), function(samp){return(importance)})
  importanceFinal <- t(do.call(cbind, importanceAll))
  rownames(importanceFinal) <- rownames(predictions)
  colnames(importanceFinal) <- colnames(predictions)
  return(importanceFinal)
}

#' Finds the optimal subspace clustering (i.e. using Grassmann Manifold Technique
#' - see PMID 30329022) given two different modalities of data (e.g. gene and metabolite).
#' It optimizes the cophenetic correlation of the hierarchicial clustering
#' of the samples using a grid search.
#' @param type1Similarity A cosine similarity matrix for the first data type, 
#' found using ComputeCosineSimilarity.
#' @param type2Similarity A cosine similarity matrix for the second data type,
#' found using ComputeCosineSimilarity.
#' @param eigStep The number of eigenvectors to step by during the evaluation.
#' Note that this must be less than the number of samples. Default = 10.
#' @param alphaMin The lowest value of alpha to investigate. Default = 0.
#' @param alphaMax The highest value of alpha to investigate. Default = 1.
#' @param alphaStep The value of alpha to step by during the evalutation.
#' Default = 0.1.
#' @return A named list including the data projected onto the merged subspace,
#' the optimal number of eigenvectors, the optimal alpha value, the clustering
#' coefficient, and the dendrogram.
FindOptimalSubspaceClustering <- function(type1Similarity, type2Similarity,
                                          eigStep = 10, alphaMin = 0,
                                          alphaMax = 1, alphaStep = 0.1){
  # Check input parameters.
  if(eigStep > nrow(type1Similarity)){
    stop("Must set eigStep to a value lower than the number of samples!")
  }
  else if(alphaMin < 0 || alphaMin > 1 || alphaMax < 0 || alphaMax > 1 ){
    stop("Both alphaMin and alphaMax must be between 0 and 1!")
  }
  else if(alphaStep > alphaMax - alphaMin){
    stop(paste("alphaStep is too big! There must be at least one step between",
               "alphaMin and alphaMax"))
  }
  else{
    # Initialize.
    L_mod <- matrix()
    dendro <- -1
    optimal_eigens <- -1
    optimal_alpha <- -1
    coph_cor <- matrix()
    
    # Do grid search.
    for(eig in seq(from=1,to=dim(type1Similarity)[1],by=eigStep)){
      for(a in seq(from=alphaMin,to=alphaMax,by=alphaStep)){
        
        # Find clustering with the current grid values.
        L_mod_i <- CombineSubspaces(type1Similarity = type1Similarity, 
                                    type2Similarity = type2Similarity, 
                                    eigenCount = eig, alpha = a)
        
        # Compute cophenetic correlation.
        rownames(L_mod_i) <- rownames(type1Similarity)
        sim <- ComputeCosineSimilarity(L_mod_i)
        dist <- max(sim) - sim
        rownames(dist) <- rownames(sim)
        d <- stats::as.dist(dist)
        dend <- stats::hclust(d)
        coph_i <- stats::cophenetic(dend)
        coph_cor_i <- stats::cor(coph_i, d)
        
        # If cophenetic correlation is higher than previous, save.
        if(length(coph_cor) > 1 || is.na(coph_cor_i > coph_cor) || coph_cor_i > coph_cor){
          coph_cor <- coph_cor_i
          L_mod <- L_mod_i
          dendro <- dend
          optimal_eigens <- eig
          optimal_alpha <- a
        }
      }
    }
    return(list("L_mod"=L_mod, "eigenvector_ct"=optimal_eigens, "alpha"=optimal_alpha,
                "cluster_coeff"=coph_cor, "dendrogram"=dendro))
  }
}

#' Combine subspaces (i.e. using Grassmann Manifold Technique
#' - see PMID 30329022) given two different modalities of data (e.g. gene and metabolite)
#' and the alpha value and the desired number of eigenvectors.
#' @param type1Similarity A cosine similarity matrix for the first data type, 
#' found using ComputeCosineSimilarity.
#' @param type2Similarity A cosine similarity matrix for the second data type,
#' found using ComputeCosineSimilarity.
#' @param eigenCount The number of eigenvectors to use.
#' @param alpha The value of alpha to use.
#' @return A named list including the data projected onto the merged subspace,
#' the optimal number of eigenvectors, the optimal alpha value, the clustering
#' coefficient, and the dendrogram.
CombineSubspaces <- function(type1Similarity, type2Similarity, eigenCount, alpha){
  
  # Make graphs.
  D_dat1 <- diag(colSums(type1Similarity))
  D_dat2 <- diag(colSums(type2Similarity))
  
  # Get normalized Laplacian for each graph.
  L_dat1 <- as.matrix(D_dat1 - type1Similarity)
  L_dat2 <- as.matrix(D_dat2 - type2Similarity)
  
  # Normalize.
  D_neg_half_dat1 <- diag(diag(1 / sqrt(abs(D_dat1))))
  D_neg_half_dat2 <- diag(diag(1 / sqrt(abs(D_dat2))))
  L_dat1 <- D_neg_half_dat1 %*% L_dat1 %*% D_neg_half_dat1
  L_dat2 <- D_neg_half_dat2 %*% L_dat2 %*% D_neg_half_dat2
  
  # Get eigenvectors for each Laplacian.
  U_dat1 <- eigen(L_dat1)$vectors[,c(1:eigenCount)]
  U_dat2 <- eigen(L_dat2)$vectors[,c(1:eigenCount)]
  
  # Combine
  L_mod <- L_dat1 + L_dat2 - alpha * ((U_dat1 %*% t(U_dat1)) + (U_dat2 %*% t(U_dat2)))
  return(L_mod)
}

#' Combine subspaces (i.e. using Grassmann Manifold Technique
#' - see PMID 30329022) given two different modalities of data (e.g. gene and metabolite)
#' and the alpha value and the desired number of eigenvectors.
#' @param type1SimilarityTrain A cosine similarity matrix for the first data type, 
#' found using ComputeCosineSimilarity on the training data.
#' @param type2SimilarityTrain A cosine similarity matrix for the second data type,
#' found using ComputeCosineSimilarity on the training data.
#' @param type1SimilarityTest A cosine similarity matrix for the first data type, 
#' found using ComputeCosineSimilarity on the testing data.
#' @param type2SimilarityTest A cosine similarity matrix for the second data type,
#' found using ComputeCosineSimilarity on the testing data.
#' @param eigenCount The number of eigenvectors to use.
#' @param alpha The value of alpha to use.
#' @return A named list including the data projected onto the merged subspace,
#' the optimal number of eigenvectors, the optimal alpha value, the clustering
#' coefficient, and the dendrogram.
CombineSubspacesTest <- function(type1SimilarityTrain, type2SimilarityTrain, 
                                 type1SimilarityTest, type2SimilarityTest, 
                                 eigenCount, alpha){
  
  # Make graphs. We compute the diagonals on the training data only.
  D_dat1 <- diag(colSums(type1SimilarityTrain))
  D_dat2 <- diag(colSums(type2SimilarityTrain))
  L_dat1_train <- as.matrix(D_dat1 - type1SimilarityTrain)
  L_dat2_train <- as.matrix(D_dat2 - type2SimilarityTrain)
  D_neg_half_dat1 <- diag(diag(1 / sqrt(abs(D_dat1))))
  D_neg_half_dat2 <- diag(diag(1 / sqrt(abs(D_dat2))))
  L_dat1_train <- D_neg_half_dat1 %*% L_dat1_train %*% D_neg_half_dat1
  L_dat2_train <- D_neg_half_dat2 %*% L_dat2_train %*% D_neg_half_dat2
  
  # Get eigenvectors for each Laplacian on the training data.
  U_dat1_train <- eigen(L_dat1_train)$vectors[,c(1:eigenCount)]
  U_dat2_train <- eigen(L_dat2_train)$vectors[,c(1:eigenCount)]
  
  # Do not subtract from D_dat for testing data because none of the
  # data points are on the diagonal. Simply negate the values.
  L_dat1 <- as.matrix(0 - type1SimilarityTest)
  L_dat2 <- as.matrix(0 - type2SimilarityTest)
  
  # Normalize. Here, transposition is necessary to obtain the correct
  # dimensionality of the matrix.
  D_neg_half_dat1 <- diag(diag(1 / sqrt(abs(D_dat1))))
  D_neg_half_dat2 <- diag(diag(1 / sqrt(abs(D_dat2))))
  L_dat1 <- t(t(D_neg_half_dat1 %*% t(L_dat1)) %*% D_neg_half_dat1)
  L_dat2 <- t(t(D_neg_half_dat2 %*% t(L_dat2)) %*% D_neg_half_dat2)
  
  # Project testing data onto eigenvectors.
  U_dat1 <- t(L_dat1) %*% U_dat1_train
  U_dat2 <- t(L_dat2) %*% U_dat2_train
  
  # Combine
  L_mod <- L_dat1 + L_dat2 - alpha * (t(U_dat1 %*% t(U_dat1_train)) + t(U_dat2 %*% t(U_dat2_train)))
  return(L_mod)
}

#' Compute cosine similarity between samples on an adjacency matrix.
#' @param R The adjacency matrix.
#' @return A matrix of sample rows and sample columns, with the cosine
#' similarities listed as the values.
ComputeCosineSimilarity <- function(R){
  # Normalize matrix by the norm of its columns.
  euclidean_norm <- sqrt(rowSums(R^2))
  euclidean_norm_mat <- matrix(rep(euclidean_norm, ncol(R)), ncol=ncol(R))
  R_norm <- as.matrix(R / euclidean_norm_mat)

  # Compute matrix transpose to get cosine similarity.
  sim <- R_norm %*% t(R_norm)
  rownames(sim) <- rownames(R)
  colnames(sim) <- rownames(R)
  return(sim)
}

#' Compute cosine similarity between a testing sample and each training sample
#' in the combined subspace matrix.
#' @param opt The optimal model obtained using FindOptimalSubspaceClustering on
#' the training data.
#' @param inputDataTest The testing data.
#' @param inputDataTrain The training data.
#' @param samp The name of the testing sample against which to compute similarity.
#' @return A matrix of similarities, where the rows are the training data samples
#' and the columns are the testing data samples.
CosineSimilarityToTrainingOnSubspace <- function(opt, inputDataTest, inputDataTrain,
                                                 samp){
  
  # Compute similarity between the training samples.
  type1SimilarityTrain <- ComputeCosineSimilarity(t(inputDataTrain@analyteType1))
  type2SimilarityTrain <- ComputeCosineSimilarity(t(inputDataTrain@analyteType2))
  
  # Compute adjacency of the sample to each of the training samples using
  # the parameters of the optimal model.
  type1SimilarityTest <- CosineSimilarityToTrainingOnSingleDataType(trainingData = inputDataTrain@analyteType1,
                                                           testingData = inputDataTest@analyteType1,
                                                           samp = samp)
  type2SimilarityTest <- CosineSimilarityToTrainingOnSingleDataType(trainingData = inputDataTrain@analyteType2,
                                                         testingData = inputDataTest@analyteType2,
                                                         samp = samp)

  R <- CombineSubspacesTest(type1SimilarityTrain = type1SimilarityTrain,
                            type2SimilarityTrain = type2SimilarityTrain,
                            type1SimilarityTest = t(type1SimilarityTest), 
                        type2SimilarityTest = t(type2SimilarityTest), 
                        eigenCount = opt$eigenvector_ct, 
                        alpha = opt$alpha)
  colnames(R) <- samp
  rownames(R) <- rownames(inputDataTrain@sampleMetaData)
  
  # Compute cosine similarity on the new subspace.
  similarity <- CosineSimilarityToTrainingOnSingleDataType(trainingData = opt$L_mod,
                                                           testingData = R,
                                                           samp = samp)
  colnames(similarity) <- samp
  rownames(similarity) <- rownames(inputDataTrain@sampleMetaData)

  return(similarity)
}

#' Compute cosine similarity between a testing sample and each training sample
#' for a single data type.
#' @param testingData The testing data.
#' @param trainingData The training data.
#' @param samp The name of the testing sample against which to compute similarity.
#' @return A matrix of similarities, where the rows are the training data samples
#' and the columns are the testing data samples.
CosineSimilarityToTrainingOnSingleDataType <- function(trainingData, testingData,
                                                       samp){
  # Compute cosine similarity to each training sample.
  similarityList <- matrix(unlist(lapply(1:ncol(trainingData), function(c){
    dotProduct <- sum(trainingData[,c] * testingData[,samp])
    normTrain <- sqrt(sum(trainingData[,c]^2))
    normTest <- sqrt(sum(testingData[,samp]^2))
    return(dotProduct / (normTrain * normTest))
  })))

  # Return similarity as a matrix.
  return(similarityList)
}

#' Computes the importance as the median absolute error for local predictors
#' (i.e. the predictions for k nearest neighbors of each sample).
#' @param predictions Prediction data frame, where rows are samples, and
#' columns are predictors.
#' @param true Named vector of the true outcome values.
#' @param k The number of nearest neighbors to consider.
#' @param inputData The input data read in using the function IntLIM function
#' ReadData.
#' @param eigStep The number of eigenvectors to step by during the evaluation.
#' Note that this must be less than the number of samples. Default = 10.
#' @param alphaMin The lowest value of alpha to investigate. Default = 0.
#' @param alphaMax The highest value of alpha to investigate. Default = 1.
#' @param alphaStep The value of alpha to step by during the evalutation.
#' Default = 0.1.
#' @return A data frame with the importance metric for each sample and each
#' predictor.
#' @export
ComputeLocalErrorMetafeature <- function(predictions, true, k,
                                       inputData, eigStep = 10, alphaMin = 0,
                                       alphaMax = 1, alphaStep = 0.1){
  
  # Find the optimal projection for best cluster separability.
  type1sim <- ComputeCosineSimilarity(t(inputData@analyteType1))
  type2sim <- ComputeCosineSimilarity(t(inputData@analyteType2))
  opt <- FindOptimalSubspaceClustering(type1Similarity = type1sim, 
                                       type2Similarity = type2sim,
                                       eigStep = eigStep, alphaMin = alphaMin,
                                       alphaMax = alphaMax, alphaStep = alphaStep)
  
  # Compute the importance for each sample.
  importanceAll <- lapply(1:nrow(predictions), function(samp){
    # Prediction
    pred <- predictions[-samp,]
    
    # Find nearest neighbors.
    similarities <- ComputeCosineSimilarity(opt$L_mod)[samp,]
    sorted_sims <- names(similarities)[order(-similarities)]
    pred_sorted_sims <- sorted_sims[which(sorted_sims %in% rownames(pred))]
    knn <- pred_sorted_sims[1:k]
    pred_local <- pred[knn,]
    
    # Error
    trueVals <- matrix(rep(true[knn], ncol(pred_local)),
                       nrow = length(knn),
                       ncol = ncol(pred_local))
    if(!is.numeric(trueVals)){
      catNames <- sort(unique(trueVals))
      trueValuesChar <- trueVals
      trueVals <- matrix(rep(0, length(trueValuesChar)), ncol = ncol(trueValuesChar),
                           nrow = nrow(trueValuesChar))
      trueVals[which(trueValuesChar == catNames[2])] <- 1
    }

    err <- abs(trueVals - pred_local)
    medErr <- unlist(lapply(1:ncol(err), function(c){
      return(stats::median(err[,c]))
    }))
    importance <- 1 - medErr

    # Get the 1 - error percentile over all pairs.
    percentileVals <- seq(1, 100, by = 1) / 100
    quantiles <- stats::quantile(importance, percentileVals)
    percentiles <- unlist(lapply(importance, function(c){
      cvec <- rep(c, length(quantiles))
      which_match <- which.min(abs(cvec - quantiles))
      return(percentileVals[which_match])
    }))
    importance <- percentiles
    
    # Return.
    cat(".")
    return(importance)
  })
  importanceFinal <- t(do.call(cbind, importanceAll))
  rownames(importanceFinal) <- rownames(predictions)
  colnames(importanceFinal) <- colnames(predictions)
  return(importanceFinal)
}

#' Computes the importance as the median absolute error for local predictors
#' (i.e. the predictions for k nearest neighbors of each sample).
#' @param predictions Prediction data frame, where rows are samples, and
#' columns are predictors.
#' @param true Named vector of the true outcome values.
#' @param k The number of nearest neighbors to consider.
#' @param inputDataTrain The training data (in IntLimData format).
#' @param inputDataTest The testing data (in IntLimData format).
#' @param eigStep The number of eigenvectors to step by during the evaluation.
#' Note that this must be less than the number of samples. Default = 10.
#' @param alphaMin The lowest value of alpha to investigate. Default = 0.
#' @param alphaMax The highest value of alpha to investigate. Default = 1.
#' @param alphaStep The value of alpha to step by during the evalutation.
#' Default = 0.1.
#' @return A data frame with the importance metric for each sample and each
#' predictor.
#' @export
ComputeLocalErrorMetafeatureTest <- function(predictions, true, k,
                                        inputDataTrain, inputDataTest, 
                                        eigStep = 10, alphaMin = 0,
                                        alphaMax = 1, alphaStep = 0.1){
  
  # Find the optimal projection for best cluster separability on the training data.
  type1sim <- ComputeCosineSimilarity(t(inputDataTrain@analyteType1))
  type2sim <- ComputeCosineSimilarity(t(inputDataTrain@analyteType2))
  opt <- FindOptimalSubspaceClustering(type1Similarity = type1sim, 
                                       type2Similarity = type2sim,
                                       eigStep = eigStep, alphaMin = alphaMin,
                                       alphaMax = alphaMax, alphaStep = alphaStep)
  
  # Compute the importance for each sample in the testing data.
  importanceAll <- lapply(rownames(inputDataTest@sampleMetaData), function(samp){

    # Training data predictions
    pred <- predictions
    
    # Find nearest neighbors.
    similarities <- CosineSimilarityToTrainingOnSubspace(opt = opt, 
                                                         inputDataTrain = inputDataTrain, 
                                                         inputDataTest = inputDataTest, 
                                                         samp = samp)
    sorted_sims <- rownames(similarities)[order(-similarities)]
    pred_sorted_sims <- sorted_sims[which(sorted_sims %in% rownames(pred))]
    knn <- pred_sorted_sims[1:k]
    pred_local <- pred[knn,]
    
    # Error
    trueVals <- matrix(rep(true[knn], ncol(pred_local)),
                       nrow = length(knn),
                       ncol = ncol(pred_local))
    if(!is.numeric(trueVals)){
      catNames <- sort(unique(trueVals))
      trueValuesChar <- trueVals
      trueVals <- matrix(rep(0, length(trueValuesChar)), ncol = ncol(trueValuesChar),
                           nrow = nrow(trueValuesChar))
      trueVals[which(trueValuesChar == catNames[2])] <- 1
    }
    
    err <- abs(trueVals - pred_local)
    medErr <- unlist(lapply(1:ncol(err), function(c){
      return(stats::median(err[,c]))
    }))
    importance <- 1 - medErr
    
    # Get the 1 - error percentile over all pairs.
    percentileVals <- seq(1, 100, by = 1) / 100
    quantiles <- stats::quantile(importance, percentileVals)
    percentiles <- unlist(lapply(importance, function(c){
      cvec <- rep(c, length(quantiles))
      which_match <- which.min(abs(cvec - quantiles))
      return(percentileVals[which_match])
    }))
    importance <- percentiles
    
    # Return.
    cat(".")
    return(importance)
  })
  importanceFinal <- t(do.call(cbind, importanceAll))
  rownames(importanceFinal) <- rownames(inputDataTest@sampleMetaData)
  colnames(importanceFinal) <- colnames(predictions)
  return(importanceFinal)
}

#' Computes the importance as the median absolute error for all predictors
#' on all samples except the sample in question.
#' @param predictions Prediction data frame, where rows are samples, and
#' columns are predictors.
#' @param true Named vector of the true outcome values.
#' @return A data frame with the importance metric for each sample and each
#' predictor.
#' @export
ComputeGlobalErrorMetafeature <- function(predictions, true){
  
  # Compute the importance for each sample.
  importanceAll <- lapply(1:nrow(predictions), function(samp){
    # Prediction
    pred <- predictions[-samp,]
    
    # Error
    trueVals <- matrix(rep(true, length(pred)),
                       nrow = length(true),
                       ncol = ncol(pred))
    trueVals <- trueVals[-samp,]
    
    err <- abs(trueVals - pred)
    medErr <- unlist(lapply(1:ncol(err), function(c){
      return(stats::median(err[,c]))
    }))
    importance <- 1 - medErr
    
    # Get the 1 - error percentile over all pairs.
    percentileVals <- seq(1, 100, by = 1) / 100
    quantiles <- stats::quantile(importance, percentileVals)
    percentiles <- unlist(lapply(importance, function(c){
      cvec <- rep(c, length(quantiles))
      which_match <- which.min(abs(cvec - quantiles))
      return(percentileVals[which_match])
    }))
    importance <- percentiles
    
    # Return.
    cat(".")
    return(importance)
  })
  importanceFinal <- t(do.call(cbind, importanceAll))
  rownames(importanceFinal) <- rownames(predictions)
  colnames(importanceFinal) <- colnames(predictions)
  return(importanceFinal)
}

#' Computes the importance as the median absolute error for all predictors
#' samples in the training data.
#' @param predictions Prediction data frame, where rows are samples, and
#' columns are predictors.
#' @param true Named vector of the true outcome values.
#' @return A data frame with the importance metric for each sample and each
#' predictor. For this metric, the value will be the same for all samples.
#' @export
ComputeGlobalErrorMetafeatureTest <- function(predictions, true){
  
  # Compute the importance for each sample.
  importanceAll <- lapply(1:nrow(predictions), function(samp){
    # Prediction
    pred <- predictions
    
    # Error
    trueVals <- matrix(rep(true, length(pred)),
                       nrow = length(true),
                       ncol = ncol(pred))
    trueVals <- trueVals
    
    err <- abs(trueVals - pred)
    medErr <- unlist(lapply(1:ncol(err), function(c){
      return(stats::median(err[,c]))
    }))
    importance <- 1 - medErr
    
    # Get the 1 - error percentile over all pairs.
    percentileVals <- seq(1, 100, by = 1) / 100
    quantiles <- stats::quantile(importance, percentileVals)
    percentiles <- unlist(lapply(importance, function(c){
      cvec <- rep(c, length(quantiles))
      which_match <- which.min(abs(cvec - quantiles))
      return(percentileVals[which_match])
    }))
    importance <- percentiles
    
    # Return.
    cat(".")
    return(importance)
  })
  importanceFinal <- t(do.call(cbind, importanceAll))
  rownames(importanceFinal) <- rownames(predictions)
  colnames(importanceFinal) <- colnames(predictions)
  return(importanceFinal)
}
ncats/MultiOmicsGraphPrediction documentation built on Aug. 23, 2023, 9:19 a.m.
rdrr.io home R language documentation Run R code online
CRAN packages Bioconductor packages R-Forge packages GitHub packages
Note that we can't provide technical support on individual packages. You should contact the package authors for that.
ncats/MultiOmicsGraphPrediction
Network-Based Ensemble Prediction from IntLIM Predictors

R/metafeaturefunctions.R
In ncats/MultiOmicsGraphPrediction: Network-Based Ensemble Prediction from IntLIM Predictors

R Package Documentation

Browse R Packages

We want your feedback!

ncats/MultiOmicsGraphPrediction Network-Based Ensemble Prediction from IntLIM Predictors

R/metafeaturefunctions.R In ncats/MultiOmicsGraphPrediction: Network-Based Ensemble Prediction from IntLIM Predictors

R Package Documentation

Browse R Packages

We want your feedback!

ncats/MultiOmicsGraphPrediction
Network-Based Ensemble Prediction from IntLIM Predictors

R/metafeaturefunctions.R
In ncats/MultiOmicsGraphPrediction: Network-Based Ensemble Prediction from IntLIM Predictors