COINCIDE

Please note that this vignette is incomplete and is not intended for package users at this time. It is here as a placeholder until the vignette is complete.

Preprocess the data

library(PDATK)
library(S4Vectors)
library(MultiAssayExperiment)
library(qs)
library(ggplot2)
library(BiocParallel)
library(doParallel)

# Random seed and number of repetitions passed to the model code below
seed <- 55555
reps <- 1000

data(CSPC_MAE)

# Subset to only the features shared across all experiments, then apply
# rankFeatures to each experiment in turn
CSPCmae <- intersectRows(CSPC_MAE)
experiments(CSPCmae) <- endoapply(experiments(CSPCmae), rankFeatures)

# Candidate features: getTopFeatures with numFeats=20 on every cohort, plus
# getTopFeatures on the whole object with numFeats set to 1% (rounded down)
# of the number of row names in the first experiment
top20FeaturesPerCohort <- lapply(experiments(CSPCmae), getTopFeatures,
    numFeats=20)
numFeatsOverall <- floor(0.01 * length(rownames(CSPCmae)[[1]]))
top1PctFeaturesOverall <- getTopFeatures(CSPCmae, numFeats=numFeatsOverall,
    na.rm=TRUE)
topFeatures <- unique(c(top1PctFeaturesOverall, unlist(top20FeaturesPerCohort)))

# Keep only the selected features and normalize them
topFeaturesMAE <- CSPCmae[topFeatures, , ]
normMAE <- PDATK::normalize(topFeaturesMAE, MARGIN=2,
    method=c('scale', 'center', 'medianImpute'))

Create and Train a ConsensusMetaclusteringModel

# Seed R's random number generator before building the model; the same seed
# is also handed to the model constructor
set.seed(seed)
conClustModel <- ConsensusMetaclusteringModel(
    normMAE,
    randomSeed=seed
)

# Train the model using `reps` repetitions
trainedConClustModel <- trainModel(
    conClustModel,
    reps=reps
)

# Select, for each model, the k with the fewest ambiguous item-consensus values.
#
# For every element of `models`, `calcICL(x, plot=NULL)` is called and its
# `itemConsensus` table is read. For each distinct value in the table's `k`
# column, the rows whose `itemConsensus` lies strictly between
# `subinterval[1]` and `subinterval[2]` are counted; the k with the smallest
# count is returned (the first such k in order of appearance on ties).
#
# @param models A list (or list-like) of objects accepted by `calcICL`.
# @param subinterval A length-2 numeric vector giving the exclusive lower and
#   upper bounds of the interval in which a consensus value counts as
#   ambiguous.
#
# @return A numeric vector with one selected k per element of `models`.
optimalKFunction <- function(models, subinterval) {
    vapply(models, FUN=function(model) {
        itemCons <- calcICL(model, plot=NULL)[["itemConsensus"]]
        kValues <- unique(itemCons$k)
        # Fail with a clear message instead of an opaque subscript error when
        # there is nothing to choose from
        if (length(kValues) == 0L) {
            stop("No item consensus values available to select an optimal k",
                call.=FALSE)
        }
        # Flag rows strictly inside the interval once, for all k
        isAmbiguous <- itemCons$itemConsensus > subinterval[1] &
            itemCons$itemConsensus < subinterval[2]
        # na.rm=TRUE: missing consensus values are not counted as ambiguous
        ambiguousCount <- vapply(kValues, FUN=function(k) {
            sum(isAmbiguous & itemCons$k == k, na.rm=TRUE)
        }, numeric(1))
        kValues[which.min(ambiguousCount)]
    }, numeric(1))
}

# Predict classes from the trained model, passing the (0.1, 0.9) interval
# as `subinterval`
classifiedConClustModel <- local({
    consensusSubinterval <- c(0.1, 0.9)
    predictClasses(trainedConClustModel, subinterval=consensusSubinterval)
})
bhklab/PanCuRx documentation built on Dec. 30, 2021, 4:59 p.m.