R/clusterFun.R

# cluster based on functional profiles
# not sure of this featuresif metaFeat is true: correlated features are clustered before clustering of samples
library(pheatmap)
library(devtools)
install_github("richabatra/ConsensusClusterPlus")
library(ConsensusClusterPlus)
library(glmnet)

# how can I make clusterFUN to have the parameters as consensus?
getFeatureCoefs <- function(cluster.ids, enrichMat, clusterNum){
  design.mat <- model.matrix(cluster.ids ~ data.matrix(enrichMat))
  if(clusterNum >2) {
    glmfit.cv <- cv.glmnet(design.mat, cluster.ids, alpha=1, family="multinomial")
    coef.mat <- do.call(cbind, coef(glmfit.cv, s=glmfit.cv$lambda.1se))
  } else {
    glmfit.cv <- cv.glmnet(design.mat, cluster.ids, alpha=1)
    coef.mat <- coef(glmfit.cv, s=glmfit.cv$lambda.1se)
  }
  plotMat <- coef.mat[-c(1:2), ]
  sigCoef <- rownames(plotMat)[which(rowSums(plotMat)>0)]
  res <- list(sigCoef, plotMat, coef.mat)
  names(res) <- c("sigCoef", "plotMat", "coef.mat")
  return(res)
}

clusterFun <- function(countMatrix, scData, fun2enrich="tfs", method, metaFeat="FALSE", resDir){
  # ss enrichment
  enrichMat <- funEnrichment(countMatrix, scData, fun2enrich="tfs")
  write.csv(enrichMat, file=paste0(resDir, "/", fun2enrich, "_enrichment.csv"))
  enrichMat <- t(enrichMat)
  # consensus clustering
  distMat <- dist(enrichMat, method = "euclidean")
  clusterRes <- ConsensusClusterPlus(distMat,maxK=6,reps=50,pItem=0.8,pFeature=1,
                                 title=resDir,clusterAlg="hc",seed=1262118388.71279,plot="png")

  clusterNum <- clusterRes$deltaK$k[which.max(clusterRes$deltaK$deltaK)]
  clusterData <- data.frame(enrichMat)
  clusterData$clusterIDs <- clusterRes$default[[clusterNum]]$consensusClass
  clusterData <- clusterData[order(clusterData$clusterIDs), ]
  write.csv(clusterData, file=paste0(resDir, "/", fun2enrich, "_enrichmentCluster.csv"))
  # plot clusters
  mdsfit <- cmdscale(distMat, eig = TRUE, k = 2)
  MDSdim1 <- mdsfit$points[, 1]
  MDSdim2 <- mdsfit$points[, 2]
  pdf(paste0(resDir, "/", "clusterMDS.pdf"))
  plot(MDSdim1, MDSdim2, pch = 19, col=cbPalette[clusterData$clusterIDs], main="colored by clusterIDs")
  dev.off()
  # feature coeficient
  cluster.ids <- clusterRes$default[[clusterNum]]$consensusClass
  glmRes <- getFeatureCoefs (cluster.ids, enrichMat, clusterNum)
  if(length(glmRes$sigCoef) >0){
    rownames(glmRes$plotMat) <- colnames(enrichMat)
    colnames(glmRes$plotMat) <- 1:clusterNum
    pdf(paste0(resDir, "/", "featureCoefs.pdf"))
    pheatmap(glmRes$plotMat[which(rowSums(glmRes$plotMat)>0), ], cluster_rows = F, cluster_cols = F, display_numbers = T )
    dev.off()
    clustSig <- lapply(1:ncol(glmRes$plotMat), FUN=function(x) as.vector(rownames(glmRes$plotMat))[which(glmRes$plotMat[, x]>0)])
    clusterSigData <- lapply(clustSig, FUN=function(x)clusterData[, which(names(clusterData)%in%x)])
  } else{
    clusterSigData <- NULL
  }
 res <- list(clusterRes, glmRes, clusterSigData)
 names(res) <- c("clusterRes", "glmRes", "clusterSigData")
 return(res)
}
richabatra/FunRNA documentation built on May 4, 2019, 7:43 p.m.