R/run-capony-par.R

Defines functions canopy.sample.single.numi canopy.sample.parallel.mod

Documented in canopy.sample.parallel.mod

#' Run Canopy in parallel
#' @param sna_obj An output object from the make_sna_mat() function.
#' @param cna_obj An output object from the make_cna_mat() function.
#' @param Y An output object from the make_Y_mat() function.
#' @param projectname Name of the project to save results.
#' @param K Vector of the number of clones to be used.
#' @param numchain Number of chains for the MCMC. Default is 15.
#' @param max.simrun Maximum number of simulations to perform before convergence. Default is 100000.
#' @param min.simrun Minimum number of simulations to perform before convergence. Default is 10000.
#' @param burnin Burn-in period for the MCMC. Default is 10.
#' @param thin Thining parameter for the MCMC. Default is 5.
#' @param parallel Should this be run in parallel. Default is FALSE.
#' @return Y
#' @export
#' @examples
#' @import
#' dplyr
#' dtplyr
#' Canopy
#' foreach
#' doParallel
#' parallel

canopy.sample.parallel.mod = function(R, X, WM, Wm, epsilonM, epsilonm, C = NULL,
                                  Y, K, numchain, max.simrun, min.simrun, writeskip, projectname,
                                  cell.line = NULL, plot.likelihood = NULL,nCores = 1) {
  if (!is.matrix(R)) {
    stop("R should be a matrix!")
  }
  if (!is.matrix(X)) {
    stop("X should be a matrix!")
  }
  if (!is.matrix(WM)) {
    stop("WM should be a matrix!")
  }
  if (!is.matrix(Wm)) {
    stop("Wm should be a matrix!")
  }
  if (min(K) < 2) {
    stop("Smallest number of subclones should be >= 2!\n")
  }
  if (numchain < 2) {
    stop("Number of chains should be >= 2!\n")
  }
  if (is.null(cell.line)) {
    cell.line = FALSE
  }
  if (is.null(plot.likelihood)) {
    plot.likelihood = TRUE
  }
  if (is.null(C)) {
    C = diag(nrow(WM))
    colnames(C) = rownames(C) = rownames(WM)
  }
  if (any(colSums(C) != 1)) {
    stop("Matrix C should have one and only one 1 for each column!")
  }
  if ( plot.likelihood){
    pdf(file = paste(projectname, "_likelihood.pdf", sep = ""), width = 10, height = 5)
  }

  sampname = colnames(R)
  sna.name = rownames(R)
  cna.region.name = rownames(C)
  cna.name = colnames(C)
  sampchain = vector("list", length(K))

  ki = 1
  for (k in K) {
    cat("Sample in tree space with", k, "subclones\n")

    #begin parallel
    # nCores <- detectCores() - 1
    cl <- makeCluster(nCores)
    clusterExport(cl, list("k", "R", "X", "WM", "Wm", "epsilonM", "epsilonm", "C",
                           "Y",  "max.simrun", "min.simrun", "writeskip", "projectname",
                           "cell.line", "plot.likelihood",
                           "sampname", "sna.name", "cna.region.name", "cna.name",
                           "canopy.sample.single.numi"), envir = environment())
    clusterEvalQ(cl, library(Canopy))

    sampchainList = parLapply(cl, 1:numchain, function(numi) {
      sampchainOut = canopy.sample.single.numi(numi, k, R, X, WM, Wm, epsilonM, epsilonm, C,
                                               Y,  max.simrun, min.simrun, writeskip, projectname,
                                               cell.line, plot.likelihood,
                                               sampname, sna.name, cna.region.name, cna.name)
      return( sampchainOut )
    })

    stopCluster(cl)
    #end parallel

    sampchaink = lapply(sampchainList, function(x) x[["sampchaink"]])
    sampchaink.lik = lapply(sampchainList, function(x) x[["sampchaink.lik"]])
    sampchaink.accept.rate = lapply(sampchainList, function(x) x[["sampchaink.accept.rate"]])


    ###################################### plotting and saving #####
    if (plot.likelihood) {
      par(mfrow=c(1,2))
      xmax=ymin=ymax=rep(NA,numchain)
      for(i in 1:numchain){
        xmax[i]=max(which((!is.na(sampchaink.lik[[i]]))))
        ymin[i]=sampchaink.lik[[i]][1]
        ymax[i]=sampchaink.lik[[i]][xmax[i]]
      }

      plot(sampchaink.lik[[1]],xlim=c(1,max(xmax)),ylim=c(min(ymin),max(ymax)),
           xlab='Iteration',ylab='Log-likelihood',type='l',
           main=paste('Post. likelihood:',k,'branches'))
      for(numi in 2:numchain){
        points(sampchaink.lik[[numi]],xlim=c(1,max(xmax)),ylim=c(min(ymin),max(ymax)),col=numi,type='l')
      }

      plot(sampchaink.accept.rate[[1]],ylim=c(0,1),xlim=c(1,max(xmax)),
           xlab='Iteration',ylab='Acceptance rate',type='l',
           main=paste('Acceptance rate:',k,'branches'))
      for(numi in 2:numchain){
        points(sampchaink.accept.rate[[numi]],ylim=c(0,1),xlim=c(1,max(xmax)),col=numi,type='l')
      }
      par(mfrow=c(1,1))
    }
    sampchain[[ki]] = sampchaink

    ki = ki + 1
  }
  if(plot.likelihood) {
    dev.off()
  }
  return(sampchain)
}



canopy.sample.single.numi = function(numi, k, R, X, WM, Wm, epsilonM, epsilonm, C,
                                     Y, max.simrun, min.simrun, writeskip, projectname,
                                     cell.line, plot.likelihood,
                                     sampname, sna.name, cna.region.name, cna.name){

  # cat("\tRunning chain", numi, "out of", numchain, "...\n")
  ###################################### Tree initialization #####
  text = paste(paste(paste(paste("(", 1:(k - 1), ",", sep = ""),
                           collapse = ""), k, sep = ""), paste(rep(")", (k - 1)),
                                                               collapse = ""), ";", sep = "")
  runif.temp=runif(1)
  if(k == 5 & runif.temp<0.5){
    text = c('(1,((2,3),(4,5)));')
  }else if(k == 6 & runif.temp < 1/3){
    text = c('(1,((2,3),(4,(5,6))));')
  }else if(k == 6 & runif.temp > 2/3){
    text = c('(1,(2,((3,4),(5,6))));')
  }else if(k == 7 & runif.temp > 1/4 & runif.temp <= 2/4){
    text=c('(1,((2,3),(4,(5,(6,7)))));')
  }else if(k == 7 & runif.temp > 2/4 & runif.temp <= 3/4){
    text = c('(1,((2,3),((4,5),(6,7))));')
  }else if(k == 7 & runif.temp > 3/4){
    text = c('(1,((2,(3,4)),(5,(6,7))));')
  }
  tree <- read.tree(text = text)
  tree$sna = initialsna(tree, sna.name)
  # if(k>=5){tree$relation=getrelation(tree)}
  tree$Z = getZ(tree, sna.name)
  tree$P = initialP(tree, sampname, cell.line)
  tree$cna = initialcna(tree, cna.name)
  tree$cna.copy = initialcnacopy(tree)
  CMCm = getCMCm(tree, C)  # get major and minor copy per clone
  tree$CM = CMCm[[1]]
  tree$Cm = CMCm[[2]]  # major/minor copy per clone
  tree$Q = getQ(tree, Y, C)
  tree$H = tree$Q  # start as all SNAs that precede CNAs land
  # on major copies
  tree$VAF = getVAF(tree, Y)
  tree$likelihood = getlikelihood(tree, R, X, WM, Wm, epsilonM,
                                  epsilonm)
  ###################################### Sample in tree space #####
  sampi = 1
  writei = 1
  samptree = vector("list", max.simrun)
  samptree.lik=rep(NA, max.simrun)
  samptree.accept=rep(NA, max.simrun)
  samptree.accept.rate=rep(NA, max.simrun)
  while (sampi <= max.simrun) {  # sampi: MCMC iteration number
    ######### sample sna positions
    tree.new = tree
    tree.new$sna = sampsna(tree)
    tree.new$Z = getZ(tree.new, sna.name)
    tree.new$Q = getQ(tree.new, Y, C)
    tree.new$H = tree.new$Q
    tree.new$VAF = getVAF(tree.new, Y)
    tree.new$likelihood = getlikelihood(tree.new, R, X,
                                        WM, Wm, epsilonM, epsilonm)
    tree.temp=addsamptree(tree,tree.new)
    tree=tree.temp[[1]]
    samptree.accept[sampi]=tree.temp[[2]]
    if (sampi%%writeskip == 0) {
      samptree[[writei]] = tree
      writei = writei + 1
    }
    samptree.lik[sampi]=tree$likelihood
    samptree.accept.rate[sampi]=mean(samptree.accept[max(1,sampi-999):sampi])
    # if ((sampi >= 2*min.simrun) & (samptree.lik[sampi] <= mean(samptree.lik[max((sampi-1000),1):max((sampi-1),1)])) &
    #     (samptree.accept.rate[sampi] <= mean(samptree.accept.rate[max((sampi-1000),1):max((sampi-1),1)])) &
    #     (samptree.accept.rate[sampi] <= 0.1)) break
    sampi = sampi + 1
    ######## sample cna positions
    tree.new = tree
    tree.new$cna = sampcna(tree)
    CMCm = getCMCm(tree.new, C)
    tree.new$CM = CMCm[[1]]
    tree.new$Cm = CMCm[[2]]
    tree.new$Q = getQ(tree.new, Y, C)
    tree.new$H = tree.new$Q
    tree.new$VAF = getVAF(tree.new, Y)
    tree.new$likelihood = getlikelihood(tree.new, R, X,
                                        WM, Wm, epsilonM, epsilonm)
    tree.temp=addsamptree(tree,tree.new)
    tree=tree.temp[[1]]
    samptree.accept[sampi]=tree.temp[[2]]
    if (sampi%%writeskip == 0) {
      samptree[[writei]] = tree
      writei = writei + 1
    }
    samptree.lik[sampi]=tree$likelihood
    samptree.accept.rate[sampi]=mean(samptree.accept[max(1,sampi-999):sampi])
    # if ((sampi >= 2*min.simrun) & (samptree.lik[sampi] <= mean(samptree.lik[max((sampi-1000),1):max((sampi-1),1)])) &
    #     (samptree.accept.rate[sampi] <= mean(samptree.accept.rate[max((sampi-1000),1):max((sampi-1),1)])) &
    #     (samptree.accept.rate[sampi] <= 0.1)) break
    sampi = sampi + 1
    ######## sample P (clonal proportions)
    tree.new = tree
    tree.new$P = sampP(tree.new, cell.line)
    tree.new$VAF = getVAF(tree.new, Y)
    tree.new$likelihood = getlikelihood(tree.new, R, X,
                                        WM, Wm, epsilonM, epsilonm)
    tree.temp=addsamptree(tree,tree.new)
    tree=tree.temp[[1]]
    samptree.accept[sampi]=tree.temp[[2]]
    if (sampi%%writeskip == 0) {
      samptree[[writei]] = tree
      writei = writei + 1
    }
    samptree.lik[sampi]=tree$likelihood
    samptree.accept.rate[sampi]=mean(samptree.accept[max(1,sampi-999):sampi])
    # if ((sampi >= 2*min.simrun) & (samptree.lik[sampi] <= mean(samptree.lik[max((sampi-1000),1):max((sampi-1),1)])) &
    #     (samptree.accept.rate[sampi] <= mean(samptree.accept.rate[max((sampi-1000),1):max((sampi-1),1)])) &
    #     (samptree.accept.rate[sampi] <= 0.1)) break
    sampi = sampi + 1
    ######## sample major and minor copy number
    tree.new = tree
    tree.new$cna.copy = sampcnacopy(tree.new)
    CMCm = getCMCm(tree.new, C)
    tree.new$CM = CMCm[[1]]
    tree.new$Cm = CMCm[[2]]
    tree.new$VAF = getVAF(tree.new, Y)
    tree.new$likelihood = getlikelihood(tree.new, R, X,
                                        WM, Wm, epsilonM, epsilonm)
    tree.temp=addsamptree(tree,tree.new)
    tree=tree.temp[[1]]
    samptree.accept[sampi]=tree.temp[[2]]
    if (sampi%%writeskip == 0) {
      samptree[[writei]] = tree
      writei = writei + 1
    }
    samptree.lik[sampi]=tree$likelihood
    samptree.accept.rate[sampi]=mean(samptree.accept[max(1,sampi-999):sampi])
    # if ((sampi >= 2*min.simrun) & (samptree.lik[sampi] <= mean(samptree.lik[max((sampi-1000),1):max((sampi-1),1)])) &
    #     (samptree.accept.rate[sampi] <= mean(samptree.accept.rate[max((sampi-1000),1):max((sampi-1),1)])) &
    #     (samptree.accept.rate[sampi] <= 0.1)) break
    sampi = sampi + 1
    ######## sample whether SNA falls in major or minor allele
    if (any(tree$Q == 1)) {
      tree.new = tree
      q.temp = which(tree.new$Q == 1)
      q.temp.change = q.temp[sample.int(1, n = length(q.temp))]
      tree.new$H[q.temp.change] = 1 - tree.new$H[q.temp.change]
      tree.new$VAF = getVAF(tree.new, Y)
      tree.new$likelihood = getlikelihood(tree.new, R, X,
                                          WM, Wm, epsilonM, epsilonm)
      tree.temp=addsamptree(tree,tree.new)
      tree=tree.temp[[1]]
      samptree.accept[sampi]=tree.temp[[2]]
      if (sampi%%writeskip == 0) {
        samptree[[writei]] = tree
        writei = writei + 1
      }
      samptree.lik[sampi]=tree$likelihood
      samptree.accept.rate[sampi]=mean(samptree.accept[max(1,sampi-999):sampi])
      # if ((sampi >= 2*min.simrun) & (samptree.lik[sampi] <= mean(samptree.lik[max((sampi-1000),1):max((sampi-1),1)])) &
      #     (samptree.accept.rate[sampi] <= mean(samptree.accept.rate[max((sampi-1000),1):max((sampi-1),1)])) &
      #     (samptree.accept.rate[sampi] <= 0.1)) break
      sampi = sampi + 1
    }
  }
  sampchaink = samptree[1:(writei - 1)]
  sampchaink.lik = samptree.lik
  sampchaink.accept.rate = samptree.accept.rate

  return(list(sampchaink = sampchaink,
              sampchaink.lik = sampchaink.lik,
              sampchaink.accept.rate = sampchaink.accept.rate))
}
AxelitoMartin/TumorEvolution documentation built on Jan. 12, 2021, 2:43 a.m.