R/beast-R0-full.R

#' BEAST_R0_Full
#' 
#' Main simulation function to investigate the effects of sampling strategy against TMRCA/Clock Rate Bias. As opposed
#' to \code(BEAST_R0_Check), this function considers all sampling schemes.
#' 
#' \code(BEAST_R0_Full) proceeds by simulating a tree in a local directory, sampling from it and placing the nexus samples
#' within the local directory, before then creating the BEAST2.3xml file in a parallel network directory, and 
#' then running the BEAST analysis using the batch collation file.
#' 
#' Highly prescriptive function declaration, which considers all the simulation parameters needed, the range of
#' sigma parameters, the BEAST template required, directories etc. 
#'
#' SIMULATION PARAMETERS
#' @param t.cont - boolean if generation times are desribed by a gamma distribution (TRUE) or poisso (FALSE). Default=TRUE
#' @param withinhost - boolean if the same sequence (FALSE) or time explcitly (TRUE) evolved sequences are inherited. Default=TRUE
#' @param fixed.offspring - boolean if number of offspring is always equal to R0. Default=FALSE
#' @param fixed.generations - boolean if generation times are always equal to Tg. Default=FALSE
#' @param outgroup - boolean if outgroup evolved over time=root-to-tip time is included. Default=FALSE
#' @param prob - boolean if simulation model is time explicit, or determined by probabilities of mutations determined by sequences. Default=FALSE
#' @param N - Population Size. Default = 1000
#' @param size.reps - Vector of offspring distribution parameters. Default=c(0,10,1,0.1)
#' @param mu - Clock Rate. Default=1e-4
#' @param R0 - Basic reproduction number. Default=2
#' @param Tg - Generation Time. Default=2.6
#' @param deSilvaTest - boolean if time and sequence evolution is handled as the methodology in the deSilva 12 paper.   
#' @param fasta.file - provided fasta file sequence. If NULL (Default), random sequence of length 1000 is generated.
#' SAMPLING/REPETITION PARAMETERS
#' @param sample.reps - number of tree subsamples under each scheme. Default = 10
#' @param tree.reps - number of unique tree simulations conducted under each scheme. Default = 20
#' @param sigma.reps - variables total. Default = c(1,5,9,13,18)
#' @param n.tips - number of sequences to be sampled. Default = 50
#' @param oldestboolean - boolean if oldest sequence not root is included. Default=TRUE
#' @param template.xml - path for template xml. Default is a strict molecular clock, exponential coalescent, HKY85+G4 xml 
#' found within the data folder called test.xml
#' @param dir - Local Directory where simulations tree and nexus files are stored. Defaults specific to author. 
#' @param network.path - Network Directory which a cluster can see to access the BEAST xml files. Defaults specific to author.
#' 
#' @return Returns a data frame of collated statstics 
#' 
#' 
#' @export
#' @rdname BEAST_R0_Full
#' 
#'

BEAST_R0_Full <- function(t.cont=TRUE,withinhost=TRUE,fixed.offspring=TRUE,fixed.generations=FALSE,outgroup=FALSE,prob=TRUE,deSilvaTest=FALSE,
                          N=1000,n.tips=50,sample.reps=10,tree.reps=20, size.reps=c(0,10,1,0.1),relaxed=FALSE, sigma.reps=c(1,5,9,13,18),
                          fasta.file=NULL,mu=0.0001,R0=2,Tg=50,bs=TRUE,random=FALSE, oldestboolean=FALSE,
                          template.xml = "C:/Users/Oliver/Google Drive/Academic Work/Imperial/git/sims4/Data/test.xml",
                          dir="C:/Users/Oliver/Google Drive/Academic Work/Imperial/O15-12/BEAST/Data Collection/VarianceCheckGenerational/",
                          network.path="\\\\fi--san02.dide.ic.ac.uk\\homes\\olw13\\Data\\"){
  
  ## HANDLE VARIABLES ##
  ##########################################################
  ptm <- proc.time()
  
  ## CREATE RESULTS STORAGE ##
  ##########################################################
  dir.create(path=dir)
  network <- paste(network.path,tail(strsplit(dir,"/")[[1]],1),"\\",sep="")
  dir.create(path=network)
  network.dir <- paste(network.path,tail(strsplit(dir,"/")[[1]],1),"\\","mu_",mu,"_R0_",R0,"_Tg_",Tg,"\\",sep="")
  dir.create(path=network.dir)
  param.dir <- paste(dir,"mu_",mu,"_R0_",R0,"_Tg_",Tg,"/",sep="")
  dir.create(path=param.dir)
  
  ## MAIN LOOP ##
  ##########################################################
  
  ## set up for loop for changes in variation
  
  for (a in 1:length(size.reps)){
    
    ## create progress bar
    pb <- winProgressBar(title=paste(a,": Tree Reps",sep=""), label="0% done", min=0, max=tree.reps, initial=0)
    
    
    size <- size.reps[a]
    if(a==1){
      var.dir <- paste(param.dir,"Size",size,"/",sep="")  
      network.var.dir <- paste(network.dir,"Size",size,"\\",sep="")
    } else {
      var.dir <- paste(param.dir,"Size",size,"/",sep="")  
      network.var.dir <- paste(network.dir,"Size",size,"\\",sep="")
    }
    
    
    dir.create(var.dir)
    dir.create(network.var.dir)
    
    # R0 broken, size = 10, size = 0.1
    
    for (b in 1:tree.reps){
      
      sub.var.dir <- paste(var.dir,"rep",b,"/",sep="")
      network.sub.var.dir <- paste(network.var.dir,"rep",b,"\\",sep="")
      dir.create(sub.var.dir)
      dir.create(network.sub.var.dir)
      
      ## for loop for repetitions
      # simulate 10 trees
      
      
      ## simulate tree
      if(a==1){
        if(prob==TRUE){
          case.tree <- Probability_Sim(fasta.file,mu=mu,R0=R0,Tg=Tg,fixed.offspring=TRUE,fixed.generations=fixed.generations,
                                       t.cont=t.cont,outgroup=outgroup,size=size)
        } else {
          case.tree <- Time_Sim(fasta.file,mu=mu,R0=R0,Tg=Tg,withinhost=withinhost,fixed.offspring=TRUE,fixed.generations=fixed.generations,
                                t.cont=t.cont,outgroup=outgroup,size=size,deSilvaTest=deSilvaTest)
        }
      } else {
        if(prob==TRUE){
          case.tree <- Probability_Sim(fasta.file,mu=mu,R0=R0,Tg=Tg,fixed.offspring=fixed.offspring,fixed.generations=fixed.generations,
                                       t.cont=t.cont,outgroup=outgroup,size=size)
        } else {
          case.tree <- Time_Sim(fasta.file,mu=mu,R0=R0,Tg=Tg,withinhost=withinhost,fixed.offspring=fixed.offspring,fixed.generations=fixed.generations,
                                t.cont=t.cont,outgroup=outgroup,deSilvaTest=deSilvaTest,size=size)
        }
      }
      save(case.tree,file=paste(sub.var.dir,"casetree.RData",sep=""))
      
      ###############################################
      ## UNIFORM      
      ###############################################
      # sample sample.rep times in a uniform fashion
      
      
      Optimised_Tip_Samples(case.tree=case.tree,file=sub.var.dir,n.tips=n.tips,n.bs = sample.reps,outgroup=outgroup,
                            sigma=1,bs=bs,random=FALSE,oldestboolean=oldestboolean,bs.label="bs",t.cont=t.cont)
      
      
      ## create beast xml files
      
      for(j in 1:sample.reps){
        
        ## create BEAST xml in the parent data directory
        Nex_2_BEASTxml(template.xml = template.xml,
                       nex = paste(sub.var.dir,"bs",j,".nex",sep=""),file = paste(network.sub.var.dir,"bs",j,".beast.xml",sep=""),
                       rep.str = paste("bs",j,sep=""),cont = t.cont)
      }
      
      ## create necessary submit bat in the network share
      fileConn<-file(paste(network.sub.var.dir,"treerep",b,".uniform.cluster.bat",sep=""))
      writeLines(paste("job submit /scheduler:fi--dideclusthn.dide.ic.ac.uk /stdout:",
                       network.sub.var.dir,"uniform.out",b,".txt /stderr:",network.sub.var.dir,"uniform.err",b,".txt ",
                       "/numcores:1-1 /jobtemplate:GeneralNodes ", paste(network.sub.var.dir,"treerep",b,".uniform.beast.bat",sep=""),sep=""), 
                 fileConn)
      close(fileConn)
      
      ## create necessary beast bat in the network share
      fileConn2<-file(paste(network.sub.var.dir,"treerep",b,".uniform.beast.bat",sep=""))
      writeLines(paste("call setjava64 \n",
                       "FOR /L %%i IN (1 1 ",sample.reps,") do ( \n",
                       "java -Djava.library.path=\"\\\\fi--san02.dide.ic.ac.uk\\homes\\olw13\\libhmsbeagle\" -jar", 
                       " \"\\\\fi--san02.dide.ic.ac.uk\\homes\\olw13\\BEAST\\lib\\beast.jar\"",
                       " -working -overwrite -beagle -beagle_CPU -beagle_SSE -beagle_instances 8 -threads 8 ",
                       paste("\"",network.sub.var.dir,"bs%%i%.beast.xml\"\n",
                             ")",sep=""),
                       sep=""),fileConn2)
      close(fileConn2)
      
      ## call cluster BEAST
      system(paste(network.sub.var.dir,"treerep",b,".uniform.cluster.bat",sep=""))
      
      ##################################
      ## OLDEST
      ##################################
      # sample sample.rep times with sigma = 19.5
      
      
      
      bs.label <- paste("dsbs",sigma.reps[sl],"_",sep="")
      Optimised_Tip_Samples(case.tree=case.tree,file=sub.var.dir,n.tips=n.tips,n.bs = sample.reps,outgroup=outgroup, discretised=TRUE,
                            sigma=19.5,bs=bs,random=TRUE,oldestboolean=oldestboolean,bs.label=bs.label,t.cont=t.cont,proportional=FALSE)
      
      
      ## create beast xml files
      
      for(j in 1:sample.reps){
        
        ## create BEAST xml in the parent data directory
        Nex_2_BEASTxml(template.xml = template.xml,
                       nex = paste(sub.var.dir,bs.label,j,".nex",sep=""),file = paste(network.sub.var.dir,bs.label,j,".beast.xml",sep=""),
                       rep.str = paste(bs.label,j,sep=""),cont = t.cont)
      }
      
      ## create necessary submit bat in the network share
      fileConn<-file(paste(network.sub.var.dir,"treerep",b,bs.label,".cluster.bat",sep=""))
      writeLines(paste("job submit /scheduler:fi--dideclusthn.dide.ic.ac.uk /stdout:",
                       network.sub.var.dir,bs.label,".out",b,".txt /stderr:",network.sub.var.dir,bs.label,".err",b,".txt ",
                       "/numcores:1-1 /jobtemplate:GeneralNodes ", paste(network.sub.var.dir,"treerep",b,bs.label,".beast.bat",sep=""),sep=""), 
                 fileConn)
      close(fileConn)
      
      ## create necessary beast bat in the network share
      fileConn2<-file(paste(network.sub.var.dir,"treerep",b,bs.label,".beast.bat",sep=""))
      writeLines(paste("call setjava64 \n",
                       "FOR /L %%i IN (1 1 ",sample.reps,") do ( \n",
                       "java -Djava.library.path=\"\\\\fi--san02.dide.ic.ac.uk\\homes\\olw13\\libhmsbeagle\" -jar", 
                       " \"\\\\fi--san02.dide.ic.ac.uk\\homes\\olw13\\BEAST\\lib\\beast.jar\"",
                       " -working -overwrite -beagle -beagle_CPU -beagle_SSE -beagle_instances 8 -threads 8 ",
                       paste("\"",network.sub.var.dir,bs.label,"%%i%.beast.xml\"\n",
                             ")",sep=""),
                       sep=""),fileConn2)
      close(fileConn2)
      
      ## call cluster BEAST
      system(paste(network.sub.var.dir,"treerep",b,bs.label,".cluster.bat",sep=""))
      
      ##################################
      ## PROPORTIONAL
      ##################################
      # sample sample.rep times in a discretised fashion
      
      for(psl in 1:length(sigma.reps)){      
        
        bs.label <- paste("psbs",sigma.reps[psl],"_",sep="")
        Optimised_Tip_Samples(case.tree=case.tree,file=sub.var.dir,n.tips=n.tips,n.bs = sample.reps,outgroup=outgroup, discretised=FALSE,
                              sigma=sigma.reps[psl],bs=bs,random=TRUE,oldestboolean=oldestboolean,bs.label=bs.label,t.cont=t.cont,proportional=TRUE)
        
        
        ## create beast xml files
        
        for(j in 1:sample.reps){
          
          ## create BEAST xml in the parent data directory
          Nex_2_BEASTxml(template.xml = template.xml,
                         nex = paste(sub.var.dir,bs.label,j,".nex",sep=""),file = paste(network.sub.var.dir,bs.label,j,".beast.xml",sep=""),
                         rep.str = paste(bs.label,j,sep=""),cont = t.cont)
        }
        
        ## create necessary submit bat in the network share
        fileConn<-file(paste(network.sub.var.dir,"treerep",b,bs.label,".cluster.bat",sep=""))
        writeLines(paste("job submit /scheduler:fi--dideclusthn.dide.ic.ac.uk /stdout:",
                         network.sub.var.dir,bs.label,".out",b,".txt /stderr:",network.sub.var.dir,bs.label,".err",b,".txt ",
                         "/numcores:1-1 /jobtemplate:GeneralNodes ", paste(network.sub.var.dir,"treerep",b,bs.label,".beast.bat",sep=""),sep=""), 
                   fileConn)
        close(fileConn)
        
        ## create necessary beast bat in the network share
        fileConn2<-file(paste(network.sub.var.dir,"treerep",b,bs.label,".beast.bat",sep=""))
        writeLines(paste("call setjava64 \n",
                         "FOR /L %%i IN (1 1 ",sample.reps,") do ( \n",
                         "java -Djava.library.path=\"\\\\fi--san02.dide.ic.ac.uk\\homes\\olw13\\libhmsbeagle\" -jar", 
                         " \"\\\\fi--san02.dide.ic.ac.uk\\homes\\olw13\\BEAST\\lib\\beast.jar\"",
                         " -working -overwrite -beagle -beagle_CPU -beagle_SSE -beagle_instances 8 -threads 8 ",
                         paste("\"",network.sub.var.dir,bs.label,"%%i%.beast.xml\"\n",
                               ")",sep=""),
                         sep=""),fileConn2)
        close(fileConn2)
        
        ## call cluster BEAST
        system(paste(network.sub.var.dir,"treerep",b,bs.label,".cluster.bat",sep=""))
        
      }
      ##################################        
      ## DISCRETISED
      ##################################      
      
      # sample sample.rep times in a discfretised fashion
      
      
      for(sl in 1:length(sigma.reps)){      
        
        bs.label <- paste("dbs",sigma.reps[sl],"_",sep="")
        Optimised_Tip_Samples(case.tree=case.tree,file=sub.var.dir,n.tips=n.tips,n.bs = sample.reps,outgroup=outgroup, discretised=TRUE,
                              sigma=sigma.reps[sl],bs=bs,random=TRUE,oldestboolean=oldestboolean,bs.label=bs.label,t.cont=t.cont,proportional=FALSE)
        
        
        ## create beast xml files
        
        for(j in 1:sample.reps){
          
          ## create BEAST xml in the parent data directory
          Nex_2_BEASTxml(template.xml = template.xml,
                         nex = paste(sub.var.dir,bs.label,j,".nex",sep=""),file = paste(network.sub.var.dir,bs.label,j,".beast.xml",sep=""),
                         rep.str = paste(bs.label,j,sep=""),cont = t.cont)
        }
        
        ## create necessary submit bat in the network share
        fileConn<-file(paste(network.sub.var.dir,"treerep",b,bs.label,".cluster.bat",sep=""))
        writeLines(paste("job submit /scheduler:fi--dideclusthn.dide.ic.ac.uk /stdout:",
                         network.sub.var.dir,bs.label,".out",b,".txt /stderr:",network.sub.var.dir,bs.label,".err",b,".txt ",
                         "/numcores:1-1 /jobtemplate:GeneralNodes ", paste(network.sub.var.dir,"treerep",b,bs.label,".beast.bat",sep=""),sep=""), 
                   fileConn)
        close(fileConn)
        
        ## create necessary beast bat in the network share
        fileConn2<-file(paste(network.sub.var.dir,"treerep",b,bs.label,".beast.bat",sep=""))
        writeLines(paste("call setjava64 \n",
                         "FOR /L %%i IN (1 1 ",sample.reps,") do ( \n",
                         "java -Djava.library.path=\"\\\\fi--san02.dide.ic.ac.uk\\homes\\olw13\\libhmsbeagle\" -jar", 
                         " \"\\\\fi--san02.dide.ic.ac.uk\\homes\\olw13\\BEAST\\lib\\beast.jar\"",
                         " -working -overwrite -beagle -beagle_CPU -beagle_SSE -beagle_instances 8 -threads 8 ",
                         paste("\"",network.sub.var.dir,bs.label,"%%i%.beast.xml\"\n",
                               ")",sep=""),
                         sep=""),fileConn2)
        close(fileConn2)
        
        ## call cluster BEAST
        system(paste(network.sub.var.dir,"treerep",b,bs.label,".cluster.bat",sep=""))
        
      }
      
      ###############################################
      ## update progress bar
      info <- sprintf("%d%% done", round(((((b)/tree.reps)*100))))
      setWinProgressBar(pb, round((((b)/tree.reps)*tree.reps)), label=info)
      
      
    }
    
  }
  
  ## print times
  close(pb)
  print(paste(tree.reps,"trees analysed, concerning ",sample.reps," samples",((proc.time() - ptm)[3]),"secs"))
}
OJWatson/sims4 documentation built on May 7, 2019, 8:33 p.m.