R/beast-lsd-bootstrap.R

## function declaration

BEAST_LSD_Bootstrap <- function(t.exp=TRUE,N=1000,n.tips=50,n.bs=50,sigma.reps=seq(from=1,to=(N/n.tips)-0.5,by=1.5),
                          fasta.file="Data/1000.fasta",mu=0.0001,R0=2.4,Tg=50,bs=TRUE,sample=FALSE, oldestboolean=FALSE,
                          dir="C:/Users/Oliver/Google Drive/Academic Work/Imperial/O15-12/BEAST/Data Collection/LSDUniform/",
                          lsd.dir="C:\\Users\\Oliver\\Desktop\\lsd",network.path="\\\\fi--san02.dide.ic.ac.uk\\homes\\olw13\\Data\\"){
  
  ## HANDLE VARIABLES ##
  ##########################################################
  if(all(sigma.reps>=1)==FALSE) stop("sigma reps contains values less than 1")
  ptm <- proc.time()
  ## CREATE RESULTS STORAGE ##
  ##########################################################
  dir.create(path=dir)
  network <- paste(network.path,tail(strsplit(dir,"/")[[1]],1),"\\",sep="")
  dir.create(path=network)
  network.dir <- paste(network.path,tail(strsplit(dir,"/")[[1]],1),"\\","mu_",mu,"_R0_",R0,"_Tg_",Tg,"\\",sep="")
  dir.create(path=network.dir)
  param.dir <- paste(dir,"mu_",mu,"_R0_",R0,"_Tg_",Tg,"/",sep="")
  dir.create(path=param.dir)
  res <- matrix(nrow = n.bs, ncol = length(sigma.reps))
  m.res <- matrix(nrow = n.bs, ncol = length(sigma.reps))
  ## MAIN LOOP ##
  ##########################################################
  
  ## create progress bar
  pb <- winProgressBar(title="Bootstrap Loops", label="0% done", min=0, max=length(sigma.reps), initial=1)
  
    ## simulate tree
    if(t.exp==TRUE){
      case.tree <- Time_Sim(fasta.file,mu=mu,R0=R0,Tg=Tg)
      save(case.tree,file=paste(param.dir,"casetree.RData",sep=""))
    } else {
      case.tree <- Generation_Sim(fasta.file=,mu=mu,R0=R0,Tg=Tg)
      save(case.tree,file=paste(param.dir,"casetree.RData",sep=""))
    }
    
    ## create the full tree for labelling reasons
    ft <- Case_To_Phylo(case.tree, full=TRUE)
  
  
  ## set up parameter loops
  for (i in 1:length(sigma.reps)){
    
    ## create dirs for results
    sub.dir <- paste(param.dir,"sigma",sigma.reps[i],"/",sep="")
    dir.create(path=sub.dir)
    network.sub.dir <- paste(network.dir,"sigma",sigma.reps[i],"\\",sep="")
    dir.create(path=network.sub.dir)
    
    ## set up bootstrap loops
    Optimised_Tip_Samples(case.tree=case.tree,file=sub.dir,n.tips=n.tips,
                          sigma=sigma.reps[i],bs=bs,n.bs=n.bs,sample=sample,oldestboolean=oldestboolean)
    
    
    ## create necessary LSD files
    ## create related trees forcing them to be rooted
    for(j in 1:n.bs){
      nex <- ape::read.nexus.data(paste(sub.dir,"bs",j,".nex",sep=""))
      if(j==1){ 
        tree <- write.tree(ape::njs(dist.dna(as.DNAbin.list(nex),model = "raw")))
        write(paste(substr(tree, 1, nchar(tree)-1),";",sep=""),file=paste(sub.dir,"bs_tree.txt",sep=""))
        df <- matrix(nrow=N,ncol=2)
        df[1:N,1] <- c(ft$node.label,ft$tip.label)
        df[1:N,2] <- sapply(strsplit(c(ft$node.label,ft$tip.label), "_"), "[[", 3)
        write.table(df[,2],file=paste(sub.dir,"bs_date.date",sep=""),quote = FALSE,na = "",row.names = df[,1],col.names = N)
      } else {
        tree <- write.tree(ape::njs(dist.dna(as.DNAbin.list(nex),model = "raw")))
        write(paste(substr(tree, 1, nchar(tree)-1),";",sep=""),file=paste(sub.dir,"bs_tree.txt",sep=""),append=TRUE)
      }
    }
    
    ## write and execute LSD related script
    sh.sub.dir <- paste("\"",gsub(pattern = "/",replacement = "\\",fixed=TRUE,sub.dir),sep="")
    
    ##########################
    ## OLD HACK ASSUMING THAT LSD FAILS PERIODICALLY WHENEVER IT'S HAVING A MOMENT
    #if(i==1){
    #  cmd.list <- list(cmd = paste(lsd.dir," -i ",sh.sub.dir,"bs_tree.txt\" -d ",sh.sub.dir,"bs_date.date\" -r a -n ",50,sep=""))
    #  cmd <- paste(lsd.dir," -i ",sh.sub.dir,"bs_tree.txt\" -d ",sh.sub.dir,"bs_date.date\" -c -r a -n ",50,sep="")
    #} else {
    #  cmd <- paste(cmd,"\n",lsd.dir," -i ",sh.sub.dir,"bs_tree.txt\" -d ",sh.sub.dir,"bs_date.date\" -c -r a -n ",50,sep="")
    #  cmd.list$cmd[i] <- paste(lsd.dir," -i ",sh.sub.dir,"bs_tree.txt\" -d ",sh.sub.dir,"bs_date.date\" -r a -n ",50,sep="")
    #}
    ##########################

    cmd <- paste(lsd.dir," -i ",sh.sub.dir,"bs_tree.txt\" -d ",sh.sub.dir,"bs_date.date\" -c -r as -n ",n.bs,sep="")
    system(cmd)
    
    ## colate and plotbootstrap error against sigma
    lsd.tb <- read.table(file=paste(sub.dir,"bs_tree.txt.result",sep=""),skip = 20,nrows = n.bs)
    res[,i] <- lsd.tb[,6]
    m.res[,i] <- as.numeric(substr(lsd.tb[,4], 1, nchar(as.character(lsd.tb[,4]))-1))
    
    ## find most accurate sample set
    best.nex <- which((res[,i])^2==min((res[,i]^2)))
    if (length(best.nex)>1) best.nex <- best.nex[1]
    
    ## create BEAST xml in the parent data directory
    Nex_2_BEASTxml(template.xml = "C:/Users/Oliver/Google Drive/Academic Work/Imperial/git/sims4/Data/test.xml",
                   nex = paste(sub.dir,"bs",best.nex,".nex",sep=""),file = paste(network.sub.dir,"sigma",sigma.reps[i],".beast.xml",sep=""),
                   rep.str = paste("sigma",sigma.reps[i],sep=""),cont = t.exp)
    
    ## create necessary submit bat in the network share
    fileConn<-file(paste(network.sub.dir,"sigma",sigma.reps[i],".cluster.bat",sep=""))
    writeLines(paste("job submit /scheduler:fi--dideclusthn.dide.ic.ac.uk /stdout:",
                     network.sub.dir,"out",sigma.reps[i],".txt /stderr:",network.sub.dir,"err",sigma.reps[i],".txt ",
                     "/numcores:1-1 /jobtemplate:GeneralNodes ", paste(network.sub.dir,"sigma",sigma.reps[i],".beast.bat",sep=""),sep=""), 
               fileConn)
    close(fileConn)
    
    ## create necessary beast bat in the network share
    fileConn2<-file(paste(network.sub.dir,"sigma",sigma.reps[i],".beast.bat",sep=""))
    writeLines(paste("call setjava64 \n",
               "java -Djava.library.path=\"\\\\fi--san02.dide.ic.ac.uk\\homes\\olw13\\libhmsbeagle\" -jar", 
               " \"\\\\fi--san02.dide.ic.ac.uk\\homes\\olw13\\BEAST\\lib\\beast.jar\"",
               " -working -overwrite -beagle -beagle_CPU -beagle_SSE -beagle_instances 8 -threads 8 ",
               paste("\"",network.sub.dir,"sigma",sigma.reps[i],".beast.xml\"",sep=""),
               sep=""),fileConn2)
    close(fileConn2)
                   
    ## call cluster BEAST
    system(paste(network.sub.dir,"sigma",sigma.reps[i],".cluster.bat",sep=""))
    
    ## update progress bar
    info <- sprintf("%d%% done", round(((((i)/length(sigma.reps))*100))))
    setWinProgressBar(pb, round((((i)/length(sigma.reps)))), label=info)
    
    Sys.sleep(15)
  }
  
  ################################################################
  ## BIG UGLY HACK AS THE PROGRAM KEEPS TIMING OUT OR SOMETHING ##
  ################################################################
  #fileConn2<-file("C:/Users/Oliver/Desktop/lsd.bat")
  #writeLines(cmd, fileConn2)
  #close(fileConn2)
  #system("C:\\Users\\Oliver\\Desktop\\lsd.bat")
  
  #for (i in 1:length(sigma.reps)){
  #  sub.dir <- paste(param.dir,"sigma",sigma.reps[i],"/",sep="")
  #  info = file.info(paste(sub.dir,"bs_tree.txt.result",sep=""))
  #  if (info$size == 0 | is.na(info$size)){
  #    system(cmd.list$cmd[i])
  #  }
  #  lsd.tb <- read.table(file=paste(sub.dir,"bs_tree.txt.result",sep=""),skip = 20,nrows = n.bs)
  #  res[,i] <- lsd.tb[,6]
  #}
  ################################################################
    
  boxplot(res)
  return(res)
}
OJWatson/sims4 documentation built on May 7, 2019, 8:33 p.m.