R/runeems_snps_setup.R

Defines functions eems_cleanup runeems_snps_setup

Documented in eems_cleanup runeems_snps_setup

#' @title runeems_snps_setup function
#' 
#' 
#' Wrapper function for preparing to run EEMS. A bash script is produced that will run EEMS. Optionally generates a '*.outer' file from the '*.coords' file.
#' 
#' @param x An object of class genind or vcfR, or a character string with path to a diffs or VCF file.
#' @param coords Character string with path to coordinates file, which has two columns with longitude and latitude coordinates (decimal degree format) of individuals in the genind object. Columns should be space or tab separated.
#' @param save.in Character string with path to directory where EEMS input and output files should be saved. This directory must not exist prior to running this function, but its parent directory must exist.
#' Files and directories generated by this function:
#'    /save.in/
#'    /save.in/data
#'    /save.in/data/data.coord
#'    /save.in/data/data.diffs
#'    /save.in/data/data.outer
#'    /save.in/mcmc    ### This will contain as many mcmc subdirectories as the value of nchains.
#'    /save.in/params  ### This will contain as many parameter files as the value of nchains.
#' @param outer Either NULL (the default) or a character string with path to the '*.outer' file, which has two columns with longitude and latitude coordinates (decimal degree format) defining the perimeter of a polygon covering the region to model with EEMS. See EEMS documentation.
#' If NULL, this function will generate a "*.outer" file automatically; the user will be prompted to accept the auto-generated *.outer file prior to running eems.
#' If you don't want to use the automatically generated *.outer file, you can create a custom one here: https://www.keene.edu/campus/maps/tool/
#' @param exe.path Character string with path to runeems_snps executable, or NULL (the default) if you've defined the EEMS path with the function config_miscwrappers. Use paths_misc.wrappers() to check if this path has been set.
#' @param n.sites The number of sites. Determined automatically if 'x' is a VCF file or vcfR or genind object, but must be supplied if 'x' is a path to a diffs matrix.
#' @param pl Ploidy. Default is 2.
#' @param nDemes Number of demes. Default is 300. This can be a single number or a numeric vector of length nchains with value to use for each chain.
#' @param numMCMCIter Number of MCMC iterations (i.e., chain length). Default is 10000000. This can be a single number or a numeric vector of length nchains.
#' @param numBurnIter Number of Burnin iterations (i.e., chain length). Default is 1000000. This can be a single number or a numeric vector of length nchains.
#' @param numThinIter Number of iterations to ignore before sampling the next MCMC iteration. Default is 9999. This can be a single number or a numeric vector of length nchains.
#' @param nchains Number of chains to run. Default is 3.
#' @param ... Arguments to pass to create.outer. See ?create.outer() for possible arguments.
#' @return Nothing is returned.
#' @export runeems_snps_setup
runeems_snps_setup <- function(x, coords, save.in, outer=NULL, exe.path=NULL, n.sites=NULL, pl=2, nDemes=300, numMCMCIter = 10000000, numBurnIter = 1000000, numThinIter = 9999, nchains=3, ...){
 # data.dirname <- paste0(sample(c(letters,LETTERS,0:9),size=10,replace=T),collapse="")
 # data.dirpath <- paste0(tempdir(),"/",data.dirname)
 # dir.create(data.dirpath)
	if(is.null(exe.path)){
		exe.path <- exepaths_miscwrappers("runeems_snps")
		if(is.na(exe.path)){
			stop("exe.path is NULL and runeems_snps has not been defined previously. Use config_miscwrappers function to define the full path to the runeems_snps executable")
		}
	}
	if(dir.exists(save.in)){
		command.to.remove.directory <- paste0("unlink('",save.in,"',recursive=T)")
		stop(paste("Output directory already exists. Change save.in or run '",command.to.remove.directory,"' to remove existing save.in"))
		# overwrite.key <- paste(sample(c(letters,LETTERS,0:9),size=20,replace=T),collapse="")
		# confirm.overwrite <- readline(prompt=paste0("Output directory already exists. Enter this code: ",overwrite.key," to overwrite save.in, or anything else to cancel: "))
		# if(confirm.overwrite!=overwrite.key){
		# 	stop()
		# }
	} else {
		dir.create(save.in)
	}
	### Check input arguments and stop if wrong format.
	if(length(nDemes)!=1 & length(nDemes)!= nchains){
		stop("nDemes should have length 1 or length equal to nchains")
	} else {
		if(length(nDemes)==1){
			nDemes = rep(nDemes,nchains)
		}
	}
	if(length(numMCMCIter)!=1 & length(numMCMCIter)!= nchains){
		stop("numMCMCIter should have length 1 or length equal to nchains")
	} else {
		if(length(numMCMCIter)==1){
			numMCMCIter = rep(numMCMCIter,nchains)
		}
	}
	if(length(numBurnIter)!=1 & length(numBurnIter)!= nchains){
		stop("numBurnIter should have length 1 or length equal to nchains")
	} else {
		if(length(numBurnIter)==1){
			numBurnIter = rep(numBurnIter,nchains)
		}
	}
	if(length(numThinIter)!=1 & length(numThinIter)!= nchains){
		stop("numThinIter should have length 1 or length equal to nchains")
	} else {
		if(length(numThinIter)==1){
			numThinIter = rep(numThinIter,nchains)
		}
	}
	### Set the additional arguments list.
	# additional.args <- list(...)
	### Set input.data equal to data argument
	input.data <- x
	### Create directories in save.in to hold a copy of the input (data) files, parameter files, and mcmc output
	# Create input directory containing copy of input files
	input.dirpath <- paste0(save.in,"/data")
	dir.create(input.dirpath)
	# Create output mcmc directory and subdirectories
	mcmcpath         <- paste0(save.in,"/mcmc")
	mcmcpath.subdirs <- paste0(mcmcpath,"/",paste0("chain",1:nchains))
	dir.create(mcmcpath)
	sapply(X=mcmcpath.subdirs,FUN=dir.create)
	# Create directory to hold params files
	params.path <- file.path(save.in,"params")
	dir.create(params.path)
	# Copy *.coord file to input.dirpath and rename as "data.coord"
#	system(paste("cp",coords, paste0(input.dirpath,"/data.coord")))
	# Load coordinates matrix and determine number of individuals 
	coords.df <- read.table(coords)
	if(!is.numeric(coords.df[1,1])){
		coords.df <- read.table(coords, header=F)
	}
	coords.df <- coords.df[,1:2]
	n.coords  <- nrow(coords.df)
	### Write coords.df to input.dirpath, without rownames or column names, and space-delineated
	write.table(x=coords.df,file=paste0(input.dirpath,"/data.coord"),col.names=F,row.names=F,quote=F,sep=" ")
	### Check if input.data is an object of class vcfR
	if(is(input.data,"vcfR")){
		n.indv <- (length(colnames(attributes(input.data)[[3]]))-1)
		if(n.coords!=n.indv){
			stop(paste(n.coords,"individuals with coordinates but",n.indv,"individuals with snps"))
		}
		genind     <- vcfR::vcfR2genind(input.data)
		input.data <- genind
	}
	### Check if input.data is an object of class genind
	if(is(input.data,"genind")){
		n.indv <- nrow(attributes(input.data)$tab)
		if(n.coords!=n.indv){
			stop(paste(n.coords,"individuals with coordinates but",n.indv,"individuals with snps"))
		}
		data.diffs <- genind2diffs(genind.obj=input.data,output.file=paste0(input.dirpath,"/data.diffs"))
		diffs      <- data.diffs[["diffs"]]
		nIndiv     <- data.diffs[["nIndiv"]]
		nSites     <- data.diffs[["nSites"]]
	} else {
		if(is(input.data,"character")){
			first.line <- readLines(input.data,n=1)
			if(grep("VCF",first.line)==1){
				vcf.data   <- vcfR::read.vcfR(input.data)
				gt.mat     <- vcf.data@gt[,-1]
				n.indv     <- ncol(gt.mat)
				#n.indv <- (length(colnames(attributes(vcf.data)[[3]]))-1)
				if(n.coords!=n.indv){
					stop(paste(n.coords,"individuals with coordinates but",n.indv,"individuals with snps"))
				}
				genind     <- vcfR::vcfR2genind(vcf.data)
				data.diffs <- genind2diffs(genind.obj=genind,ploidy=as.numeric(pl),output.file=paste0(input.dirpath,"/data.diffs"))
				if(!file.exists(paste0(input.dirpath,"/data.diffs"))){
					stop("data.diffs does not exist")
				}
				diffs      <- data.diffs[["diffs"]]
				nIndiv     <- data.diffs[["nIndiv"]]
				nSites     <- data.diffs[["nSites"]]
			} else {
				data.diffs <- read.table(input.data,sep=" ",header=F)
				n.indv     <- ncol(data.diffs)
				if(n.coords!=n.indv){
					stop(paste(n.coords,"individuals with coordinates but",n.indv,"individuals in diffs matrix"))
				}
				### Check that the diffs file has the correct dimmensions
				if(ncol(data.diffs)==nrow(data.diffs)){
					nIndiv     <- ncol(data.diffs)
					if(is.null(n.sites)){
						stop("n.sites must be supplied when input.data is a diffs file")
					} else {
						nSites     <- as.numeric(n.sites)
					}
					system(paste("cp",input.data,paste0(input.dirpath,"/data.diffs")))
				} else {
					stop("input.data path must be VCF or diffs file")
				}
			}
		}
	}
	# If path to the outer file is NULL, generate one automatically, otherwise copy it to input.dirpath and rename as "data.outer"
	if(is.null(outer)){
		data_outer <- create.outer(coords=coords.df, output.path=paste0(input.dirpath,"/data.outer"), plot.output.path=paste0(save.in,"/habitat_outer.pdf"),...)
	} else {
		system(paste0("cp '",outer,"' '",file.path(input.dirpath,"data.outer"),"'"))
	}
	### Generate diffs file, saving to input.dirpath with name "data.diffs"
	# data.diffs <- genind2diffs(genind.obj=genind,output.file=paste0(input.dirpath,"/data.diffs"))
	# diffs      <- data.diffs["diffs"]
	# nIndiv     <- c(data.diffs["nIndiv"])
	# nSites     <- data.diffs["nSites"]
	params.files.path <- file.path(params.path,paste0("params-chain",1:nchains,".ini"))
	for(i in 1:nchains){
		L1 <- paste0("datapath = ",paste0(input.dirpath,"/data"))
		L2 <- paste0("mcmcpath = ",mcmcpath.subdirs[i])
		L3 <- paste0("nIndiv = ",as.integer(nIndiv))
		L4 <- paste0("nSites = ",as.integer(nSites))
		L5 <- paste0("nDemes = ",as.integer(nDemes[i]))
		L6 <- paste0("diploid = ",TRUE)
		L7 <- paste0("numMCMCIter = ",as.integer(numMCMCIter[i]))
		L8 <- paste0("numBurnIter = ",as.integer(numBurnIter[i]))
		L9 <- paste0("numThinIter = ",as.integer(numThinIter[i]))
		param.lines <- c(L1,L2,L3,L4,L5,L6,L7,L8,L9)
		writeLines(param.lines,params.files.path[i])
		### Write a separate bash script for each chain.
		command.write.i <- c("#!/bin/bash",paste(exe.path,"--params",params.files.path[i]))
		writeLines(command.write.i,paste0(save.in,"/runeems_snps_chain",i,".sh"))
	}
	# command.write <- c("#!/bin/bash",paste(exe.path,"--params",params.files.path))
	# writeLines(command.write,paste0(save.in,"/runeems_snps.sh"))
	# if(!setup.only){
	#	print(paste0("Analysis setup complete. To begin, run bash scripts: '",save.in,"/runeems_snps_chain",1:nchains,".sh'"))
	#	#command.exe   <- gsub(" & $","",paste(command.write,collapse=" & "))
	#	#system(command.exe)
	#} else {
	#	print(paste0("Analysis setup complete. To begin, run bash scripts: '",save.in,"/runeems_snps_chain",1:nchains,".sh'"))
	#}
	#print(paste0("Setup complete. To run EEMs, run bash scripts: '",paste(paste0(save.in,"/runeems_snps_chain",1:nchains,".sh'")), collapse=","))
	#return(paste0(save.in,"/runeems_snps.sh"))
	result <- list.files(save.in,pattern="^runeems_snps.+.sh$",full.names=T)
	# make bash scripts executable
	system(paste("chmod u+x",result))
	# list with locations of bash scripts
	textmessage <- list(result)
	names(textmessage) <- "Setup complete. To run EEMs, run these bash scripts:"
	print(textmessage)
	return(result)
}
#' @examples
#' library(misc.wrappers)
#' 
#' # Path to VCF with SNPs
#' vcf.path    <- file.path(system.file("extdata", package = "misc.wrappers"), "simK4.vcf.gz")
#' # Path to file with longitude and latitude of sampling locality of each individual
#' coords.path <- file.path(system.file("extdata", package = "misc.wrappers"), "simK4_coords.txt")
#' # Where to save output
#' save.path   <- file.path(system.file("examples", package = "misc.wrappers"),"simK4")
#' # Setup environment and input files for runeems_snps
#' eems.setup  <- runeems_snps_setup(x=vcf.path, coords=coords.path, save.in=save.path, numMCMCIter = 100000, numBurnIter = 10000, numThinIter = 999)

#' @title cleanup eems
#' 
#' Deletes eems_setup bash and R scripts in a directory. Run this after all eems jobs are completed. Does not delete the "jobX.sh" bash files that run eems.
#' 
#' @param dir Character string with path to directory containing eems .sh amd .R scripts
#' @return NULL
#' @export eems_cleanup
eems_cleanup <- function(dir){
	sh.files <- list.files(dir,pattern="^[a-z]{20}.sh$",full.name=TRUE)
	R.files  <- gsub(".sh$",".R",sh.files)
	file.remove(c(sh.files,R.files))
}
JeffWeinell/misc.wrappers documentation built on Sept. 20, 2023, 12:42 p.m.