R/runSnapAddPmat.R

Defines functions runSnapAddPmatSingle runSnapAddPmat.default runSnapAddPmat

Documented in runSnapAddPmat

#' Add cell-by-peak matrix to a snap file
#'
#' This function takes a peak list and a snap object as input 
#' and add cell-by-peak matrix into the existing snap file.
#' 
#' @param obj A snap obj.
#' @param peak A GenomicRanges object that contains peak coordinates.
#' @param path.to.snaptools Path to snaptools excutable file.
#' @param tmp.folder Directory to store temporary files generated by this program. 
#' @param num.cores Number of CPUs to use.
#' @param buffer.size Buffer size for incrementally increasing internal array size to store reads alignment information. In
#' most cases, you don't have to change this parameter. However, if there are very high coverage dataset that each barcode has
#' more than 10000 fragments, it's recommended to specify a smaller buffer size in order to decrease memory usage (but it will take longer time to read snap files) [1000].
#' 
#' @importFrom parallel mclapply
#' @export
runSnapAddPmat <- function(obj, peak, path.to.snaptools, tmp.folder, num.cores, buffer.size) {
  UseMethod("runSnapAddPmat", obj);
}

#' @export
runSnapAddPmat.default <- function(
	obj,
	peak, 
	path.to.snaptools,
	tmp.folder,
	num.cores=1,
	buffer.size=500
){
	cat("Epoch: checking input parameters ... \n", file = stderr())
	if(missing(obj)){
		stop("obj is missing");
	}else{
		if(!is(obj, "snap")){
			stop("obj is not a snap object");
		}
	}
	
	if(!is(peak, "GRanges")){
		stop("peak is not a GRanges object");
	}else{
		if((x = length(peak)) == 0L){
			stop("peak is empty")
		}
	}
	
	if(missing(path.to.snaptools)){
		stop("path.to.snaptools is missing");
	}else{
		if(!file.exists(path.to.snaptools)){
			stop("path.to.snaptools does not exist");
		}
		
		flag = tryCatch({
			file_test('-x', path.to.snaptools);	
		},
		error=function(cond){
			return(FALSE)
		})
		if(flag == FALSE){
			stop("path.to.snaptools is not an excutable file");
		}
		
	}

	if(missing(tmp.folder)){
		stop("tmp.folder is missing")
	}else{
		if(!dir.exists(tmp.folder)){
			stop("tmp.folder does not exist");			
		}
	}
	
	fileList = as.list(unique(obj@file));
	flag = mclapply(fileList, function(file){
		runSnapAddPmatSingle(
			file,
			peak=peak, 
			path.to.snaptools=path.to.snaptools,
			buffer.size=buffer.size,
			tmp.folder=tmp.folder
		)
	}, mc.cores=num.cores);
	
}

runSnapAddPmatSingle <- function(
	file,
	peak, 
	path.to.snaptools,
	tmp.folder,
	buffer.size=500
){
	cat("Epoch: checking input parameters ... \n", file = stderr())
	if(missing(file)){
		stop("file is missing");
	}else{
		if(!file.exists(file)){
			stop("file does not exist");
		}
		if(!isSnapFile(file)){
			stop("file is not a snap file");
		}
	}
	
	if(!is(peak, "GRanges")){
		stop("peak is not a GRanges object");
	}else{
		if((x = length(peak)) == 0L){
			stop("peak is empty")
		}
	}
	
	
	if(missing(path.to.snaptools)){
		stop("path.to.snaptools is missing");
	}else{
		if(!file.exists(path.to.snaptools)){
			stop("path.to.snaptools does not exist");
		}
		
		flag = tryCatch({
			file_test('-x', path.to.snaptools);	
		},
		error=function(cond){
			return(FALSE)
		})
		if(flag == FALSE){
			stop("path.to.snaptools is not an excutable file");
		}
		
	}

	if(missing(tmp.folder)){
		stop("tmp.folder is missing")
	}else{
		if(!dir.exists(tmp.folder)){
			stop("tmp.folder does not exist");			
		}
	}
	
	# write down the barcode info
	tmp.file.peak = tempfile(pattern = "run_snap_add_pmat", tmpdir = tmp.folder, fileext = ".bed")
	write.table(as.data.frame(peak)[,1:3], file = tmp.file.peak, append = FALSE, quote = FALSE, sep = "\t",
	                 eol = "\n", na = "NA", dec = ".", row.names = FALSE,
	                 col.names = FALSE, qmethod = c("escape", "double"),
	                 fileEncoding = "")
	
	cat("Epoch: adding cell-by-peak matrix into snap file ...\n", file = stderr())
	flag = system2(command=path.to.snaptools, 
		args=c("snap-add-pmat", 
			   "--snap-file", file, 
			   "--peak-file", tmp.file.peak, 
			   "--buffer-size", buffer.size,
			   "--tmp-folder", tmp.folder
			   )		
		);		

	if (flag != 0) {
	   	stop("'runSnapAddPmat' call failed");
	}
	return(flag);
}
r3fang/SnapATAC documentation built on March 29, 2022, 4:33 p.m.