R/snap-utilities.R

Defines functions createSnapSingle addPmatToSnapSingle addGmatToSnapSingle addBmatToSnapSingle readPeaks readBins isSnapFile.default isSnapFile filterBins.default filterBins extractReads extractReadsFromOneCell filterCells.default filterCells snapRbind calBmatCor.default calBmatCor exportMetaData.default exportMetaData readMetaData.default readMetaData createSnapFromGmat.default createSnapFromGmat createSnapFromPmat.default createSnapFromPmat createSnapFromBmat.default createSnapFromBmat rmGmatFromSnap.default rmGmatFromSnap rmPmatFromSnap.default rmPmatFromSnap rmBmatFromSnap.default rmBmatFromSnap addGmatToSnap.default addGmatToSnap addPmatToSnap.default addPmatToSnap addBmatToSnap.default addBmatToSnap createSnap.default createSnap barcodeInSnapFile.default barcodeInSnapFile showBinSizes.default showBinSizes

Documented in addBmatToSnap addGmatToSnap addPmatToSnap barcodeInSnapFile calBmatCor createSnap createSnapFromBmat createSnapFromGmat createSnapFromPmat exportMetaData extractReads filterBins filterCells isSnapFile readMetaData rmBmatFromSnap rmGmatFromSnap rmPmatFromSnap showBinSizes snapRbind

#' Create an empty snap object
#'
#' This function creates an empty snap object
#'
#' @examples
#' x.sp = newSnap();
#' 
#' @return An empty snap object
#'
#' @importFrom methods new
#' @importFrom GenomicRanges GRanges
#' @export
#' 
newSnap <- function () {
	metaData=data.frame();
	des = character()
	file = as.character(c());
	sample = as.character(c());
	barcode = as.character(c());
	feature = GRanges();
	peak = GRanges();		
	metaData = data.frame();
	bmat=Matrix(nrow=0, ncol=0, sparse=TRUE);		
	pmat=Matrix(nrow=0, ncol=0, sparse=TRUE);
	gmat=Matrix(nrow=0, ncol=0, sparse=TRUE);
	mmat=matrix(0,0,0);
	jmat=newJaccard();
	smat=newDimReduct();	
	graph=newKgraph();
	regModel=c();
	tsne=matrix(nrow=0, ncol=0);	
	umap=matrix(nrow=0, ncol=0);	
	cluster=factor();
	res = new("snap", 
			  des=des,
			  file=file,
			  sample=sample,
			  barcode=barcode, 
			  feature=feature, 
			  peak=peak, 
			  metaData=metaData, 
			  bmat=bmat, 
			  pmat=pmat, 
			  gmat=gmat,
			  mmat=mmat, 
			  jmat=jmat, 
			  smat=smat, 
			  graph=graph, 
			  tsne=tsne, 
			  umap=umap, 
			  cluster=cluster
			  );	
}

#' Show bin sizes in a snap file
#'
#' This function takes a snap-format file name as input and check the bin 
#' sizes or resolutions have been generated for count matrix
#'
#' @param file character. input snap-format file name
#'
#' @examples
#' file.name = system.file("extdata", "demo.snap", package = "SnapATAC");
#' showBinSizes(file.name);
#' 
#' @return integer vector. A vector of integers indicating the bin sizes
#'
#' @export
#'
showBinSizes <- function(file) {
  UseMethod("showBinSizes", file);
}

#' @export
showBinSizes.default <- function(file){
	# this is to check what are the binSizes have been generated
	if(!file.exists(file)){stop("file does not exist!")}
	if(!isSnapFile(file)){stop("input file is not a snap file\n")}		
	binSizeList = tryCatch(binSizeList <- h5read(file, '/AM/binSizeList'), error = function(e) {print(paste("Warning @check.bin.size: '/AM/binSizeList' not found in ", file)); return(numeric())})
	return(binSizeList);
}

#' summarySnap for snap object.
#'
#' This function takes a snap object and returns the summary statistics
#' @name summarySnap
#' @param obj snap; a snap object
#' @rdname summarySnap-methods
#' @importFrom stats median
#' @exportMethod summarySnap
#' @examples
#' file.name = system.file("extdata", "demo.snap", package = "SnapATAC");
#' x.sp = createSnap(file.name, sample="demo");
#' summarySnap(x.sp);
setGeneric("summarySnap", function(obj) standardGeneric("summarySnap"))

#' @rdname summarySnap-methods
#' @aliases summarySnap,snap-method
setMethod("summarySnap", "snap", function(obj){
	if(nrow(obj@metaData) == 0){stop("metaData is empty")}
	barcode = obj@metaData;
	message("Total  number of barcodes: ", length(obj@barcode));	
	message("Median number of sequencing fragments: ", median(barcode$TN));
	message("Median number of uniquely mapped fragments: ", median(barcode$UQ));
	message("Median number of mappability ratio: ", round(median((barcode$UM+1)/(barcode$TN+1)),2));
	message("Median number of properly paired ratio: ", round(median((barcode$PP+1)/(barcode$UM+1)),2));
	message("Median number of duplicate ratio: ", round(median(1 - (barcode$UQ+1)/(barcode$PP+1)),2));
	message("Median number of chrM ratio: ", round(median((barcode$CM+1) / (barcode$UQ+1)),2));
	message("Median number of unique molecules (UMI): ", median(barcode$UQ));
});


#' nrow for snap object.
#'
#' This function takes a snap object and returns number of cells
#' @name nrow
#' @param x snap; a snap object
#' @examples
#' data(demo.sp);
#' nrow(demo.sp);
#' @rdname nrow-methods
#' @aliases nrow,snap-method
#' @exportMethod nrow
setMethod("nrow", "snap", function(x) length(x@barcode));

#' colSums for snap objects
#'
#' This function takes a snap object and returns the column sums of its count matrix.
#' @name colSums
#' @param x A snap object
#' @param mat A charater object indicates what matrix slot to use c("bmat", "pmat", "gmat")
#' @param na.rm A logical variable indicates wether to remove NA in the matrix
#' @examples
#' data(demo.sp);
#' a = colSums(demo.sp, mat="bmat");
#' b = colSums(demo.sp, mat="pmat");
#' d = colSums(demo.sp, mat="gmat");
#' @rdname colSums-methods
#' @aliases colSums,snap-method
#' @exportMethod colSums
setMethod("colSums", "snap", function(x, mat=c("bmat", "pmat", "gmat"), na.rm=TRUE){
	mat = match.arg(mat);
	mat.use = methods::slot(x, mat);
	if((x=nrow(mat.use))==0L){
		stop("mat is empty")
	}
	res = Matrix::colSums(mat.use, na.rm);
	return(res);
});

#' rowSums for snap objects
#'
#' This function takes a snap object and returns the row sums of its count matrix.
#' @name rowSums
#' @param x A snap object
#' @param mat A charater object indicates what matrix slot to use
#' @param na.rm A logical variable indicates wether to remove NA in the matrix
#' 
#' @examples
#' data(demo.sp);
#' a = rowSums(demo.sp, mat="bmat");
#' b = rowSums(demo.sp, mat="pmat");
#' d = rowSums(demo.sp, mat="gmat");
#' 
#' @rdname rowSums-methods
#' @aliases rowSums,snap-method
#' @exportMethod rowSums
setMethod("rowSums", "snap", function(x, mat=c("bmat", "pmat", "gmat"), na.rm=TRUE){
	mat = match.arg(mat);
	mat.use = methods::slot(x, mat);
	if((x=nrow(mat.use))==0L){
		stop("mat is empty")
	}
	res = Matrix::rowSums(mat.use, na.rm);
	return(res);
});


#' rowMeans for snap objects
#'
#' This function takes a snap object and returns the row means of its count matrix.
#' 
#' @name rowMeans
#' @param x A snap object
#' @param mat A charater object indicates what matrix slot to use for calculation
#' @param na.rm A logical variable indicates wether to remove NA in the matrix
#'
#' @examples
#' data(demo.sp);
#' a = rowMeans(demo.sp, mat="bmat");
#' b = rowMeans(demo.sp, mat="pmat");
#' d = rowMeans(demo.sp, mat="gmat");
#' 
#' @rdname rowMeans-methods
#' @importFrom methods slot
#' @aliases rowMeans,snap-method
#' @exportMethod rowMeans
setMethod("rowMeans", "snap", function(x, mat=c("bmat", "pmat", "gmat"), na.rm=TRUE){
	mat = match.arg(mat);
	mat.use = methods::slot(x, mat);
	if((x=nrow(mat.use))==0L){
		stop("mat is empty")
	}
	res = Matrix::rowMeans(mat.use, na.rm);
	return(res);
});


#' colMeans for snap objects
#'
#' This function takes a snap object and returns the column means of its count matrix.
#' 
#' @name colMeans
#' @param x A snap object
#' @param mat A charater object indicates what matrix slot to use c("bmat", "pmat", "gmat").
#' @param na.rm A logical variable indicates wether to remove NA in the matrix.
#' 
#' @examples
#' data(demo.sp);
#' a = colMeans(demo.sp, mat="bmat");
#' b = colMeans(demo.sp, mat="pmat");
#' d = colMeans(demo.sp, mat="gmat");
#' 
#' @rdname colMeans-methods
#' @importFrom methods slot
#' @aliases colMeans,snap-method
#' @exportMethod colMeans
setMethod("colMeans", "snap", function(x, mat=c("bmat", "pmat", "gmat"), na.rm=TRUE){
	mat = match.arg(mat);
	mat.use = methods::slot(x, mat);
	if((x=nrow(mat.use))==0L){
		stop("mat is empty")
	}
	res = Matrix::colMeans(mat.use, na.rm);
	return(res);
});

#' Check snap object
#'
#' This function takes any object as input and check if it is a snap object
#' @param obj A snap object
#' @examples
#' data(demo.sp);
#' is.snap(demo.sp);
#' @rdname is.snap-methods
#' @exportMethod is.snap
setGeneric("is.snap", function(obj) standardGeneric("is.snap"))

#' @rdname is.snap-methods
#' @aliases is.snap,snap-method
setMethod("is.snap", "snap", function(obj) return(is(obj, "snap")));

#' subsetting for snap objects
#'
#' This function takes a snap object and returns the subset of snap object.
#' @param x snap; a snap object
#' @param i integer; selected barcode index
#' @param j integer; selected feature index
#' @param mat character; indicates the slot to subsetting
#' @param drop character; 
#' @examples
#' data(demo.sp);
#' demo.sp[1:10,];
#' demo.sp[,1:10,mat="bmat"];
#' demo.sp[,1:10,mat="pmat"];
#' @export
setMethod("[", "snap",
	function(x,i,j,mat=c("bmat", "pmat", "gmat"), drop="missing"){
		.barcode = x@barcode;
		.file = x@file;
		.sample = x@sample;
		.feature = x@feature;
		.peak = x@peak;
		.bmat = x@bmat;
		.pmat = x@pmat;
		.gmat = x@gmat;
		.mmat = x@mmat;
		.jmat = x@jmat;
		.smat = x@smat;
		.graph = x@graph;
		.cluster = x@cluster;
		.tsne = x@tsne;
		.umap = x@umap;
		.metaData = x@metaData;
		# a single row or column
       if(!missing(i)){
		   if(max(i) > nrow(x)){
			   stop("idx exceeds number of cells");
		   }
		   if(nrow(.bmat) > 0){.bmat <- .bmat[i,,drop=FALSE]}
		   if(nrow(.pmat) > 0){.pmat <- .pmat[i,,drop=FALSE]}
		   if(nrow(.gmat) > 0){.gmat <- .gmat[i,,drop=FALSE]}	   
		   if(nrow(.mmat) > 0){.mmat <- .mmat[i,,drop=FALSE]}	   
		   if(nrow(.jmat@jmat) > 0){.jmat <- .jmat[i,,drop=FALSE]}
		   if(nrow(.smat@dmat) > 0){.smat <- .smat[i,,drop=FALSE]}
		   if(nrow(.tsne) > 0){.tsne <- .tsne[i,,drop=FALSE]}
		   if(nrow(.umap) > 0){.umap <- .umap[i,,drop=FALSE]}
		   if(nrow(.graph@mat) > 0){.graph <- .graph[i,,drop=FALSE]}
		   if(nrow(.metaData) > 0){.metaData <- .metaData[i,,drop=FALSE]}
		   if(length(.cluster) > 0){.cluster <- .cluster[i,drop=FALSE]}
		   if(length(.barcode) > 0){.barcode <- .barcode[i,drop=FALSE]}
		   if(length(.file) > 0){.file <- .file[i,drop=FALSE]}
		   if(length(.sample) > 0){.sample <- .sample[i,drop=FALSE]}
	   }
	   if(!missing(j)){
   			mat = match.arg(mat);
	   		if(mat == "bmat"){
	 		   if(ncol(.bmat) > 0){.bmat <- .bmat[,j,drop=FALSE]}
			   if(length(.feature) > 0){.feature <- .feature[j];}	   
	   		}else if(mat == "pmat"){
 	 		   if(ncol(.pmat) > 0){.pmat <- .pmat[,j,drop=FALSE]}
			   if(length(.peak) > 0){.peak <- .peak[j];}	   
	   		}else if(mat == "gmat"){
 	 		   if(ncol(.gmat) > 0){.gmat <- .gmat[,j,drop=FALSE]}
	   		}
	   }
	   x@bmat = .bmat;
	   x@pmat = .pmat;
	   x@gmat = .gmat;
	   x@mmat = .mmat;
	   x@barcode = .barcode;
	   x@file = .file;
	   x@sample = .sample;
	   x@peak = .peak;
	   x@feature = .feature;
	   x@metaData = .metaData;
	   x@umap = .umap;
	   x@feature = .feature;
	   x@jmat = .jmat;
	   x@smat = .smat;
	   x@graph = .graph;
	   x@cluster = .cluster;
	   x@tsne = .tsne;
	   return(x);
})

#' Check barcode existance in snap file
#'
#' This function takes an array of barcodes and a snap-format file as input 
#' and check whether selected barcodes exist in the snap file.
#' 
#' @param barcode An array of selected barcodes.
#' @param file A snap format file.
#' 
#' @examples
#' file.name = system.file("extdata", "demo.snap", package = "SnapATAC");
#' barcodes = c("ACATTGGCAACCAGGTTGCTGGTATTGGAAGT", "ACATTGGCAAGAGGCAACAAGGATATCTGAGT");
#' barcodeInSnapFile(barcodes, file.name);
#' 
#' @return Return an array of logical variable indicates whether the 
#' barcode exists in snap file.
#' @importFrom rhdf5 h5read
#' @importFrom methods is
#' @export
barcodeInSnapFile <- function(barcode, file){
	UseMethod("barcodeInSnapFile", barcode);
}

#' @export
barcodeInSnapFile.default <- function(barcode, file){
	
	if(missing(file)){
		stop("file is missing");
	}else{
		if(!file.exists(file)){
			stop("file does not exist");
		}
		if(!isSnapFile(file)){
			stop("file is not a snap file");
		}
	}
	
	if(missing(barcode)){
		stop("barcode is missing");
	}else{
		if(!is(barcode, "character")){
			stop("barcode is not a character object");
		}
	}
	
	barcode.ref = as.character(tryCatch(barcode.ref <- h5read(file, '/BD/name'), error = function(e) {print(paste("Warning @barcodeInSnapFile: 'BD/name' not found in ",file)); return(vector(mode="character", length=0))}));
	        options(scipen=999);
	return(barcode %in% barcode.ref);

}

#' Create a snap object from a snap file
#'
#' This function takes a snap-format file as input and create
#' a snap object.
#'
#' @param file Name of a snap-format file.
#' @param sample A short sample name (i.g. "MOS.rep1").
#' @param description Description of the experiment [NULL].
#' @param do.par A logical variable indicates if run this using multiple processors [FALSE].
#' @param num.cores Number of processers to use [1].
#' @examples
#' file.name = system.file("extdata", "demo.snap", package = "SnapATAC");
#' demo.sp = createSnap(file.name, sample="demo", do.par=FALSE);
#' 
#' @return A snap object
#' @importFrom rhdf5 h5read
#' @importFrom parallel mclapply
#' @importFrom methods is
#' @export
createSnap <- function(file, sample, description, do.par, num.cores) {
  UseMethod("createSnap", file);
}

#' @export
createSnap.default <- function(file, sample, description=NULL, do.par=FALSE, num.cores=1){
	
	if(missing(file)){
		stop("file is missing");
	}else{
		if(!is(file, "character")){
			stop("file is not character")
		}
		if(any(duplicated(file))){
			stop("file has duplicate name");
		}		
	}
	
	if(!is.numeric(num.cores)){
		stop("num.cores is not an integer")
	}		
	num.cores = round(num.cores);

	fileList = as.list(file);	
	if(missing(sample)){
		stop("sample is missing");
	}else{
		if(!is(sample, "character")){
			stop("sample is not character")
		}
		if(any(duplicated(sample))){
			stop("sample has duplicate name");
		}
	}
	
	if(length(sample) != length(file)){
		stop("sample has different length with file");
	}
	
	sampleList = as.list(sample);
	
	if(!(is.null(description))){
		if(!is(description, "character")){
			stop("description must be character object")
		}
	}else{
		description=character()
	}
	
	# check if snap files exist
	if(any(do.call(c, lapply(fileList, function(x){file.exists(x)})) == FALSE)){
		idx = which(do.call(c, lapply(fileList, function(x){file.exists(x)})) == FALSE)
		print("error: these files does not exist")
		print(fileList[idx])
		stop()
	}
	
	# check if files are all snap files
	if(any(do.call(c, lapply(fileList, function(x){isSnapFile(x)})) == FALSE)){
		idx = which(do.call(c, lapply(fileList, function(x){isSnapFile(x)})) == FALSE)
		print("error: these files are not snap file")
		print(fileList[idx])
		stop()
	}
	
	message("Epoch: reading the barcode session ...");
	if(do.par){
		obj.ls = mclapply(as.list(seq(fileList)), function(i){
			createSnapSingle(file=fileList[[i]], sample=sampleList[[i]]);
		}, mc.cores=num.cores);
	}else{
		obj.ls = lapply(as.list(seq(fileList)), function(i){
			createSnapSingle(file=fileList[[i]], sample=sampleList[[i]]);
		});		
	}
	
	obj = Reduce(snapRbind, obj.ls);
	rm(obj.ls);
	gc();
	obj@des = description;
	return(obj);
}

#' Add cell-by-bin matrix
#' 
#' This function takes a snap object as input and add the cell-by-bin 
#' matrix to the existing snap object.
#' 
#' @param obj A snap object to add cell-by-bin matrix.
#' @param bin.size Cell-by-bin matrix with bin size of bin.size 
#' will be added to snap object.
#' @param do.par A logical variable indicates whether use multiple processors [FALSE].
#' @param num.cores Number of processors to use [1].
#' 
#' @examples
#' file.name = system.file("extdata", "demo.snap", package = "SnapATAC");
#' demo.sp = createSnap(file.name, sample="demo", do.par=FALSE);
#' showBinSizes(file.name);
#' demo.sp = addBmatToSnap(demo.sp, bin.size=100000, do.par=FALSE);
#' 
#' @return Return a snap object
#' @importFrom rhdf5 h5read
#' @importFrom GenomicRanges GRanges findOverlaps
#' @importFrom IRanges IRanges
#' @importFrom parallel mclapply
#' @importFrom methods is
#' @export
addBmatToSnap <- function(obj, bin.size, do.par, num.cores){
  UseMethod("addBmatToSnap", obj);
}

#' @export
addBmatToSnap.default <- function(obj, bin.size=5000, do.par=FALSE, num.cores=1){	
	# close the previously opened H5 file
	if(exists('h5closeAll', where='package:rhdf5', mode='function')){
		rhdf5::h5closeAll();		
	}else{
		rhdf5::H5close();
	}
	if(missing(obj)){
		stop("obj is missing")
	}else{
		if(!is(obj, "snap")){
			stop("obj is not a snap object")
		}
	}
	
	if(!is.numeric(num.cores)){
		stop("num.cores is not an integer")
	}		
	num.cores = round(num.cores);
	
	fileList = as.list(unique(obj@file));

	# check if snap files exist
	if(any(do.call(c, lapply(fileList, function(x){file.exists(x)})) == FALSE)){
		idx = which(do.call(c, lapply(fileList, function(x){file.exists(x)})) == FALSE)
		print("error: these files does not exist")
		print(fileList[idx])
		stop()
	}
	
	# check if files are all snap files
	if(any(do.call(c, lapply(fileList, function(x){isSnapFile(x)})) == FALSE)){
		idx = which(do.call(c, lapply(fileList, function(x){isSnapFile(x)})) == FALSE)
		print("error: these files are not snap file")
		print(fileList[idx])
		stop()
	}
	
	# check if BM session exist
	if(any(do.call(c, lapply(fileList, function(x){ "AM" %in% h5ls(x, recursive=1)$name  })) == FALSE)){
		idx = which(do.call(c, lapply(fileList, function(x){ "AM" %in% h5ls(x, recursive=1)$name  })) == FALSE)
		print("error: the following nsap files do not contain AM session")
		print(fileList[idx])
		stop()
	}
	
	if(any(do.call(c, lapply(fileList, function(x){(bin.size %in% showBinSizes(x))})) == FALSE)){
		idx = which(do.call(c, lapply(fileList, function(x){(bin.size %in% showBinSizes(x))})) == FALSE)
		print("error: chosen bin size does not exist in the following snap files")
		print(fileList[idx])
		stop()
	}

	# check if bins match
	bin.list = lapply(fileList, function(x){
		readBins(x, bin.size=bin.size)
	})
	
	if(!all(sapply(bin.list, FUN = identical, bin.list[[1]]))){
		stop("bins does not match between snap files, please regenerate the cell-by-bin matrix by snaptools")
	}
	
	# read the snap object
	message("Epoch: reading cell-bin count matrix session ...");
	if(do.par){
		obj.ls = mclapply(fileList, function(file){
			idx = which(obj@file == file)
			addBmatToSnapSingle(obj[idx,], file, bin.size=bin.size);
		}, mc.cores=num.cores);		
	}else{
		obj.ls = lapply(fileList, function(file){
			idx = which(obj@file == file)
			addBmatToSnapSingle(obj[idx,], file, bin.size=bin.size);
		});		
	}
	
	# combine
	if((x=length(obj.ls)) == 1L){
		res = obj.ls[[1]]
	}else{
		res = Reduce(snapRbind, obj.ls);		
	}
	obj@feature = res@feature;
	obj@bmat = res@bmat;
	rm(res, obj.ls);
	gc()
	return(obj);
}

#' Add cell-by-peak matrix
#' 
#' This function takes a snap object as input and add the 
#' cell-by-peak matrix to the existing snap object.
#' 
#' @param obj A snap object.
#' @param do.par A logical varaible indicates whether to use multiple processors [FALSE].
#' @param num.cores Number of processors to use [1].
#' @examples
#' file.name = system.file("extdata", "demo.snap", package = "SnapATAC");
#' demo.sp = createSnap(file.name, sample="demo", do.par=FALSE);
#' showBinSizes(file.name);
#' demo.sp = addPmatToSnap(demo.sp, do.par=FALSE);
#' 
#' @importFrom rhdf5 h5read
#' @importFrom GenomicRanges GRanges findOverlaps
#' @importFrom IRanges IRanges
#' @importFrom methods is
#' @export
addPmatToSnap <- function(obj, do.par, num.cores){
  UseMethod("addPmatToSnap", obj);
}

#' @export
addPmatToSnap.default <- function(obj, do.par=FALSE, num.cores=1){
	# close the previously opened H5 file
	if(exists('h5closeAll', where='package:rhdf5', mode='function')){
		rhdf5::h5closeAll();		
	}else{
		rhdf5::H5close();
	}

	if(missing(obj)){
		stop("obj is missing")
	}else{
		if(!is(obj, "snap")){
			stop("obj is not a snap object")
		}
	}
	
	if(!is.numeric(num.cores)){
		stop("num.cores is not an integer")
	}		
	num.cores = round(num.cores);
	
	
	fileList = as.list(unique(obj@file));

	# check if snap files exist
	if(any(do.call(c, lapply(fileList, function(x){file.exists(x)})) == FALSE)){
		idx = which(do.call(c, lapply(fileList, function(x){file.exists(x)})) == FALSE)
		print("error: these files does not exist")
		print(fileList[idx])
		stop()
	}
	
	# check if files are all snap files
	if(any(do.call(c, lapply(fileList, function(x){isSnapFile(x)})) == FALSE)){
		idx = which(do.call(c, lapply(fileList, function(x){isSnapFile(x)})) == FALSE)
		print("error: these files are not snap file")
		print(fileList[idx])
		stop()
	}
	
	# check if PM session exist
	if(any(do.call(c, lapply(fileList, function(x){ "PM" %in% h5ls(x, recursive=1)$name  })) == FALSE)){
		idx = which(do.call(c, lapply(fileList, function(x){ "PM" %in% h5ls(x, recursive=1)$name  })) == FALSE)
		print("error: the following snap files do not contain PM session")
		print(fileList[idx])
		stop()
	}
	
	# check if bins match
	peak.list = lapply(fileList, function(x){
		readPeaks(x)
	})
	
	if(!all(sapply(peak.list, FUN = identical, peak.list[[1]]))){
		stop("peaks does not match between snap files, please regenerate the cell-by-peak matrix by snaptools using the same peak file")
	}
	
	# read the snap object
	message("Epoch: reading cell-peak count matrix session ...");
	if(do.par){
		obj.ls = mclapply(fileList, function(file){
			idx = which(obj@file == file)
			addPmatToSnapSingle(obj[idx,], file);
		}, mc.cores=num.cores);		
	}else{
		obj.ls = lapply(fileList, function(file){
			idx = which(obj@file == file)
			addPmatToSnapSingle(obj[idx,], file);
		});				
	}
	
	# combine
	if((x=length(obj.ls)) == 1L){
		res = obj.ls[[1]]
	}else{
		res = Reduce(snapRbind, obj.ls);		
	}
	
	# re-order the matrix
	o1 = paste(obj@file, obj@barcode, sep=".");
	o2 = paste(res@file, res@barcode, sep=".");
	obj@peak = res@peak;
	obj@pmat = res@pmat[match(o1, o2),];
	rm(obj.ls, res, o1, o2);
	gc();
	return(obj);
}

#' Add cell-by-gene matrix
#' 
#' This function takes a snap object as input and add the cell-by-gene 
#' matrix to the existing snap object.
#' 
#' @param obj A snap object to add cell-by-bin matrix.
#' @param do.par A logical variable indicates whether to use multiple processors [FALSE].
#' @param num.cores Number of processors to use.
#' @examples
#' file.name = system.file("extdata", "demo.snap", package = "SnapATAC");
#' demo.sp = createSnap(file.name, sample="demo", do.par=FALSE);
#' demo.sp = addGmatToSnap(demo.sp, do.par=FALSE);
#' 
#' @return Return a snap object
#' @importFrom rhdf5 h5read 
#' @importFrom GenomicRanges GRanges findOverlaps
#' @importFrom parallel mclapply
#' @importFrom methods is
#' @export
addGmatToSnap <- function(obj, do.par, num.cores) {
  UseMethod("addGmatToSnap", obj);
}

#' @export
addGmatToSnap.default <- function(obj, do.par=FALSE, num.cores=1){	
	# close the previously opened H5 file
	if(exists('h5closeAll', where='package:rhdf5', mode='function')){
		rhdf5::h5closeAll();		
	}else{
		rhdf5::H5close();
	}
	if(missing(obj)){
		stop("obj is missing")
	}else{
		if(!is(obj, "snap")){
			stop("obj is not a snap object")
		}
	}
	
	if(!is.numeric(num.cores)){
		stop("num.cores is not an integer")
	}		
	num.cores = round(num.cores);
	
	fileList = as.list(unique(obj@file));

	# check if snap files exist
	if(any(do.call(c, lapply(fileList, function(x){file.exists(x)})) == FALSE)){
		idx = which(do.call(c, lapply(fileList, function(x){file.exists(x)})) == FALSE)
		print("error: these files does not exist")
		print(fileList[idx])
		stop()
	}
	
	# check if files are all snap files
	if(any(do.call(c, lapply(fileList, function(x){isSnapFile(x)})) == FALSE)){
		idx = which(do.call(c, lapply(fileList, function(x){isSnapFile(x)})) == FALSE)
		print("error: these files are not snap file")
		print(fileList[idx])
		stop()
	}
	
	# check if GM session exist
	if(any(do.call(c, lapply(fileList, function(x){ "GM" %in% h5ls(x, recursive=1)$name  })) == FALSE)){
		idx = which(do.call(c, lapply(fileList, function(x){ "GM" %in% h5ls(x, recursive=1)$name  })) == FALSE)
		print("error: the following nsap files do not contain GM session")
		print(fileList[idx])
		stop()
	}
	
	# read the snap object
	message("Epoch: reading cell-gene count matrix session ...");
	if(do.par){
		obj.ls = mclapply(fileList, function(file){
			idx = which(obj@file == file);
			addGmatToSnapSingle(obj[idx,], file);
		}, mc.cores=num.cores);		
	}else{
		obj.ls = lapply(fileList, function(file){
			idx = which(obj@file == file);
			addGmatToSnapSingle(obj[idx,], file);
		});				
	}

	# combine
	if((x=length(obj.ls)) == 1L){
		res = obj.ls[[1]]
	}else{
		res = Reduce(snapRbind, obj.ls);		
	}
	o1 = paste(obj@file, obj@barcode, sep=".");
	o2 = paste(res@file, res@barcode, sep=".");
	obj@gmat = res@gmat[match(o1, o2),];
	rm(obj.ls, res, o1, o2);
	gc();
	return(obj);
}

#' Remove cell-by-bin matrix
#' 
#' This function takes a snap object as input and removes the cell-by-bin 
#' matrix in the existing snap object. Report error when cell-by-bin matrix is empty.
#' 
#' @param obj A snap object to remove cell-by-bin matrix.
#' @examples
#' data(demo.sp)
#' rmBmatFromSnap(demo.sp)
#' 
#' @return Return a snap object without cell-by-bin matrix 
#' @importFrom methods slot
#' @importFrom GenomicRanges GRanges
#' @import Matrix
#' @export
rmBmatFromSnap <- function(obj){
  UseMethod("rmBmatFromSnap", obj);
}

#' @export
rmBmatFromSnap.default <- function(obj){	
	# close the previously opened H5 file
	if(missing(obj)){
		stop("obj is missing")
	}else{
		if(!is(obj, "snap")){
			stop("obj is not a snap object")
		}
		data.use = methods::slot(obj, "bmat");
		if((x=nrow(data.use)) == 0L){
			stop("cell-by-bin matrix does not exist in obj");
		}
	}
	
	obj@bmat = Matrix(0,0,0, sparse=TRUE);
	obj@feature = GRanges();
	return(obj);
}


#' Remove cell-by-peak matrix
#' 
#' This function takes a snap object as input and removes the cell-by-peak 
#' matrix in the existing snap object. Report error when cell-by-peak matrix is empty.
#' 
#' @param obj A snap object to remove cell-by-peak matrix.
#' @examples
#' data(demo.sp)
#' rmPmatFromSnap(demo.sp)
#' 
#' @return Return a snap object without cell-by-peak matrix
#' @importFrom methods slot
#' @importFrom GenomicRanges GRanges
#' @import Matrix
#' @export
rmPmatFromSnap <- function(obj){
  UseMethod("rmPmatFromSnap", obj);
}

#' @export
rmPmatFromSnap.default <- function(obj){	
	# close the previously opened H5 file
	if(missing(obj)){
		stop("obj is missing")
	}else{
		if(!is(obj, "snap")){
			stop("obj is not a snap object")
		}
		data.use = methods::slot(obj, "pmat");
		if((x=nrow(data.use)) == 0L){
			stop("cell-by-peak matrix does not exist in obj");
		}
	}
	
	obj@pmat = Matrix(0,0,0, sparse=TRUE);
	obj@peak = GRanges();
	return(obj);
}

#' Remove cell-by-gene matrix
#' 
#' This function takes a snap object as input and removes the cell-by-gene 
#' matrix in the existing snap object. Report error when cell-by-gene matrix 
#' is empty.
#' 
#' @param obj A snap object to remove cell-by-gene matrix.
#' @examples
#' data(demo.sp)
#' rmPmatFromSnap(demo.sp)
#' 
#' @return Return a snap object without cell-by-peak matrix
#' @importFrom methods slot
#' @importFrom GenomicRanges GRanges
#' @import Matrix
#' @export
rmGmatFromSnap <- function(obj){
  UseMethod("rmGmatFromSnap", obj);
}

#' @export
rmGmatFromSnap.default <- function(obj){	
	# close the previously opened H5 file
	if(missing(obj)){
		stop("obj is missing")
	}else{
		if(!is(obj, "snap")){
			stop("obj is not a snap object")
		}
		data.use = methods::slot(obj, "gmat");
		if((x=nrow(data.use)) == 0L){
			stop("cell-by-gene matrix does not exist in obj");
		}
	}
	
	obj@gmat = Matrix(0,0,0, sparse=TRUE);
	return(obj);
}

#' Create a snap object from cell-by-bin matrix
#' 
#' This function takes a cell-by-bin count matrix as input and returns a snap object.
#' 
#' @param mat A sparse matrix
#' @param barcodes Corresponding barcodes
#' @param bins A GenomicRanges object for the genomic coordinates of the bins
#' @examples
#' library("GenomicRanges");
#' mat = Matrix(sample(0:10, 100, replace=TRUE),sparse=TRUE, ncol=5);
#' barcodes = paste("barcode", seq(nrow(mat)), sep=".");
#' chroms = c("chr1", "chr1", "chr1", "chr1", "chr1");
#' pos = c(1, 5001, 10001, 15001, 20001);
#' bins = GRanges(chroms, IRanges(pos, pos+5000));
#' x.sp = createSnapFromBmat(
#'	mat, 
#'	barcodes=barcodes,
#'	bins=bins
#'	);
#' @return Return a snap object
#' @importFrom GenomicRanges GRanges findOverlaps
#' @importFrom IRanges IRanges
#' @importFrom methods is
#' @export
createSnapFromBmat <- function(mat, barcodes, bins) {
  UseMethod("createSnapFromBmat", mat);
}

#' @export
createSnapFromBmat.default <- function(mat, barcodes, bins){
	if(missing(mat) || missing(barcodes) || missing(bins)){
		stop("mat or barcodes or bins is missing");
	}

	if(!(is(mat, "dsCMatrix") || is(mat, "dgCMatrix"))){
		stop("'mat' is not a sparse matrix");
	}

	if(length(barcodes) != nrow(mat)){
		stop("'mat' has different number of rows with number of barcodes");
	}
	
	if(!is(bins, "GRanges")){
		stop("'bins' is not a GRanges object")
	}
	
	if(length(bins) != ncol(mat)){
		stop("'mat' has different number of columns with number of bins");
	}
	
	obj = newSnap();
	obj@bmat = mat;
	obj@barcode = barcodes;
	obj@feature = bins;
	return(obj);
}

#' Create a snap object from cell-by-peak matrix
#' 
#' This function takes a cell-by-peak count matrix as input and returns a snap object.
#' 
#' @param mat A sparse matrix
#' @param barcodes Corresponding barcodes
#' @param peaks A GRanges object for the genomic coordinates of peaks
#' @examples
#' library("GenomicRanges");
#' mat = Matrix(sample(0:10, 100, replace=TRUE),sparse=TRUE, ncol=5);
#' barcodes = paste("barcode", seq(nrow(mat)), sep=".");
#' chroms = c("chr1", "chr1", "chr1", "chr1", "chr1");
#' pos = c(1, 5001, 10001, 15001, 20001);
#' peaks = GRanges(chroms, IRanges(pos, pos+100));
#' x.sp = createSnapFromPmat(
#'	mat, 
#'	barcodes=barcodes,
#'	peaks=peaks
#'	);
#' @importFrom GenomicRanges GRanges findOverlaps
#' @importFrom IRanges IRanges
#' @importFrom methods is
#' @export
createSnapFromPmat <- function(mat, barcodes, peaks) {
  UseMethod("createSnapFromPmat", mat);
}

#' @export
createSnapFromPmat.default <- function(mat, barcodes, peaks){
	if(missing(mat) || missing(barcodes) || missing(peaks)){
		stop("mat or barcodes or peaks is missing");
	}

	if(!(is(mat, "dsCMatrix") || is(mat, "dgCMatrix"))){
		stop("'mat' is not a sparse matrix");
	}

	if(length(barcodes) != nrow(mat)){
		stop("'mat' has different number of rows with number of barcodes");
	}
	
	if(!is(peaks, "GRanges")){
		stop("'peaks' is not a GRanges object")
	}
	if(length(peaks) != ncol(mat)){
		stop("'mat' has different number of columns with number of peaks");
	}
	
	obj = newSnap();
	obj@pmat = mat;
	obj@barcode = barcodes;
	obj@peak = peaks;
	return(obj);
}

#' Create a snap object from cell-by-gene matrix
#' 
#' This function takes a cell-by-gene count matrix as input and returns a snap object.
#' 
#' @param mat A sparse matrix
#' @param barcodes An array of characters for barcodes
#' @param gene.names An array of characters for gene names
#' @examples
#' mat = Matrix(sample(0:10, 100, replace=TRUE),sparse=TRUE, ncol=5);
#' barcodes = paste("barcode", seq(nrow(mat)), sep=".");
#' gene.names = paste("genes", seq(ncol(mat)), sep=".");
#' x.sp = createSnapFromGmat(
#'	mat, 
#'	barcodes=barcodes,
#'	gene.names=gene.names
#'	);
#' @importFrom GenomicRanges GRanges findOverlaps
#' @importFrom IRanges IRanges
#' @importFrom methods is
#' @export
createSnapFromGmat <- function(mat, barcodes, gene.names) {
  UseMethod("createSnapFromGmat");
}

#' @export
createSnapFromGmat.default <- function(mat, barcodes, gene.names){
	if(missing(mat) || missing(barcodes) || missing(gene.names)){
		stop("mat or barcodes or gene.names is missing");
	}

	if(!(is(mat, "dsCMatrix") || is(mat, "dgCMatrix"))){
		stop("'mat' is not a sparse matrix");
	}

	if(length(barcodes) != nrow(mat)){
		stop("'mat' has different number of rows with number of barcodes");
	}
	
	if(!is(gene.names, "character")){
		stop("'gene.names' is not a character object")
	}
	if(length(gene.names) != ncol(mat)){
		stop("'mat' has different number of columns with number of gene.names");
	}
	
	obj = newSnap();
	obj@gmat = mat;
	obj@barcode = barcodes;
	colnames(obj@gmat) = gene.names;
	return(obj);
}

#' Read meta data from a snap file
#'
#' Take a snap file as input and read the barcode session only.
#'
#' @param file character for the snap-format file name which the data are to be read from.
#'
#' @examples
#' file.name = system.file("extdata", "demo.snap", package = "SnapATAC");
#' md = readMetaData(file.name);
#' 
#' @return A data frame contains barcodes and its attributes
#'
#' @importFrom rhdf5 h5read
#' @export
readMetaData <- function(file) {
  UseMethod("readMetaData", file);
}

#' @export
readMetaData.default <- function(file){
	if(!file.exists(file)){stop(paste("Error @readMetaData: ", file, " does not exist!", sep=""))};
	if(!isSnapFile(file)){stop(paste("Error @readMetaData: ", file, " is not a snap-format file!", sep=""))};
	# close the previously opened H5 file
	if(exists('h5closeAll', where='package:rhdf5', mode='function')){
		rhdf5::h5closeAll();		
	}else{
		rhdf5::H5close();
	}
	barcode = as.character(tryCatch(barcode <- h5read(file, '/BD/name'), error = function(e) {print(paste("Warning @readSnap: 'BD/name' not found in ",file)); return(vector(mode="character", length=0))}));
	TN = as.numeric(tryCatch(TN <- h5read(file, '/BD/TN'), error = function(e) {print(paste("Warning @readMetaData: 'BD/TN' not found in ",file)); return(c())}));
	UM = as.numeric(tryCatch(UM <- h5read(file, '/BD/UM'), error = function(e) {print(paste("Warning @readMetaData: 'BD/UM' not found in ",file)); return(c())}));
	PP = as.numeric(tryCatch(PP <- h5read(file, '/BD/PP'), error = function(e) {print(paste("Warning @readMetaData: 'BD/PP' not found in ",file)); return(c())}));
	UQ = as.numeric(tryCatch(UQ <- h5read(file, '/BD/UQ'), error = function(e) {print(paste("Warning @readMetaData: 'BD/UQ' not found in ",file)); return(c())}));
	CM = as.numeric(tryCatch(CM <- h5read(file, '/BD/CM'), error = function(e) {print(paste("Warning @readMetaData: 'BD/CM' not found in ",file)); return(c())}));
	data.frame(barcode, TN, UM, PP, UQ, CM)
}


#' Export barcode meta data
#' 
#' This function takes a snap object as input and export its barcode and corresponding attributes
#' 
#' @param obj A snap object
#' @param file Output file name
#' @param slot.names Name of slots to be exported c('barcode', 'tsne', 'umap', 'cluster', 'metaData')
#' @importFrom methods slot
#' @importFrom utils write.table
#' @importFrom methods is
#' @examples
#' data(demo.sp);
#' exportMetaData(demo.sp, file="demo.metadata.txt", slot.names=c("barcode", "tsne"));
#' @export
exportMetaData <- function(obj, file, slot.names){
    UseMethod("exportMetaData", obj);	
}

#' @export
exportMetaData.default <- function(obj, file, slot.names=c('barcode', 'cluster', 'tsne', 'umap', 'metaData')){
	subset.names.ref = c('barcode', 'cluster', 'tsne', 'umap', 'metaData');
	
	if(missing(obj)){
		stop("obj is missing")
	}else{
		if(!is(obj, "snap")){
			stop("'obj' is not a snap object")
		}
		if((x=nrow(obj))==0L){
			stop("obj is empty");
		}
	}
	
	if(file.exists(file)){
		stop("file already exists, remove it first")		
	}
	
	if(missing(slot.names)){
		stop("slot.names is missing")
	}

	if(!all(slot.names %in% subset.names.ref)){
		stop("'slot.names' must be subset of c('barcode', 'cluster', 'tsne', 'umap', 'metaData')");		
	}
	
	
	metaData.ls = lapply(as.list(slot.names), function(x){
		if(x == "barcode"){
			y = data.frame(slot(obj, x));			
			colnames(y) = "barcode"
		}else if(x == "tsne"){
			y = data.frame(slot(obj, x));			
			colnames(y) = c("tsne1", "tsne2");
		}else if(x == "umap"){
			y = data.frame(slot(obj, x));			
			colnames(y) = c("umap1", "umap2");
		}else if(x == "cluster"){
			y = data.frame(slot(obj, x));			
			colnames(y) = "cluster"
		}else{
			y = data.frame(slot(obj, x));			
		}
		y
	})
	
	if(!all(sapply(lapply(metaData.ls, nrow), FUN = identical, nrow(metaData.ls[[1]])))){
		stop("slot in subset.names have different length")
	}
	
	metaData.df = do.call(cbind, metaData.ls);

    write.table(metaData.df, file = file, append = FALSE, quote = FALSE, sep = "\t",
                eol = "\n", na = "NA", dec = ".", row.names = FALSE,
                col.names = TRUE, qmethod = c("escape", "double"),
                fileEncoding = "")
}


#' Check correlation of cell-by-bin matrix
#'
#' This function takes one or two snap object as input and calculate 
#' the correlation between cell-by-bin matrix between replicates. If
#' obj2 is NULL, obj1 will be randomly split into two pseudo replicates  
#' and the correlaion between these two pseudo-replicates will be 
#' calcualted and returned. For obj1, the cell-by-bin matrix
#' cannot be empty. This function helps check whether the current 
#' cell-by-bin matrix is sufficient for downstream analysis. If the 
#' pearson correlation is less than 0.95 recommend to use a bigger bin.size. 
#' 
#' @param obj1 A snap object for replicate 1
#' @param obj2 A snap object for replicate 2 [NULL].
#' @examples
#' data(demo.sp);
#' calBmatCor(demo.sp)
#' 
#' @return Return pearson correlation between replicates.
#' @importFrom stats cor
#' @importFrom methods is
#' @export
calBmatCor <- function(obj1, obj2) {
  UseMethod("calBmatCor", obj1);
}

#' @export
calBmatCor.default <- function(obj1, obj2=NULL){
	if(missing(obj1)){
		stop("obj1 is missing.")
	}else{
		if(!is(obj1, "snap")){
			stop("obj1 is not a snap object")
		}
		
		if((x=nrow(obj1@bmat)) == 0){
			stop("cell-by-bin matrix is empty")		
		}
		
		if((x=max(obj1@bmat)) > 1){
			obj1 = makeBinary(obj1, mat="bmat");
		}
	}
	
	if(!is.null(obj2)){
		# check if obj2 is a snap object
		if(!is(obj2, "snap")){
			stop("obj2 is not a snap object")
		}
		
		# check if obj2 has the same features
		if(any(obj1@feature$name != obj2@feature$name)){
			stop("'obj1' and 'obj2' have different features")			
		}
		if(max(obj2@bmat) > 1){
			obj2 = makeBinary(obj2, mat="bmat");									
		}
		cov1 = log(Matrix::colSums(obj1@bmat) + 1, 10);
		cov2 = log(Matrix::colSums(obj2@bmat) + 1, 10);			
	}else{
		ncell = nrow(obj1);
		idx1 = sort(sample(seq(ncell), ncell/2));
		idx2 = setdiff(seq(ncell), idx1);
		cov1 = log(Matrix::colSums(obj1@bmat[idx1,]) + 1, 10);
		cov2 = log(Matrix::colSums(obj1@bmat[idx2,]) + 1, 10);	
	}		
	return(cor(cov1, cov2, method="pearson"));
}

#' Combine snap objects
#'
#' Takes two snap objects and combines them.
#'
#' @param obj1 a snap object
#' @param obj2 a snap object
#' @return a combined snap object
#' @export
snapRbind <- function(obj1, obj2){
	if(!is.snap(obj1)){stop(paste("Error @snapRbind: obj1 is not a snap object!", sep=""))};
	if(!is.snap(obj2)){stop(paste("Error @snapRbind: obj2 is not a snap object!", sep=""))};

	# barcode from obj1 and obj2
	barcode1 = paste(obj1@file, obj1@barcode, sep=".");
	barcode2 = paste(obj2@file, obj2@barcode, sep=".");	
	
	# check barcode name, if there exists duplicate barcode raise error and exist
	if(length(unique(c(barcode1, barcode2))) < length(barcode1) + length(barcode2)){
		stop("Error: @snapRbind: identifcal barcodes found in obj1 and obj2!")
	}
	rm(barcode1, barcode2)
	gc()
	
	# check meta data
	if(nrow(obj1@metaData) > 0 && nrow(obj2@metaData) > 0){
		metaData = rbind(obj1@metaData, obj2@metaData);		
	}else{
		metaData = data.frame();
	}
	
	# check feature
	feature1 = obj1@feature;
	feature2 = obj2@feature;
	if((length(feature1) == 0) != (length(feature2) == 0)){
		stop("different feature found in obj1 and obj2!")
	}else{
		if(length(feature1) > 0){
			if(FALSE %in% (feature1$name == feature2$name)){
				stop("Error: @snapRbind: different feature found in obj1 and obj2!")
			}
			feature = feature1;					
		}else{
			feature = feature1;								
		}
	}
	gc()
	
	# check peak
	peak1 = obj1@peak;
	peak2 = obj2@peak;
	if((length(peak1) == 0) != (length(peak2) == 0)){
		stop("different peak found in obj1 and obj2!")
	}else{
		if(length(peak1) > 0){
			if(FALSE %in% (peak1$name == peak2$name)){
				stop("Error: @snapRbind: different feature found in obj1 and obj2!")
			}
			peak = peak1;					
		}else{
			peak = peak1;								
		}
	}
	rm(peak1, peak2)
	gc()
	
	# check bmat	
	bmat1 = obj1@bmat;
	bmat2 = obj2@bmat;
	if((length(bmat1) == 0) != (length(bmat2) == 0)){
		stop("bmat has different dimentions in obj1 and obj2!")
	}else{
		bmat = Matrix::rBind(bmat1, bmat2);
	}
	rm(bmat1, bmat2)
	gc()
	
	# check gmat	
	gmat1 = obj1@gmat;
	gmat2 = obj2@gmat;
	if((length(gmat1) == 0) != (length(gmat2) == 0)){
		stop("gmat has different dimentions in obj1 and obj2!")
	}else{
		gmat = Matrix::rBind(gmat1, gmat2);
	}
	rm(gmat1, gmat2)
	gc()

	# check pmat	
	pmat1 = obj1@pmat;
	pmat2 = obj2@pmat;
	if((length(pmat1) == 0) != (length(pmat2) == 0)){
		stop("pmat has different dimentions in obj1 and obj2!")
	}else{
		pmat = Matrix::rBind(pmat1, pmat2);
	}
	rm(pmat1, pmat2)
	gc()


	# check gmat	
	dmat1 = obj1@smat@dmat;
	dmat2 = obj2@smat@dmat;
	
	if((length(dmat1) == 0) != (length(dmat2) == 0)){
		stop("dmat has different dimentions in obj1 and obj2!")
	}else{
		dmat = Matrix::rBind(dmat1, dmat2);
	}
	rm(dmat1, dmat2)
	gc()

	res = newSnap();
	res@feature = feature;
	res@barcode = c(obj1@barcode, obj2@barcode);
	res@file = c(obj1@file, obj2@file);
	res@sample = c(obj1@sample, obj2@sample);
	res@metaData = metaData;
	res@bmat = bmat;
	res@pmat = pmat;
	res@peak = peak;
	res@gmat = gmat;
	res@smat@dmat = dmat;
	res@smat@sdev = obj1@smat@sdev;
	return(res)
}

#' Cell filtration
#'
#' This function takes a snap object as input and filter cells based on given cutoffs. 
#' We next identify the high-quality barcode based on the following metrices: 
#' 
#' 1) fragment.num - total number of fragments per barcode;
#' 2) UMI - unique molecular identifier;
#' 3) mito.ratio - mitochondrial ratio;
#' 4) dup.ratio - PCR duplicate ratio;
#' 5) pair.ratio - properly paired ratio;
#' 6) umap.ratio - uniquely mapped ratio;
#' 
#' Note we no longer use reads in peak ratio as a metric for cell selection mainly for two reasons. 
#' Reads-in-peak ration is highly cell type specific. For instance, according to published single 
#' cell ATAC-seq, human fibroblast (BJ) cells have significantly higher reads in peak ratio (40-60%) 
#' versus 20-40% for GM12878 cells. Similarly, in mammalian brain, glia cells overall have very 
#' different reads in peak ratio distribution compared to neuronal cells. We suspect this may reflect 
#' the nucleus size or global chromatin accessibility. Second, pre-defined set of accessibility peaks 
#' are incomplete and biased to the dominant populations. 
#' 
#' @param obj A snap object.
#' @param subset.names Attributes used to filter cells c('fragment.num', 'UMI', 'mito.ratio', 'umap.ratio', 'dup.ratio', 'pair.ratio'). 
#' @param low.thresholds Low cutoffs for the parameters (default is -Inf)
#' @param high.thresholds High cutoffs for the parameters (default is Inf)
#' 
#' @examples
#' data(demo.sp);
#' filterCells(
#'	obj=demo.sp, 
#'	subset.names=c("UMI"), 
#'	low.thresholds=c(10),
#'	high.thresholds=c(Inf)
#'	);
#'
#' @return Returns a snap object containing only the relevant subset of cells
#' 
#' @importFrom methods is
#' @export
filterCells <- function(obj, subset.names, low.thresholds, high.thresholds) {
  UseMethod("filterCells", obj);
}

#' @export
filterCells.default <- function(
	obj, 
	subset.names, 
	low.thresholds, 
	high.thresholds
){
	if(missing(obj)){
		stop("obj is missing");
	}else{
		if(!is(obj, "snap")){
			stop("obj is not a snap object");
		}
		metaData = obj@metaData;		
		if((x=nrow(metaData))==0L){
			stop("metaData is empty")
		}
	}
	
    if (missing(x = low.thresholds)){
      low.thresholds <- replicate(n = length(x = subset.names), expr = -Inf);
    }

    if (missing(x = high.thresholds)) {
      high.thresholds <- replicate(n = length(x = subset.names), expr = Inf);
    }

	if(!all(subset.names %in% c('fragment.num', 'mito.ratio', 'umap.ratio', 'dup.ratio', 'pair.ratio', 'UMI'))){
		stop("'subset.names' must be subset of c('UMI', 'fragment.num', 'mito.ratio', 'umap.ratio', 'dup.ratio', 'pair.ratio')");		
	}

    length.check <- sapply(
      X = list(subset.names, low.thresholds, high.thresholds),
      FUN = length
    )
	
    if (length(x = unique(x = length.check)) != 1L) {
      stop("'subset.names', 'low.thresholds', and 'high.thresholds' must all have the same length");
    }

	data.subsets <- data.frame(name=subset.names, lw=low.thresholds, hg=high.thresholds);
	
	metaData$UMI = metaData$UQ;
	metaData$fragment.num = metaData$TN;
	metaData$mito.ratio = (metaData$CM)/(metaData$UQ);
	metaData$umap.ratio = (metaData$UM)/(metaData$TN);
	metaData$dup.ratio = 1 - (metaData$UQ)/(metaData$PP);
	metaData$pair.ratio = (metaData$PP)/(metaData$UM);
	for(i in seq(nrow(data.subsets))){		
		f = as.character(data.subsets$name[i]);
		names.use <- which(colnames(metaData) %in% f);
		idx = which((metaData[,names.use] >= data.subsets$lw[i]) & (metaData[,names.use] <= data.subsets$hg[i]));
		obj = obj[idx,];
		metaData = metaData[idx,]
	}
	return(obj);
}

extractReadsFromOneCell <- function(
	barcode, 
	file
){
	if(missing(file)){
		stop("file is missing");
	}else{
		if(!isSnapFile(file)){
			stop(paste0(file, " is not a snap file"));			
		}
	}

	# read the reference barcode list
	barcode.list = as.character(tryCatch(barcode.list <- h5read(file, '/BD/name'), error = function(e) {print(paste("Warning @extractReadsFromOneCell: 'BD/name' not found in ",file)); return(vector(mode="character", length=0))}));
	if(missing(barcode)){
		stop("barcode is missing");
	}else{
		if(class(barcode) != "character"){
			stop(paste0("barocde ", barcode, " is not character object"));
		}else{
			if(!(barcode %in% barcode.list)){
				stop(paste0("barocde ", barcode, " does not exist in snape file", file));
			}
		}		
	}
	
	pos.list = as.numeric(tryCatch(pos.list <- h5read(file, "FM/barcodePos"), error = function(e) {print(paste("Warning @readMetaData: 'FM/barcodePos' not found in ",file)); return(c())}));
	len.list = as.numeric(tryCatch(len.list <- h5read(file, "FM/barcodeLen"), error = function(e) {print(paste("Warning @readMetaData: 'FM/barcodeLen' not found in ",file)); return(c())}));
	
	if((x=length(pos.list)) != (y=length(len.list))){
		stop("FM/barcodeLen has different length with FM/barcodePos")
	}
	
	pos = pos.list[match(barcode, barcode.list)];
	len = len.list[match(barcode, barcode.list)];
	idx.arr = seq(pos, pos + len - 1);

	chroms = h5read(file, "FM/fragChrom", index=list(idx.arr))
	starts = h5read(file, "FM/fragStart", index=list(idx.arr))
	lens = h5read(file, "FM/fragLen", index=list(idx.arr))
	frags.gr = GRanges(chroms, 
		IRanges(starts, starts + lens - 1), 
		barcode=rep(barcode, length(chroms)), 
		file=rep(file, length(chroms))
		);
	return(frags.gr)
}

#' Extract Reads By Barcodes
#'
#' This function takes a barcode list and snap file as input and quickly extract reads belonging to the given barcodes. 
#' 
#' @param barcodes A vector contains the selected barcodes.
#' @param files A vector contains the snap file that barcodes belong to.
#' @param do.par A logic variable indicates weather to run this in parallel with multiple processors.
#' @param num.cores Number of processors to use.
#' 
#' @return Returns A GenomicRanges object that contains the reads
#' 
#' @importFrom methods is
#' @importFrom doParallel registerDoParallel
#' @importFrom parallel makeCluster stopCluster detectCores
#' @importFrom foreach foreach %dopar%
#' @importFrom stats lm
#' @importFrom methods slot
#' @export
extractReads <- function(
	barcodes,
	files,
	do.par=TRUE,
	num.cores=1
){
	if(missing(barcodes)){
		stop("barcodes is missing")
	}else{
		if(class(barcodes) != "character"){
			stop("barcodes must be character");
		}
	}

	if(missing(files)){
		stop("files is missing")
	}else{
		if(class(files) != "character"){
			stop("files must be character");
		}
	}
	ncell = length(barcodes);
	nfile = length(files);
	fileList = as.list(unique(files));
	if(ncell != nfile){
		stop("barcodes have different length with barcodes");
	}
	
	# check if snap files exist
	if(any(do.call(c, lapply(fileList, function(x){file.exists(x)})) == FALSE)){
		idx = which(do.call(c, lapply(fileList, function(x){file.exists(x)})) == FALSE)
		print("error: these files does not exist")
		print(fileList[idx])
		stop()
	}
	
	# check if files are all snap files
	if(any(do.call(c, lapply(fileList, function(x){isSnapFile(x)})) == FALSE)){
		idx = which(do.call(c, lapply(fileList, function(x){isSnapFile(x)})) == FALSE)
		print("error: these files are not snap file")
		print(fileList[idx])
		stop()
	}
	
	# check if FM session exist
	if(any(do.call(c, lapply(fileList, function(x){ "FM" %in% h5ls(x, recursive=1)$name  })) == FALSE)){
		idx = which(do.call(c, lapply(fileList, function(x){ "FM" %in% h5ls(x, recursive=1)$name  })) == FALSE)
		print("error: the following nsap files do not contain FM session")
		print(fileList[idx])
		stop()
	}

	if(do.par){
	    # input checking for parallel options
		if(num.cores > 1){
	        if (num.cores == 1) {
	          num.cores = 1
	        } else if (num.cores > detectCores()) {
	          num.cores <- detectCores() - 1
	          warning(paste0("num.cores set greater than number of available cores(", parallel::detectCores(), "). Setting num.cores to ", num.cores, "."))
	        }
	      } else if (num.cores != 1) {
	        num.cores <- 1
		}
	}
	read.ls = mclapply(as.list(seq(barcodes)), function(i){
		extractReadsFromOneCell(
			barcode = barcodes[i], 
			file = files[i]
		)		
	}, mc.cores=num.cores);
	read.gr = Reduce(c, read.ls);
	return(read.gr);
}

#' Feature filtration
#'
#' This function takes a snap obj as input and perform feature selection in the following manner:
#' 1) calculate coverage of each genomic bin/feature;
#' 2) log scale the coverage by log10(count + 1);
#' 3) the log-nromal distribution is then converted into zscore; 
#' 4) bins with zscore beyond [low.threshold, high.threshold] are filtered;
#' 
#' @param obj A snap obj
#' @param low.threshold Low cutoffs for the parameters (default is -2)
#' @param high.threshold High cutoffs for the parameters (default is 2)
#' @param mat Matrix to filter c("bmat", "pmat")
#'
#' @examples
#' data(demo.sp);
#' filterBins(
#'	obj=demo.sp, 
#'	low.threshold=-2,
#'	high.threshold=2
#'	);
#' 
#' @return Returns a snap obj
#' @importFrom stats sd
#' @importFrom methods slot is
#' @export
filterBins <- function(obj, low.threshold, high.threshold, mat) {
  UseMethod("filterBins", obj);
}

#' @export
filterBins.default <- function(obj, low.threshold=-2, high.threshold=2, mat=c("bmat", "pmat")){
	if(missing(obj)){
		stop("obj is missing")
	}else{
		if(!is(obj, "snap")){
			stop("'obj' is not a snap obj")
		};		
	}
	
	mat = match.arg(mat);
	data.use = methods::slot(obj, mat);

	if((x=nrow(data.use)) == 0L){		
		stop("count matrix is empty")
	}

	idy = which(Matrix::colSums(data.use) > 0);
	cov = log(Matrix::colSums(data.use)[idy] + 1, 10);
	zcov = (cov - mean(cov)) / stats::sd(cov);	
	idy2 = which(zcov >= low.threshold & zcov <= high.threshold);
	idy = idy[idy2];
	methods::slot(obj, mat) = data.use[,idy,drop=FALSE];
	if(mat=="bmat"){
		obj@feature = obj@feature[idy];
	}else if(mat=="pmat"){
		obj@peak = obj@peak[idy];		
	}
	return(obj)
}

#' Check a snap-format file
#' 
#' This function takes a file name as input and check if the file is a snap-formated file 
#' @param file A file name.
#' @examples
#' file.name = system.file("extdata", "demo.snap", package = "SnapATAC");
#' isSnapFile(file.name);
#' 
#' @return Return a logical variable indicates whether file is a snap file.
#' @importFrom rhdf5 h5read h5ls
#' 
#' @export
#' 
isSnapFile <- function(file) {
  UseMethod("isSnapFile", file);
}

#' @export
isSnapFile.default <- function(file){
	if(!file.exists(file)){ 
		stop("file does not exist!")
	}

	monals = tryCatch(
		monals <- h5ls(file), 
		error = function(e) {
			return(character())
	})
	
	if(length(monals) != 0){
		magicString = as.character(tryCatch(magicString <- h5read(file, '/HD/MG'), error = function(e) {print(paste("Warning @isSnapFile: 'HD/MG' not found in ", file)); return(character())}))
		if(length(magicString)==0){
			return(FALSE);
		}
		if(magicString == "SNAP" || magicString == "MONA"){
			return(TRUE);
		}
	}
	return(FALSE);
}

readBins <- function(file, bin.size=5000){	
	if(exists('h5closeAll', where='package:rhdf5', mode='function')){
		rhdf5::h5closeAll();		
	}else{
		rhdf5::H5close();
	}
	if(!file.exists(file)){stop(paste("Error @addBmat: ", file, " does not exist!", sep=""))};
	if(!isSnapFile(file)){stop(paste("Error @addBmat: ", file, " is not a snap-format file!", sep=""))};
	if(!(bin.size %in% showBinSizes(file))){stop(paste("Error @addBmat: bin.size ", bin.size, " does not exist in ", file, "\n", sep=""))};
	options(scipen=999);
	binChrom = tryCatch(binChrom <- h5read(file, paste("AM", bin.size, "binChrom", sep="/")), error = function(e) {stop(paste("Warning @readaddBmatSnap: 'AM/bin.size/binChrom' not found in ",file))})
	binStart = tryCatch(binStart <- h5read(file, paste("AM", bin.size, "binStart", sep="/")), error = function(e) {stop(paste("Warning @addBmat: 'AM/bin.size/binStart' not found in ",file))})
	if(bin.size == 0){
		binEnd = tryCatch(binEnd <- h5read(file, paste("AM", bin.size, "binEnd", sep="/")), error = function(e) {stop(paste("Warning @addBmat: 'AM/bin.size/binStart' not found in ",file))})		
	}else{
		binEnd = binStart + as.numeric(bin.size) -1;
	}
	if((length(binChrom) == 0) || (length(binStart) == 0)){stop("Error @addBmat: bin is empty! Does not support empty snap file")}
	if(length(binChrom) != length(binStart)){
		stop(paste("Error @addBmat: ", "binChrom and binStart has different length!", sep=""))
	}else{
		nBin = length(binChrom);
	}
	bins = GRanges(binChrom, IRanges(as.numeric(binStart),binEnd), name=paste(paste(binChrom, binStart, sep=":"), binEnd, sep="-"));				
	rm(binChrom, binStart);
	return(bins)
}

readPeaks <- function(file){	
	if(exists('h5closeAll', where='package:rhdf5', mode='function')){
		rhdf5::h5closeAll();		
	}else{
		rhdf5::H5close();
	}
	
	if(!file.exists(file)){stop(paste("Error @addBmat: ", file, " does not exist!", sep=""))};
	if(!isSnapFile(file)){stop(paste("Error @addBmat: ", file, " is not a snap-format file!", sep=""))};
	options(scipen=999);
    binChrom = tryCatch(binChrom <- h5read(file, "PM/peakChrom"), error = function(e) {stop(paste("Warning @addPmat: 'PM/peakChrom' not found in ",file))})
    binStart = tryCatch(binStart <- h5read(file, "PM/peakStart"), error = function(e) {stop(paste("Warning @addPmat: 'PM/peakStart' not found in ",file))})
    binEnd = tryCatch(binEnd <- h5read(file, "PM/peakEnd"), error = function(e) {stop(paste("Warning @addPmat: 'PM/peakEnd' not found in ",file))})

    if((length(binChrom) == 0) || (length(binStart) == 0)){stop("Error @readSnap: bin is empty! Does not support empty snap file")}
    if(length(binChrom) != length(binStart)){
            stop(paste("Error @addPmat: ", "binChrom and binStart has different length!", sep=""))
    }else{
            nBin = length(binChrom);
    }

    bins = GRanges(binChrom, IRanges(as.numeric(binStart),binEnd), name=paste(paste(binChrom, binStart, sep=":"), binEnd, sep="-"));
    rm(binChrom, binStart);
	return(bins)
}

#' @importFrom methods is
#' @import Matrix
addBmatToSnapSingle <- function(obj, file, bin.size=5000){	
	# close the previously opened H5 file
	if(exists('h5closeAll', where='package:rhdf5', mode='function')){
		rhdf5::h5closeAll();		
	}else{
		rhdf5::H5close();
	}
	
	if(missing(obj)){
		stop("obj is missing")
	}else{
		if(!is(obj, "snap")){
			stop("obj is not a snap object")
		}
	}

	if(!file.exists(file)){stop(paste("Error @addBmat: ", file, " does not exist!", sep=""))};
	if(!isSnapFile(file)){stop(paste("Error @addBmat: ", file, " is not a snap-format file!", sep=""))};
	if(!(bin.size %in% showBinSizes(file))){stop(paste("Error @addBmat: bin.size ", bin.size, " does not exist in ", file, "\n", sep=""))};
	obj@bmat = Matrix(0,0,0);
	############################################################################
	barcode = as.character(tryCatch(barcode <- h5read(file, '/BD/name'), error = function(e) {print(paste("Warning @addBmat: 'BD/name' not found in ",file)); return(vector(mode="character", length=0))}));	

	bin.sizeList = showBinSizes(file);
	if(length(bin.sizeList) == 0){stop("Error @addBmat: bin.sizeList is empty! Does not support reading empty snap file")}
	if(!(bin.size %in% bin.sizeList)){stop(paste("Error @addBmat: ", bin.size, " does not exist in bin.sizeList, valid bin.size includes ", toString(bin.sizeList), "\n", sep=""))}
	
	bins = readBins(file, bin.size);
	obj@feature = bins;
	idx = as.numeric(tryCatch(idx <- h5read(file, paste("AM", bin.size, "idx", sep="/")), error = function(e) {stop(paste("Warning @addBmat: 'AM/bin.size/idx' not found in ",file))}));
	idy = as.numeric(tryCatch(idy <- h5read(file, paste("AM", bin.size, "idy", sep="/")), error = function(e) {stop(paste("Warning @addBmat: 'AM/bin.size/idy' not found in ",file))}));
	count = as.numeric(tryCatch(count <- h5read(file, paste("AM", bin.size, "count", sep="/")), error = function(e) {stop(paste("Warning @addBmat: 'AM/bin.size/count' not found in ",file))}));	

	if(!all(sapply(list(length(idx),length(idy),length(count)), function(x) x == length(count)))){stop("Error: idx, idy and count has different length in the snap file")}	
	nBarcode = length(barcode);
	nBin = length(obj@feature);
	M = sparseMatrix(i=idx, j =idy, x=count, dims=c(nBarcode, nBin));
	rownames(M) = barcode;
	obj@bmat = M[match(obj@barcode, rownames(M)),]
	rm(idx, idy, count, M);
	if(exists('h5closeAll', where='package:rhdf5', mode='function')){
		rhdf5::h5closeAll();		
	}else{
		rhdf5::H5close();
	}
	gc();
	return(obj);
}

#' @importFrom methods is
addGmatToSnapSingle <- function(obj, file){
        # close the previously opened H5 file
		if(exists('h5closeAll', where='package:rhdf5', mode='function')){
			rhdf5::h5closeAll();		
		}else{
			rhdf5::H5close();
		}
        # check the input
        if(!is(obj, "snap")){stop(paste("Error @addGmat: ", file, " does not exist!", sep=""))}
        if(!file.exists(file)){stop(paste("Error @addGmat: ", file, " does not exist!", sep=""))};
        if(!isSnapFile(file)){stop(paste("Error @addGmat: ", file, " is not a snap-format file!", sep=""))};

        ############################################################################
		barcode = as.character(tryCatch(barcode <- h5read(file, '/BD/name'), error = function(e) {print(paste("Warning @addBmat: 'BD/name' not found in ",file)); return(vector(mode="character", length=0))}));
        options(scipen=999);
        geneName = tryCatch(geneName <- h5read(file, "GM/name"), error = function(e) {stop(paste("Warning @addGmat: 'GM/name' not found in ",file))})
        if(length(geneName) == 0){
			stop("Error @addGmat: GM is empty")
		}

        idx = as.numeric(tryCatch(idx <- h5read(file, "GM/idx"), error = function(e) {stop(paste("Warning @addGmat: 'GM/idx' not found in ",file))}))
        idy = as.numeric(tryCatch(idy <- h5read(file, "GM/idy"), error = function(e) {stop(paste("Warning @addGmat: 'GM/idy' not found in ",file))}))
        count = as.numeric(tryCatch(count <- h5read(file, "GM/count"), error = function(e) {stop(paste("Warning @addGmat: 'GM/count' not found in ",file))}))

        if(!all(sapply(list(length(idx),length(idy),length(count)), function(x) x == length(count)))){stop("Error: idx, idy and count has different length in the snap GM session")}		
		nBarcode = length(barcode);
		nGene = length(geneName);
		M = sparseMatrix(i=idx, j =idy, x=count, dims=c(nBarcode, nGene));
		rownames(M) = barcode;
		obj@gmat = 	M[match(obj@barcode, rownames(M)),]
		colnames(obj@gmat) = geneName;
		rm(idx, idy, count, M);

		if(exists('h5closeAll', where='package:rhdf5', mode='function')){
			rhdf5::h5closeAll();		
		}else{
			rhdf5::H5close();
		}
		gc();
		return(obj);
}

#' @importFrom methods is
addPmatToSnapSingle <- function(obj, file){
        # close the previously opened H5 file
		if(exists('h5closeAll', where='package:rhdf5', mode='function')){
			rhdf5::h5closeAll();		
		}else{
			rhdf5::H5close();
		}
        # check the input
        if(!is(obj, "snap")){stop(paste("Error @addPmat: ", file, " does not exist!", sep=""))}
        if(!file.exists(file)){stop(paste("Error @addPmat: ", file, " does not exist!", sep=""))};
        if(!isSnapFile(file)){stop(paste("Error @addPmat: ", file, " is not a snap-format file!", sep=""))};

        ############################################################################
		barcode = as.character(tryCatch(barcode <- h5read(file, '/BD/name'), error = function(e) {print(paste("Warning @addBmat: 'BD/name' not found in ",file)); return(vector(mode="character", length=0))}));
        options(scipen=999);
        binChrom = tryCatch(binChrom <- h5read(file, "PM/peakChrom"), error = function(e) {stop(paste("Warning @addPmat: 'PM/peakChrom' not found in ",file))})
        binStart = tryCatch(binStart <- h5read(file, "PM/peakStart"), error = function(e) {stop(paste("Warning @addPmat: 'PM/peakStart' not found in ",file))})
        binEnd = tryCatch(binEnd <- h5read(file, "PM/peakEnd"), error = function(e) {stop(paste("Warning @addPmat: 'PM/peakEnd' not found in ",file))})

        if((length(binChrom) == 0) || (length(binStart) == 0)){stop("Error @readSnap: bin is empty! Does not support empty snap file")}
        if(length(binChrom) != length(binStart)){
                stop(paste("Error @addPmat: ", "binChrom and binStart has different length!", sep=""))
        }else{
                nBin = length(binChrom);
        }

        bins = GRanges(binChrom, IRanges(as.numeric(binStart),binEnd), name=paste(paste(binChrom, binStart, sep=":"), binEnd, sep="-"));
        rm(binChrom, binStart);
        obj@peak = bins;

        idx = as.numeric(tryCatch(idx <- h5read(file, "PM/idx"), error = function(e) {stop(paste("Warning @readSnap: 'PM/idx' not found in ",file))}))
        idy = as.numeric(tryCatch(idy <- h5read(file, "PM/idy"), error = function(e) {stop(paste("Warning @readSnap: 'PM/idy' not found in ",file))}))
        count = as.numeric(tryCatch(count <- h5read(file, "PM/count"), error = function(e) {stop(paste("Warning @readSnap: 'PM/count' not found in ",file))}))

        if(!all(sapply(list(length(idx),length(idy),length(count)), function(x) x == length(count)))){stop("Error: idx, idy and count has different length in the snap file")}
		nBarcode = length(barcode);
		nPeak = length(obj@peak);
		M = sparseMatrix(i=idx, j =idy, x=count, dims=c(nBarcode, nPeak));
		rownames(M) = barcode;
		obj@pmat = 	M[match(obj@barcode, rownames(M)),]
		rm(idx, idy, count, M);
		if(exists('h5closeAll', where='package:rhdf5', mode='function')){
			rhdf5::h5closeAll();		
		}else{
			rhdf5::H5close();
		}
		
		gc();
		return(obj);
}

#' @importFrom methods is
createSnapSingle <- function(file, sample, metaData=TRUE, description=NULL){	
	# close the previously opened H5 file
	if(exists('h5closeAll', where='package:rhdf5', mode='function')){
		rhdf5::h5closeAll();		
	}else{
		rhdf5::H5close();
	}
	# check the input
	if(!file.exists(file)){stop(paste(file, " does not exist!", sep=""))};
	if(!isSnapFile(file)){stop(paste(file, " is not a snap-format file!", sep=""))};
	if(!(is.logical(metaData))){stop(paste("metaData is not a logical variable!", sep=""))};
	
	if(!(is.null(description))){
		if(!is(description, "character")){
			stop("description must be character object")
		}
	}else{
		description=character()
	}
	
	# create an empty snap object
	res = newSnap();
	############################################################################
	barcode = as.character(tryCatch(barcode <- h5read(file, '/BD/name'), error = function(e) {print(paste("Warning @readSnap: 'BD/name' not found in ",file)); return(vector(mode="character", length=0))}));	
	if(metaData){
		metaData = readMetaData(file);
		if(any((metaData$barcode == barcode) == FALSE)){stop(paste("Error @readSnap: meta data does not match with barcode name!", sep=""))};
		res@metaData = metaData;
	}else{
		metaData = data.frame(barcode=barcode, TN=0, UM=0, PP=0, UQ=0, CM=0);
		res@metaData = metaData;
	}
	nBarcode = length(barcode);
	if((x=nBarcode) == 0L){
		stop("barcode is empty! Does not support reading an empty snap file")
	}
	res@barcode = barcode;
	res@des = description;
	res@sample = rep(sample, length(barcode));
	res@file = rep(normalizePath(file), length(res@barcode));
	if(exists('h5closeAll', where='package:rhdf5', mode='function')){
		rhdf5::h5closeAll();		
	}else{
		rhdf5::H5close();
	}
	gc();
	return(res);	
}
r3fang/SnapATAC documentation built on March 29, 2022, 4:33 p.m.