# R/ncdfFlowSet-rbind-methods.R

#' Combine multiple ncdfFlowSet objects into one
#'
#' Similar to \code{\link[=rbind2,flowSet,flowSet-method]{flowCore:rbind2}},
#' but instead of merging the objects pairwise one first constructs a
#' \code{ncdfFlowList} and then applies \code{rbind2} to it.
#'
#' If \code{ncdfFile} contains no \code{"."}, the suffix \code{".nc"} is
#' appended before the file is looked up or created.
#'
#' @param x \code{ncdfFlowList}
#' @param ncdfFile \code{character} see details in \link{read.ncdfFlowset}.
#'   When all the ncdfFlowSets share the same cdf file, supplying that file
#'   here reuses the existing cdf and avoids writing to it unnecessarily.
#' @param samples \code{character} the vector of sample names which determines
#'   the physical sample storage order in the original cdf file.
#'   Default is NULL, which derives the order from the given ncdfFlowSet
#'   objects. NULL is rejected when \code{ncdfFile} already exists, because the
#'   order stored in that file cannot be derived.
#' @param dim \code{integer} see details in \link{read.ncdfFlowset}.
#'   NOTE: the value is currently overridden; a newly created file is always
#'   written with \code{dim = 2}.
#' @param compress \code{integer} see details in \link{read.ncdfFlowset}.
#' @return a new ncdfFlowSet that combines multiple raw datasets. It is backed
#'   by a newly written cdf file or, when \code{ncdfFile} already exists, by
#'   that existing file.
#'
#' @rdname rbind2-method
#' @importFrom Biobase copyEnv
#' @export
#' @examples
#' library(ncdfFlow)
#' data(GvHD)
#'
#' nc1 <- ncdfFlowSet(GvHD[1:2])
#' nc2 <- ncdfFlowSet(GvHD[3:4])
#' nc3 <- ncdfFlowSet(GvHD[5:6])
#'
#' ncfslist <- ncdfFlowList(list(nc1,nc2,nc3))
#' nc4 <- rbind2(ncfslist)
#' nc4
setMethod("rbind2",
    signature = c("ncdfFlowList"),
    definition = function(x, ncdfFile = tempfile(pattern = "ncfs"), dim = 2, compress = 0, samples = NULL)
    {
      nclist <- x

      # Normalise the file name FIRST so that every existence check below
      # looks at the file that is actually used/created. (Previously the
      # 'samples' guard tested the un-suffixed path while the reuse branch
      # tested the suffixed one.)
      if (length(grep(".", ncdfFile, fixed = TRUE)) == 0) {
        ncdfFile <- paste(ncdfFile, "nc", sep = ".")
      }
      useExisting <- file.exists(ncdfFile)

      if (is.null(samples)) {
        if (useExisting) {
          stop("'samples' must be provided in order to ensure it is consistent with the existing ncdfFile :", ncdfFile)
        }
        # sample order can be derived from the given ncdfFlowSets when a new
        # cdf file is to be created
        samples <- unlist(lapply(nclist, sampleNames, level = 1), use.names = FALSE)
      }

      isDuplicated <- duplicated(samples)
      if (any(isDuplicated)) {
        stop("duplicated samples: ",
             paste(unique(samples[isDuplicated]), collapse = ", "))
      }

      # All members must agree on phenoData columns. Every element after the
      # first being a duplicate of an earlier one means they are all identical.
      pdlist <- lapply(nclist, phenoData, level = 1)
      varLabelList <- lapply(pdlist, varLabels)
      if (!all(duplicated(varLabelList)[-1])) {
        stop("The phenoData of the ncdfFlowSets don't match.",
             call. = FALSE)
      }

      # ... and on channel names (same all-duplicates test).
      collist <- lapply(nclist, colnames, level = 1)
      if (!all(duplicated(collist)[-1])) {
        stop("The colnames of the ncdfFlowSets don't match.",
             call. = FALSE)
      }

      ## init the environment slots to be able to pass the validity check of
      ## the flowSet object
      frameEnv <- new.env(hash = TRUE, parent = emptyenv())
      indiceEnv <- new.env(hash = TRUE, parent = emptyenv())
      lapply(nclist, function(ncfs) {
        oldFrames <- ncfs@frames
        copyEnv(oldFrames, frameEnv)
        # one NA placeholder per sample in the indices environment
        for (curSample in ls(oldFrames)) {
          assign(curSample, NA, envir = indiceEnv)
        }
      }, level = 1)

      # stack the per-object pData tables into one annotation
      pdataList <- lapply(pdlist, pData)
      newPd <- AnnotatedDataFrame(do.call(base::rbind, pdataList))

      # create the combined ncdfFlowSet object
      ncfs <- new("ncdfFlowSet"
          , file = ncdfFile
          , frames = frameEnv
          , maxEvents = max(unlist(lapply(nclist, function(ncfs) ncfs@maxEvents, level = 1)))
          , flowSetId = flowCore:::guid()
          , phenoData = newPd
          , indices = indiceEnv
          , origSampleVector = samples ## need to assign the sample vector before adding the actual frames
          , origColnames = collist[[1]]
      )

      if (useExisting) {
        # it is useful to skip creating the cdf file and only deal with
        # in-memory merging when the cdf file was already generated by a
        # previous run
        message("Using the existing nc file: ", ncdfFile)
      } else {
        # create new ncdf file
        # NOTE: rbind2 will not save the metadata in the new file..
        # NOTE(review): this overrides the 'dim' argument, so callers cannot
        # request another layout; kept as-is to preserve the on-disk format --
        # confirm whether that is intentional.
        dim <- 2
        created <- tryCatch(
          createFile(ncdfFile, as.integer(ncfs@maxEvents),
                     as.integer(length(colnames(ncfs))), as.integer(length(ncfs)),
                     as.integer(dim), as.integer(compress),
                     is_libver_earliest = isTRUE(getOption("h5Flow_is_libver_earliest"))),
          error = function(e) e)
        # Surface the real cause: the old `!try(...)` test itself failed on a
        # "try-error" (character) value and masked the underlying message.
        if (inherits(created, "error")) {
          stop("failed to create ncdf file '", ncdfFile, "': ",
               conditionMessage(created), call. = FALSE)
        }
        if (!isTRUE(as.logical(created))) {
          stop("failed to create ncdf file '", ncdfFile, "'", call. = FALSE)
        }

        # add frames to env and ncdf file
        for (nc in nclist@data) {
          for (curSample in sampleNames(nc)) {
            ncfs[[curSample, compress = compress]] <- nc[[curSample]]
          }
        }
      }

      ncfs
    })

# Try the ncdfFlow package in your browser
#
# Any scripts or data that you put into this service are public.
#
# ncdfFlow documentation built on Nov. 8, 2020, 7:52 p.m.