R/utils.r

Defines functions .preprocessEB .summarize .combineSummaryFiles .readFileAsList

Documented in .combineSummaryFiles

# Read a text file and return its lines as a character vector.
#
# filePath: path (or other description accepted by file()) of the file to read.
# Returns a character vector with one element per line, or NULL when the
# file contains no lines.
.readFileAsList <- function(filePath=NULL){
	if(is.null(filePath)){
		stop("Invalid argument to readFileAsList: null parameter")
	}

	con <- file(filePath,"r")
	# Guarantee the connection is released even if reading fails.
	on.exit(close(con), add=TRUE)

	# Read everything in one call instead of growing a vector line by line.
	lines <- readLines(con)

	# Keep the historical contract: an empty file yields NULL, not character(0).
	if(length(lines)==0){
		return(NULL)
	}
	lines
}


#' @title Combine Summary Files
#' @description Recursively search for the summary CSV file of each processed dataset and combine
#'		them into a single data frame.
#' @details The user should define via the wd parameter the root of the recursive search, which should
#'		usually be the dir in which the run() function was invoked. This method then looks for any
#'		file whose name matches the pattern parameter, reads each CSV file and merges their data
#'		(keeping all rows) into a single data frame. A column named "X", if present in the merged
#'		result, is dropped.
#' @param wd root dir of search. When NULL (the default) the current working directory is used.
#' @param pattern regular expression to match against file names. By default ".summary.csv" is the
#'		pattern used by run() function
#' @return returns the combined data frame, or an empty data frame when no file matches.
#' @export
.combineSummaryFiles <- function(wd=NULL,pattern=".summary.csv$"){
	# Use the caller-supplied root when given; fall back to the working directory otherwise.
	path <- if(is.null(wd)) getwd() else wd

	files <- list.files(path=path,pattern=pattern,recursive=TRUE,full.names=TRUE)
	if(length(files)==0){
		# Nothing to combine: return an empty data frame rather than a non-data-frame value.
		return(data.frame())
	}

	results <- lapply(files, read.csv)
	# Full outer merge so that rows and columns from every file are kept.
	ret <- Reduce(function(x, y) merge(x, y, all=TRUE), results)
	# Drop the "X" column (presumably the unnamed row-name column as read back by read.csv).
	ret$X <- NULL
	ret
}

# Summarize where a target entry sits in a result table.
#
# result: object coercible with data.frame(); must have an "ID" column or,
#         failing that, a "GENE.SET" column. The PVAL and FDR.BH columns are
#         read for the matching row.
# target: pattern passed to grep() to locate the target row in the ID
#         (or GENE.SET) column.
# header: prefix used for the names of the returned list elements.
#
# Returns a list named <header>.R, <header>.N, <header>.P and <header>.FDR.BH:
#   R      - index of the last row whose PVAL equals the target's PVAL
#            (NOTE(review): this is a rank only if the table is sorted by
#            PVAL - confirm with callers),
#   N      - number of rows in the table,
#   P      - PVAL of the target row,
#   FDR.BH - FDR.BH of the target row.
# R, P and FDR.BH are Inf when the target is not found.
# Stops when an argument is NULL or neither identifier column is present.
.summarize <- function(result=NULL, target=NULL, header=NULL){
	# Scalar short-circuit check; the arguments may be long vectors/tables.
	if(is.null(result) || is.null(target) || is.null(header)){
		stop("Invalid argument to summarize: null parameter")
	}

	result <- data.frame(result)
	# "ID" takes precedence; "GENE.SET" is consulted only when "ID" is absent.
	if("ID" %in% colnames(result)){
		ids <- result[["ID"]]
	}else if("GENE.SET" %in% colnames(result)){
		ids <- result[["GENE.SET"]]
	}else{
		stop("Error in summarize: column names")
	}

	hits <- grep(target, ids)
	if(length(hits) > 1){
		# Several rows match the pattern: a vector index would make the
		# summary values non-scalar, so keep only the first match.
		warning("summarize: target matches ", length(hits), " rows; using the first match")
		hits <- hits[1]
	}

	# Defaults reported when the target is absent from the table.
	P <- Inf
	R <- Inf
	FDR.BH <- Inf

	if(length(hits) == 1){
		P <- result$PVAL[hits]
		# Ties share the position of the last row with the same p-value.
		R <- max(which(result$PVAL == P))
		FDR.BH <- result$FDR.BH[hits]
	}

	N <- nrow(result)
	ret <- list(R=R, N=N, P=P, FDR.BH=FDR.BH)

	names(ret) <- paste(header, c("R","N","P","FDR.BH"), sep=".")
	ret
}

# Preprocess one dataset: probe-to-gene mapping, normalization, group coding
# and differential expression analysis, with optional diagnostic plots.
#
# dataset:   object whose @experimentData@name slot holds the dataset name
#            and which is accepted by probe2gene().
# plots:     when TRUE, write two PNG files: "<name>.norm.png" (boxplots of
#            the assay before and after normalization) and "<name>.DE.png"
#            (pdistr() and volcano() plots).
# plots.dir: directory for the PNG files; when NULL they are written using
#            the bare dataset name as path.
#
# Returns the object produced by deAna().
# NOTE(review): probe2gene, normalize, deAna, pdistr and volcano are defined
# outside this file (presumably EnrichmentBrowser) - confirm.
.preprocessEB <- function(dataset=NULL, plots=FALSE, plots.dir=NULL){
	# name of dataset, e.g. "GSE1145"
	name <- dataset@experimentData@name

	# Keep the assay before and after normalization for the boxplots.
	geneSE <- probe2gene(dataset)
	exprBefore <- assay(geneSE)
	geneSE <- normalize(geneSE)
	exprAfter <- assay(geneSE)

	# Binary group coding: 1 where Group is "d", 0 otherwise.
	geneSE$GROUP <- ifelse(geneSE$Group=="d",1,0)
	geneSE <- deAna(geneSE, padj.method="BH")

	#Plots
	if(plots){
		# Common file-name stem: dataset name, optionally prefixed by the output dir.
		if(is.null(plots.dir)){
			stem <- name
		}else{
			stem <- paste(plots.dir,"/",name,sep="")
		}

		# Normalization check: side-by-side boxplots.
		png(paste(stem,".norm.png",sep=""))
		par(mfrow=c(1,2))
		boxplot(exprBefore)
		boxplot(exprAfter)
		dev.off()

		# Differential expression: p-value distribution and volcano plot.
		png(paste(stem,".DE.png",sep=""))
		par(mfrow=c(1,2))
		geneStats <- rowData(geneSE)
		pdistr(geneStats$PVAL)
		volcano(rowData(geneSE)$FC, rowData(geneSE)$ADJ.PVAL)
		dev.off()
	}

	return(geneSE)
}
allenaigit/spiapcc.demo documentation built on April 16, 2020, 11:53 a.m.