R/virtualArrayExpressionSets.R

virtualArrayExpressionSets <-
function(all_expression_sets=FALSE,identifier="SYMBOL",covars=3,collapse_fun=median,removeBatcheffect="EB",sampleinfo=FALSE,supervised=FALSE, ...){

# load packages
# require(affyPLM)
# require(affy)
# require(plyr)
# require(reshape2)
# require(preprocessCore)
# require(org.Hs.eg.db)

if(class(try(utils::win.version(),silent=T))=="try-error")
	{require(parallel)
	lapply=mclapply}
# create list with loaded ExpressionSets
 if(class(all_expression_sets) != "character")
	{
	all_expression_sets <- sapply(X=ls(envir=.GlobalEnv),FUN=function(x){class(eval(as.symbol(x)))})
	all_expression_sets <- grep(all_expression_sets,pattern="ExpressionSet",value=TRUE)
	all_expression_sets <- names(grep(all_expression_sets,pattern="ExpressionSet",value=TRUE))
	}
all_expression_sets <- sapply(X=all_expression_sets,FUN=function(x){as.symbol(x)},USE.NAMES=TRUE)
# build new ExpressionSets with valid fData and remap features to selected identiifier in both exprs and fData, return list of ExpressionSets
expsts <- sapply(all_expression_sets,virtualArrayBuildfData,collapse_fun=collapse_fun,identifier=identifier)
# build data.frames of the exprs and add features as column 1
dtfrms <- sapply(expsts,virtualArrayBuildExprs,simplify=FALSE)
cat("\nMatching and merging all data sets. This could take some time...\n")
#merged <- na.omit(merge_recurse(dtfrms,by="identifier"))
merged <- virtualArrayMergeRecurse(dtfrms,by="identifier",incomparables=NA)
rownames(merged) <- merged[,1]
merged <- merged[2:dim(merged)[2]]
cat("\nSize of expression matrix of whole dataset:",dim(merged)[1],"rows and",dim(merged)[2],"columns\n")
merged <- new("ExpressionSet",exprs=as.matrix(merged))

merged_norm <- normalize.ExpressionSet.quantiles(merged)
sample_info <- virtualArrayBuildSampleInfo(all_expression_sets)
sample_info <- as.data.frame(sample_info)
#print(sample_info[,1])
#print(sampleNames(merged_norm))
sampleNames(merged_norm) <- sample_info[,1]
#sample_info[1] <- sampleNames(merged_norm)
#sample_info[2] <- sampleNames(merged_norm)
write.table(sample_info,file="sample_info.txt",sep="\t",row.names=FALSE)
cat("\nThe file 'sample_info.txt' has been written to your current working directory. Please modify it appropriately!\n")
if(supervised==FALSE){
	# non-supervised => use only covars=3 and standard sampleinfo
	covars=3
	sample_info <- read.table(file="sample_info.txt",sep="\t",header=TRUE)
	cat("\nUsing only 'Batch' column as information for batch effect removal.\n")}
if(supervised==TRUE){
	# supervised mode => use covars="all" and extended sampleinfo
	covars="all"
	if(sampleinfo==FALSE){
		answer <- readline(prompt="Did you modify sample_info.txt? [y] or [n] ")
		sample_info <- read.table(file="sample_info.txt",sep="\t",header=TRUE)
		cat("\nUsing 'Batch' and 'Covariate.1' columns as information for batch effect removal.\n")}
	if(sampleinfo!=FALSE){
		sample_info <- read.table(file=sampleinfo,sep="\t",header=TRUE)}
	cat("\nUsing 'Batch' and 'Covariate.1' columns as information for batch effect removal.\n")}

merged_combat <- merged_norm
#print(sample_info)
if (removeBatcheffect == "EB") {exprs(merged_combat) <- virtualArrayComBat(expression_xls=exprs(merged_combat),sample_info_file=sample_info,covariates=covars,write=FALSE,prior.plots=FALSE,...)}
rownames(sample_info) <- sample_info[,1]
pData(merged_combat) <- as.data.frame(sample_info)
annotation(merged_combat) <- "org.Hs.eg"
if (removeBatcheffect == "QD") {merged_combat <- normalize.ExpressionSet.qd(merged_combat,...)}
if (removeBatcheffect == "GQ") {merged_combat <- normalize.ExpressionSet.gq(merged_combat,...)}
if (removeBatcheffect == "NORDI") {merged_combat <- normalize.ExpressionSet.nordi(merged_combat,...)}
if (removeBatcheffect == "MRS") {merged_combat <- normalize.ExpressionSet.mrs(merged_combat,...)}
if (removeBatcheffect == "MC") {merged_combat <- normalize.ExpressionSet.mc(merged_combat,...)}
merged_combat_norm <- normalize.ExpressionSet.quantiles(merged_combat)
#require(org.Hs.eg.db)
#fData(virtualArray$combo_combat_norm.exst) <- virtualArray$annot_longest[match(x=rownames(fData(virtualArray$combo_combat_norm.exst)),table=virtualArray$annot_longest$Gene.Symbol),]
varMetadata(merged_combat_norm)[,1] <- c("name of the array in the data set; in most cases this corresponds to the file name","further description of the array, e.g the biological source","this designates the array platform or chip type","additional information to group samples together so that biological information isn't lost during batch removal")
return(merged_combat_norm)
}
ShixiangWang/arrayConnector documentation built on May 14, 2019, 6:02 a.m.