arrayConnector: Connect high throughput Array data from different platforms

virtualArrayExpressionSets <-
function(all_expression_sets=FALSE,identifier="SYMBOL",covars=3,collapse_fun=median,removeBatcheffect="EB",sampleinfo=FALSE,supervised=FALSE, ...){

# load packages
# require(affyPLM)
# require(affy)
# require(plyr)
# require(reshape2)
# require(preprocessCore)
# require(org.Hs.eg.db)

if(class(try(utils::win.version(),silent=T))=="try-error")
	{require(parallel)
	lapply=mclapply}
# create list with loaded ExpressionSets
 if(class(all_expression_sets) != "character")
	{
	all_expression_sets <- sapply(X=ls(envir=.GlobalEnv),FUN=function(x){class(eval(as.symbol(x)))})
	all_expression_sets <- grep(all_expression_sets,pattern="ExpressionSet",value=TRUE)
	all_expression_sets <- names(grep(all_expression_sets,pattern="ExpressionSet",value=TRUE))
	}
all_expression_sets <- sapply(X=all_expression_sets,FUN=function(x){as.symbol(x)},USE.NAMES=TRUE)
# build new ExpressionSets with valid fData and remap features to selected identiifier in both exprs and fData, return list of ExpressionSets
expsts <- sapply(all_expression_sets,virtualArrayBuildfData,collapse_fun=collapse_fun,identifier=identifier)
# build data.frames of the exprs and add features as column 1
dtfrms <- sapply(expsts,virtualArrayBuildExprs,simplify=FALSE)
cat("\nMatching and merging all data sets. This could take some time...\n")
#merged <- na.omit(merge_recurse(dtfrms,by="identifier"))
merged <- virtualArrayMergeRecurse(dtfrms,by="identifier",incomparables=NA)
rownames(merged) <- merged[,1]
merged <- merged[2:dim(merged)[2]]
cat("\nSize of expression matrix of whole dataset:",dim(merged)[1],"rows and",dim(merged)[2],"columns\n")
merged <- new("ExpressionSet",exprs=as.matrix(merged))

merged_norm <- normalize.ExpressionSet.quantiles(merged)
sample_info <- virtualArrayBuildSampleInfo(all_expression_sets)
sample_info <- as.data.frame(sample_info)
#print(sample_info[,1])
#print(sampleNames(merged_norm))
sampleNames(merged_norm) <- sample_info[,1]
#sample_info[1] <- sampleNames(merged_norm)
#sample_info[2] <- sampleNames(merged_norm)
write.table(sample_info,file="sample_info.txt",sep="\t",row.names=FALSE)
cat("\nThe file 'sample_info.txt' has been written to your current working directory. Please modify it appropriately!\n")
if(supervised==FALSE){
	# non-supervised => use only covars=3 and standard sampleinfo
	covars=3
	sample_info <- read.table(file="sample_info.txt",sep="\t",header=TRUE)
	cat("\nUsing only 'Batch' column as information for batch effect removal.\n")}
if(supervised==TRUE){
	# supervised mode => use covars="all" and extended sampleinfo
	covars="all"
	if(sampleinfo==FALSE){
		answer <- readline(prompt="Did you modify sample_info.txt? [y] or [n] ")
		sample_info <- read.table(file="sample_info.txt",sep="\t",header=TRUE)
		cat("\nUsing 'Batch' and 'Covariate.1' columns as information for batch effect removal.\n")}
	if(sampleinfo!=FALSE){
		sample_info <- read.table(file=sampleinfo,sep="\t",header=TRUE)}
	cat("\nUsing 'Batch' and 'Covariate.1' columns as information for batch effect removal.\n")}

merged_combat <- merged_norm
#print(sample_info)
if (removeBatcheffect == "EB") {exprs(merged_combat) <- virtualArrayComBat(expression_xls=exprs(merged_combat),sample_info_file=sample_info,covariates=covars,write=FALSE,prior.plots=FALSE,...)}
rownames(sample_info) <- sample_info[,1]
pData(merged_combat) <- as.data.frame(sample_info)
annotation(merged_combat) <- "org.Hs.eg"
if (removeBatcheffect == "QD") {merged_combat <- normalize.ExpressionSet.qd(merged_combat,...)}
if (removeBatcheffect == "GQ") {merged_combat <- normalize.ExpressionSet.gq(merged_combat,...)}
if (removeBatcheffect == "NORDI") {merged_combat <- normalize.ExpressionSet.nordi(merged_combat,...)}
if (removeBatcheffect == "MRS") {merged_combat <- normalize.ExpressionSet.mrs(merged_combat,...)}
if (removeBatcheffect == "MC") {merged_combat <- normalize.ExpressionSet.mc(merged_combat,...)}
merged_combat_norm <- normalize.ExpressionSet.quantiles(merged_combat)
#require(org.Hs.eg.db)
#fData(virtualArray$combo_combat_norm.exst) <- virtualArray$annot_longest[match(x=rownames(fData(virtualArray$combo_combat_norm.exst)),table=virtualArray$annot_longest$Gene.Symbol),]
varMetadata(merged_combat_norm)[,1] <- c("name of the array in the data set; in most cases this corresponds to the file name","further description of the array, e.g the biological source","this designates the array platform or chip type","additional information to group samples together so that biological information isn't lost during batch removal")
return(merged_combat_norm)
}

ShixiangWang/arrayConnector documentation built on May 14, 2019, 6:02 a.m.

rdrr.io home R language documentation Run R code online

CRAN packages Bioconductor packages R-Forge packages GitHub packages

Note that we can't provide technical support on individual packages. You should contact the package authors for that.

ShixiangWang/arrayConnector
Connect high throughput Array data from different platforms

R/virtualArrayExpressionSets.R
In ShixiangWang/arrayConnector: Connect high throughput Array data from different platforms

R Package Documentation

Browse R Packages

We want your feedback!

ShixiangWang/arrayConnector Connect high throughput Array data from different platforms

R/virtualArrayExpressionSets.R In ShixiangWang/arrayConnector: Connect high throughput Array data from different platforms

R Package Documentation

Browse R Packages

We want your feedback!

ShixiangWang/arrayConnector
Connect high throughput Array data from different platforms

R/virtualArrayExpressionSets.R
In ShixiangWang/arrayConnector: Connect high throughput Array data from different platforms