inst/tests/test.virtualArray.R

## The script for testing virtualArray package

#'Testing virtualArray package
#'
#'Use data from GEO to test the
#'virtualArray package, see what
#'should be modified and promoted
#'
#'
#'@author Shixiang Wang
#'@reference \url{https://static-content.springer.com/esm/art%3A10.1186%2F1471-2105-14-75/MediaObjects/12859_2012_5720_MOESM1_ESM.pdf}

require(GEOquery)
# GSE23042 <- getGEO("GSE23402", destdir = "geo_data", GSEMatrix = TRUE, AnnotGPL = TRUE)
#system("mkdir geo_data")
# GSE23042 <- getGEO("GSE23402", destdir = "geo_data")
# GSE26428 <- getGEO("GSE26428", destdir = "geo_data")
#GSE28688 <- getGEO("GSE28688", destdir = "geo_data")

GSE23042 <- getGEO(filename = "inst/tests/geo_data/GSE23402_series_matrix.txt.gz",
                   destdir = "inst/tests/geo_data/")
GSE26428 <- getGEO(filename = "inst/tests/geo_data/GSE26428_series_matrix.txt.gz",
                   destdir = "inst/tests/geo_data/")

GSE23042 <- GSE23042[, 1:24]

summary(exprs(GSE23042)[,1:3])
summary(exprs(GSE26428))

# log2-transform the Affymetrix data
# transform the Agilent data from 20 bit to 16 bit resolution

exprs(GSE23042) <- log2(exprs(GSE23042))
exprs(GSE26428) <- (exprs(GSE26428)/20*16)

summary(exprs(GSE23042)[,1:3])
summary(exprs(GSE26428))

annotation(GSE23042) <- "hgu133plus2"
annotation(GSE26428) <- "hgug4112a"
# biocLite(c("hgu133plus2.db", "hgug4112a.db"))

# require(virtualArray)
# creat an empty object to hold virtual arrays
library(plyr)
library(preprocessCore)
library(reshape2)
library(affyPLM)

my_virtualArrays <- NULL
my_virtualArrays$iPSC_hESC_noBatchEffect <- virtualArrayExpressionSets(removeBatcheffect = "QN")
my_virtualArrays$iPSC_hESC_withBatchEffect <- virtualArrayExpressionSets(removeBatcheffect = F)
pData(my_virtualArrays$iPSC_hESC_noBatchEffect)[5] <-
    c(as.character(pData(GSE23042)[,8]), as.character(pData(GSE26428)[,1]))
pData(my_virtualArrays$iPSC_hESC_noBatchEffect)[6] <-
    c(rep("red",24), rep("blue", 3))

hist(my_virtualArrays$iPSC_hESC_noBatchEffect)
hist(my_virtualArrays$iPSC_hESC_withBatchEffect)
boxplot(my_virtualArrays$iPSC_hESC_noBatchEffect)
boxplot(my_virtualArrays$iPSC_hESC_withBatchEffect)

# A glimpse at the results
dist_iPSC_hESC_noBatchEffect <-
    dist(t(exprs(my_virtualArrays$iPSC_hESC_noBatchEffect)), method = "euclidian")
dist_iPSC_hESC_withBatchEffect <-
    dist(t(exprs(my_virtualArrays$iPSC_hESC_withBatchEffect)), method = "euclidian")

# trees are formed using average distance between clusters
hc_iPSC_hESC_noBatchEffect <-
    hclust(dist_iPSC_hESC_noBatchEffect, method="average")
hc_iPSC_hESC_noBatchEffect$call <- NULL

hc_iPSC_hESC_withBatchEffect <-
    hclust(dist_iPSC_hESC_withBatchEffect, method="average")
hc_iPSC_hESC_withBatchEffect$call <- NULL

# plot in color
virtualArrayHclust(hc_iPSC_hESC_withBatchEffect,
                   lab.col = pData(my_virtualArrays$iPSC_hESC_noBatchEffect)[,6],
                   lab = pData(my_virtualArrays$iPSC_hESC_noBatchEffect)[,5],
                   main = "batch effect NOT removed", cex=0.7,
                   xlab = "sample names")

virtualArrayHclust(hc_iPSC_hESC_noBatchEffect,
                   lab.col = pData(my_virtualArrays$iPSC_hESC_noBatchEffect)[,6],
                   lab = pData(my_virtualArrays$iPSC_hESC_noBatchEffect)[,5],
                   main = "batch effect removed", cex=0.7,
                   xlab = "sample names")

##########################
# run in supervised mode #
##########################

# modify sample_info.txt file in work directory
my_virtualArrays$iPSC_hESC_supervised <- virtualArrayExpressionSets(supervised = TRUE)

dist_iPSC_hESC_supervised <-
    dist(t(exprs(my_virtualArrays$iPSC_hESC_supervised)),
         method = "euclidian")
hc_iPSC_hESC_supervised <- hclust(dist_iPSC_hESC_supervised,
                                  method = "average")
hc_iPSC_hESC_supervised$call <- NULL
virtualArrayHclust(hc_iPSC_hESC_supervised,
                   lab.col = pData(my_virtualArrays$iPSC_hESC_noBatchEffect)[,6],
                   lab = pData(my_virtualArrays$iPSC_hESC_noBatchEffect)[,5],
                   main = "batch effect removed - supervised mode", cex=0.7,
                   xlab = "sample names")

# principle component analysis
pca_supervised <- prcomp(t(exprs(my_virtualArrays$iPSC_hESC_supervised)))
plot(pca_supervised$x, pch=19, cex=.5, col=c(rep("red", 24), rep("blue", 3),pch=17))
legend("topleft", c("GSE23402", "GSE26428"),
       col=c("red", "blue"), pch=19, cex=.8)

pca_unsupervised <- prcomp(t(exprs(my_virtualArrays$iPSC_hESC_noBatchEffect)))
plot(pca_unsupervised$x, pch=19, cex=.5, col=c(rep("red", 24), rep("blue", 3),
                                            pch=17))
legend("topleft", c("GSE23402", "GSE26428"),
       col=c("red", "blue"), pch=19, cex=.8)
View(pca_supervised$x)
View(pca_unsupervised$x)
ShixiangWang/arrayConnector documentation built on May 14, 2019, 6:02 a.m.