# R/diagnostics_BatchEffects.R

####
#
# Teemu Daniel Laajala
# Examine GEX profiles of re-processed files through raw files obtained from GEO
# PCA, etc. to detect potential batch effects and/or signal differences
#
####

# Load the re-processed expression data from the current working directory,
# which must already contain the processed .RData files. The code further down
# expects these files to provide GEX_Barwick, GEX_Sun and GEX_Taylor.
# envir = environment() makes load() restore the objects into the environment
# this script runs in, exactly as a bare top-level load() call would.
invisible(lapply(
  c("GEX_Barwick.RData", "GEX_Sun.RData", "GEX_Taylor.RData"),
  load,
  envir = environment()
))

# Check individual distributions: one boxplot per dataset, drawn side by side
# with the same y-axis limits so the three panels can be compared directly
library(oligo)

# dev.new() opens whichever graphics device suits the current platform;
# x11() fails on systems where X11 is not available
dev.new()

# Remember the previous panel layout so it can be restored after plotting
old_par <- par(mfrow = c(1, 3))
oligo::boxplot(GEX_Barwick, main = "Barwick", ylim = c(0, 20))
oligo::boxplot(GEX_Sun, main = "Sun", ylim = c(0, 20))
oligo::boxplot(GEX_Taylor, main = "Taylor", ylim = c(0, 20))
par(old_par)

#> dim(GEX_Sun)
#Features  Samples 
#   22283       79 
#> dim(GEX_Barwick)
#Features  Samples 
#     497      139 
#> dim(GEX_Taylor)
#Features  Samples 
#   22011      370


# Check a PCA between Taylor and Sun
# Only rows present in both datasets can be compared, so collect the row names
# shared by the two objects first; they are used below to subset both
# expression matrices to the same rows, in the same order
TaylorSunGenes <- intersect(rownames(GEX_Taylor), rownames(GEX_Sun))

#> length(TaylorSunGenes)
#[1] 3812

# Surprisingly small number?

# Fail early with a readable message instead of a cryptic prcomp() error when
# the two datasets share no row names
if (length(TaylorSunGenes) == 0) {
  stop("Taylor and Sun share no row names; nothing to run the PCA on",
       call. = FALSE)
}

# Combine both datasets over the shared rows: Taylor samples first, then Sun.
# drop = FALSE keeps each piece a matrix even with a single row or sample
GEX_Taylor_Sun <- cbind(
  exprs(GEX_Taylor)[TaylorSunGenes, , drop = FALSE],
  exprs(GEX_Sun)[TaylorSunGenes, , drop = FALSE]
)

# PCA
# prcomp() treats rows as observations, so transpose to put samples in rows
pc <- prcomp(t(GEX_Taylor_Sun))

# One colour per sample, in the same column order as the cbind() above
sample_cols <- rep(c("red", "blue"),
                   times = c(ncol(GEX_Taylor), ncol(GEX_Sun)))

# dev.new() opens whichever graphics device suits the current platform;
# x11() fails on systems where X11 is not available
dev.new()
plot(pc$x[, 1:2], pch = 16, col = sample_cols, main = "GEX re-processed")
legend("topright", legend = c("Taylor", "Sun"), col = c("red", "blue"),
       pch = 16)
# Syksy/curatedTools documentation built on May 27, 2019, 9:55 a.m.