###
#
# Teemu Daniel Laajala
# Pipeline from processing the data from
# https://www.ncbi.nlm.nih.gov/geo/query/acc.cgi?acc=GSE16560
# Platform: GPL5474 Human 6k Transcriptionally Informative Gene Panel for DASL
# N = 281
#
# PLATFORM
# Status Public on Jan 07, 2008
# Title Human 6k Transcriptionally Informative Gene Panel for DASL
# Technology type oligonucleotide beads
# Distribution custom-commercial
# Organism Homo sapiens
# Manufacturer Illumina Inc.
# Manufacture protocol http://www.illumina.com/pages.ilmn?ID=10
#
# ORIGINAL DATA PROCESSING
#
# For each probe, Red and Green intensities are computed as average of single bead values. The final signal is the sum of the Red and Green intensities. Data were median-centered by subtraction of median (log2 scale) within sample and within DAP, and median set to a target value computed as the overall within DAP median. Data were then normalized using the qspline algorithm (on the log2 intensity scale) with reference distribution computed as the marginal (always within DAP) mean of every gene
#
###
if (!requireNamespace("BiocManager", quietly = TRUE))
install.packages("BiocManager")
BiocManager::install("GEOquery", version = "3.8")
# Required illumina annotations (v3 human)
BiocManager::install("illuminaHumanv3.db", version = "3.8")
# Optionally, relative path to the readily downloaded supplementary files
library(GEOquery)
# Annotation for the probes
#tmp <- read.table(url("https://www.ncbi.nlm.nih.gov/geo/query/acc.cgi?view=data&acc=GPL5474&id=8021&db=GeoDb_blob92"), comment="#", sep="\t")
tmp <- read.table("GPL5474.txt", sep="\t", comment="#", header=T, quote="", row.names=1)
# Processed original 2-channel illumina array
Sboner_Original <- getGEO("GSE16560")
Sboner_platform <- getGEO("GPL5474")
Sboner_platform2 <- Sboner_platform@dataTable@table
rownames(Sboner_platform2) <- Sboner_platform2[,1]
Sboner_platform2 <- Sboner_platform2[,-1]
f <- list.files()
f <- f[grep("GSM", f)]
raw <- lapply(f, FUN=function(z) { read.table(f, header=T, sep="\t") } )
tmp <- f[grep("GSM416241", f)]
tmp_fours <- lapply(tmp, FUN=function(z) { read.table(z, sep="\t", header=T) })
illumina_raw <- readIllumina(illuminaAnnotation = "Humanv3")
# Bead array analysis pipeline example:
# https://www.bioconductor.org/packages/devel/data/experiment/vignettes/BeadArrayUseCases/inst/doc/BeadArrayUseCases.pdf
# Visualizing raw green channel intensities
boxplot(test, transFun=logGreenChannelTransform, col="green")
# Red
boxplot(test, transFun=logRedChannelTransform, col="red")
# 2d-plot example
imageplot(test, array=1, high="darkgreen", low="lightgreen")
# Outliers appear rather randomly scattered spatially
#> invisible(lapply(1:9, FUN=function(z) { outlierplot(test, array=z) }))
#689 outliers found on the section
#697 outliers found on the section
#752 outliers found on the section
#576 outliers found on the section
#1161 outliers found on the section
#706 outliers found on the section
#1107 outliers found on the section
#563 outliers found on the section
#872 outliers found on the section
# Test plotting of control/housekeeping
# blima-package for bead level normalization: https://www.bioconductor.org/packages/release/bioc/vignettes/blima/inst/doc/blima.pdf
# MBCB (Model-based Background Correction for Beadarray)
# https://www.ncbi.nlm.nih.gov/pmc/articles/PMC2654805/
# https://www.bioconductor.org/packages/devel/bioc/html/MBCB.html
#> length(unique(getBeadData(test, what="ProbeID")))
#[1] 1624
# Box plotting the raw bead intensities
# Raw data without normalization
par(mfrow=c(2,1))
boxplot(test, transFun = logGreenChannelTransform, color="green")
boxplot(test, transFun = logRedChannelTransform, color="red")
# Systematic differences between the two color channels
## Re-do; fetch raw data for Sboner et al. directly from GEO
# Approx 1 GB in size
library("GEOquery")
Sboner_raw <- getGEOSuppFiles("GSE16560")
# Untar the package
untar(tarfile=rownames(Sboner_raw))
# Gunzip the .gz
lapply(list.files()[grep(".gz", list.files())], FUN=gunzip)
# Use 'beadarray' to read the raw two color intensities
# i.e. following http://www.bioconductor.org/packages//data/experiment/vignettes/BeadArrayUseCases/inst/doc/BeadArrayUseCases.pdf
if (!requireNamespace("BiocManager", quietly = TRUE))
install.packages("BiocManager")
if(!require("beadarray")){
BiocManager::install("beadarray", version = "3.8")
library("beadarray")
}
#Sboner_bead <- readIllumina()
# Each GSM-patient has 4 DAC-files, and have to be parsed separately
fileNames <- list.files()[grep(".txt", list.files())]
# Extract GSM-portion of the file name
GSMs <- unlist(lapply(fileNames, FUN=function(z) { strsplit(z, "_")[[1]][1] }))
#> all(table(GSMs)==4)
#[1] TRUE
#
## All GSM-patients have four DACs
Sboner_raw <- readIllumina(illuminaAnnotation = "Humanv3")
#> head(Sboner_raw[[1]])
# ProbeID GrnX GrnY Grn RedX RedY Red
#[1,] 2 1514.0130 1096.2180 207 1515.1320 1110.7690 99
#[2,] 2 831.3730 455.8590 150 833.0906 470.2326 47
#[3,] 2 768.7205 969.8980 139 770.4794 984.2288 54
#[4,] 2 1327.2620 579.4638 178 1328.5030 594.0328 129
#[5,] 2 1395.5850 737.8775 190 1396.6320 752.3868 69
#[6,] 2 1143.9980 1375.1310 194 1145.3350 1389.4260 88
#
## ProbeID == 2 appears to be control probe, not listed in the platform
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.