Nothing
###########################################################################
# Replication test
#
# Description:
# This test verifies that aroma.affymetrix can reproduce the estimates
# of the MAT (Model-based Analysis of Tiling arrays) algorithm.
#
# Author: Mark Robinson and Henrik Bengtsson
# Created: 2008-12-09
# Last modified: 2012-09-02
###########################################################################
library("aroma.affymetrix")
library("matrixStats"); # colMedians()
library("limma"); # makeContrast()
verbose <- Arguments$getVerbose(-20, timestamp=TRUE)
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
# Data set
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
dataSet <- "GSE24546"
tags <- "testset"
chipType <- "Hs_PromPR_v02"
sampleNamesMap <- c(
GSM605951="Prec1_MeDNA_Input1",
GSM605952="Prec1_MeDNA_IP2",
GSM605953="Prec1_MeDNA_IP1"
)
cdf <- AffymetrixCdfFile$byChipType(chipType)
print(cdf)
csR <- AffymetrixCelSet$byName(dataSet, tags=tags, cdf=cdf)
print(csR)
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
# Normalize the data using the MAT model
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
mn <- MatNormalization(csR)
csM <- process(mn, verbose=more(verbose, 3))
print(csM)
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
# Convert data set such that it maps to the "unique" CDF
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
csU <- convertToUnique(csM, verbose=verbose)
print(csU)
# Rename
setFullNamesTranslator(csU, function(names, ...) { sampleNamesMap[names] })
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
# MatSmoothing: Design #1
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
sampleNames <- getNames(csU)
design1 <- makeContrasts(Prec1_MeDNA_IP1-Prec1_MeDNA_Input1, levels=sampleNames)
colnames(design1) <- "Prec1_IP1_minus_Input"
print(design1)
ms1 <- MatSmoothing(csU, design=design1, probeWindow=600, tag="singleIP")
csMS1 <- process(ms1, units=NULL, verbose=verbose)
print(csMS1)
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
# MatSmoothing: Design #2
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
sampleNames <- getNames(csU)
design2 <- makeContrasts(Prec1_MeDNA_IP1 + Prec1_MeDNA_IP2-Prec1_MeDNA_Input1, levels=sampleNames)
colnames(design2) <- "Prec1_IPs_minus_Input"
ms2 <- MatSmoothing(csU, design=design2, probeWindow=800, tag="multipleIP")
csMS2 <- process(ms2, units=NULL,verbose=verbose)
print(csMS2)
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
# Compare to precomputed estimates from external MAT software
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
# Compare common units with prefix "chr1F".
cdfU <- getCdf(csU)
units <- indexOf(cdfU, "^chr1F")
cells <- getCellIndices(cdfU, units=units, stratifyBy="pm",
unlist=TRUE, useNames=FALSE)
# Get the chromosomal positions of these cells
acp <- AromaCellPositionFile$byChipType(getChipType(cdfU))
pos <- acp[cells,2,drop=TRUE]
# Order cells by chromsomal position
o <- order(pos)
pos <- pos[o]
cells <- cells[o]
# Extract the corresponding signals
Y1 <- extractMatrix(csMS1, cells=cells, verbose=verbose)
Y2 <- extractMatrix(csMS2, cells=cells, verbose=verbose)
Y <- cbind(Y1, Y2)
# Compare on the log2 scale
Y <- log2(Y)
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
# Loat external signals generated by the MAT software
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
# files with native-MAT summarized signals
summaryFiles <- c("Prec1_MeDNA_600_IP1-Input.bar.txt",
"Prec1_MeDNA_800_IPs-Input.bar.txt")
barList <- lapply(summaryFiles, FUN=function(filename) {
columnNames <- c("chromosome", "position", "signal")
TabularTextFile(filename, path=getPath(csR), columnNames=columnNames)
})
print(barList)
YMAT <- sapply(barList, FUN=function(bar) {
columnNames <- c("chromosome", "position", "signal")
data <- readDataFrame(bar, colClasses=colClasses("cin"), nrow=435000)
# Extract the subset available in the aroma.affymetrix estimate
data <- subset(data, chromosome == "chr1" & position %in% pos)
# Order by position
o <- order(data$position)
data <- data[o,,drop=FALSE]
# Extract the external estimates
y <- data$signal
# External score that are exactly zero, they are NAs
y[y == 0] <- NA
y
})
# Sanity check
stopifnot(identical(dim(YMAT), dim(Y)))
colnames(YMAT) <- colnames(Y)
# Graphical comparison
designNames <- colnames(Y)
for (key in designNames) {
toPNG(getFullName(csU), tags=c(key, "MatSmoothing_vs_MAT"), {
xlab <- expression(log[2](y[MAT]))
ylab <- expression(log[2](y))
plot(YMAT[,key], Y[,key], pch=".", xlab=xlab, ylab=ylab)
abline(a=0, b=1, col="red", lwd=2, lty=3)
stext(side=3, pos=0, key)
stext(side=3, pos=1, "Chr01")
stext(side=4, pos=1, sprintf("n=%d", nrow(Y)))
stext(side=4, pos=0, getFullName(csU))
})
}
# Numerical comparison
D2 <- (Y - YMAT)^2
avgDiff <- colMedians(D2, na.rm=TRUE)
names(avgDiff) <- colnames(D2)
cat("Average differences:\n")
print(avgDiff)
# Sanity check
stopifnot(all(avgDiff < 0.003))
print(sessionInfo())
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.