21.MatSmoothing_vs_MAT.R
In aroma.affymetrix: Analysis of Large Affymetrix Microarray Data Sets

###########################################################################
# Replication test
#
# Description:
# This test verifies that aroma.affymetrix can reproduce the estimates
# of the MAT (Model-based Analysis of Tiling arrays) algorithm.
#
# Author: Mark Robinson and Henrik Bengtsson
# Created: 2008-12-09
# Last modified: 2012-09-02
###########################################################################
library("aroma.affymetrix")
library("matrixStats"); # colMedians()
library("limma");  # makeContrast()
verbose <- Arguments$getVerbose(-20, timestamp=TRUE)


# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
# Data set
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
dataSet <- "GSE24546"
tags <- "testset"
chipType <- "Hs_PromPR_v02"
sampleNamesMap <- c(
  GSM605951="Prec1_MeDNA_Input1",
  GSM605952="Prec1_MeDNA_IP2",
  GSM605953="Prec1_MeDNA_IP1"
)

cdf <- AffymetrixCdfFile$byChipType(chipType)
print(cdf)

csR <- AffymetrixCelSet$byName(dataSet, tags=tags, cdf=cdf)
print(csR)


# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
# Normalize the data using the MAT model
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
mn <- MatNormalization(csR)
csM <- process(mn, verbose=more(verbose, 3))
print(csM)


# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
# Convert data set such that it maps to the "unique" CDF
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
csU <- convertToUnique(csM, verbose=verbose)
print(csU)

# Rename
setFullNamesTranslator(csU, function(names, ...) { sampleNamesMap[names] })


# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
# MatSmoothing: Design #1
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
sampleNames <- getNames(csU)

design1 <- makeContrasts(Prec1_MeDNA_IP1-Prec1_MeDNA_Input1, levels=sampleNames)
colnames(design1) <- "Prec1_IP1_minus_Input"
print(design1)

ms1 <- MatSmoothing(csU, design=design1, probeWindow=600, tag="singleIP")
csMS1 <- process(ms1, units=NULL, verbose=verbose)
print(csMS1)


# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
# MatSmoothing: Design #2
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
sampleNames <- getNames(csU)

design2 <- makeContrasts(Prec1_MeDNA_IP1 + Prec1_MeDNA_IP2-Prec1_MeDNA_Input1, levels=sampleNames)
colnames(design2) <- "Prec1_IPs_minus_Input"

ms2 <- MatSmoothing(csU, design=design2, probeWindow=800, tag="multipleIP")
csMS2 <- process(ms2, units=NULL,verbose=verbose)
print(csMS2)


# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
# Compare to precomputed estimates from external MAT software
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
# Compare common units with prefix "chr1F".
cdfU <- getCdf(csU)
units <- indexOf(cdfU, "^chr1F")
cells <- getCellIndices(cdfU, units=units, stratifyBy="pm",
                        unlist=TRUE, useNames=FALSE)

# Get the chromosomal positions of these cells
acp <- AromaCellPositionFile$byChipType(getChipType(cdfU))
pos <- acp[cells,2,drop=TRUE]

# Order cells by chromsomal position
o <- order(pos)
pos <- pos[o]
cells <- cells[o]

# Extract the corresponding signals
Y1 <- extractMatrix(csMS1, cells=cells, verbose=verbose)
Y2 <- extractMatrix(csMS2, cells=cells, verbose=verbose)
Y <- cbind(Y1, Y2)
# Compare on the log2 scale
Y <- log2(Y)


# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
# Loat external signals generated by the MAT software
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
# files with native-MAT summarized signals
summaryFiles <- c("Prec1_MeDNA_600_IP1-Input.bar.txt",
                  "Prec1_MeDNA_800_IPs-Input.bar.txt")
barList <- lapply(summaryFiles, FUN=function(filename) {
  columnNames <- c("chromosome", "position", "signal")
  TabularTextFile(filename, path=getPath(csR), columnNames=columnNames)
})
print(barList)

YMAT <- sapply(barList, FUN=function(bar) {
  columnNames <- c("chromosome", "position", "signal")
  data <- readDataFrame(bar, colClasses=colClasses("cin"), nrow=435000)

  # Extract the subset available in the aroma.affymetrix estimate
  data <- subset(data, chromosome == "chr1" & position %in% pos)

  # Order by position
  o <- order(data$position)
  data <- data[o,,drop=FALSE]

  # Extract the external estimates
  y <- data$signal

  # External score that are exactly zero, they are NAs
  y[y == 0] <- NA

  y
})


# Sanity check
stopifnot(identical(dim(YMAT), dim(Y)))
colnames(YMAT) <- colnames(Y)

# Graphical comparison
designNames <- colnames(Y)
for (key in designNames) {
  toPNG(getFullName(csU), tags=c(key, "MatSmoothing_vs_MAT"), {
    xlab <- expression(log[2](y[MAT]))
    ylab <- expression(log[2](y))
    plot(YMAT[,key], Y[,key], pch=".", xlab=xlab, ylab=ylab)
    abline(a=0, b=1, col="red", lwd=2, lty=3)
    stext(side=3, pos=0, key)
    stext(side=3, pos=1, "Chr01")
    stext(side=4, pos=1, sprintf("n=%d", nrow(Y)))
    stext(side=4, pos=0, getFullName(csU))
  })
}


# Numerical comparison
D2 <- (Y - YMAT)^2
avgDiff <- colMedians(D2, na.rm=TRUE)
names(avgDiff) <- colnames(D2)
cat("Average differences:\n")
print(avgDiff)

# Sanity check
stopifnot(all(avgDiff < 0.003))


print(sessionInfo())

Any scripts or data that you put into this service are public.

aroma.affymetrix documentation built on May 29, 2024, 4:32 a.m.

rdrr.io home R language documentation Run R code online

CRAN packages Bioconductor packages R-Forge packages GitHub packages

Note that we can't provide technical support on individual packages. You should contact the package authors for that.

aroma.affymetrix
Analysis of Large Affymetrix Microarray Data Sets

inst/testScripts/replication/chipTypes/Hs_PromPR_v02/21.MatSmoothing_vs_MAT.R
In aroma.affymetrix: Analysis of Large Affymetrix Microarray Data Sets

Try the aroma.affymetrix package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

aroma.affymetrix Analysis of Large Affymetrix Microarray Data Sets

inst/testScripts/replication/chipTypes/Hs_PromPR_v02/21.MatSmoothing_vs_MAT.R In aroma.affymetrix: Analysis of Large Affymetrix Microarray Data Sets

Try the aroma.affymetrix package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

aroma.affymetrix
Analysis of Large Affymetrix Microarray Data Sets

inst/testScripts/replication/chipTypes/Hs_PromPR_v02/21.MatSmoothing_vs_MAT.R
In aroma.affymetrix: Analysis of Large Affymetrix Microarray Data Sets