#### metCorrect (mset) ####
#' Signal drift and batch effect correction
#'
#' Signal drift and batch effect correction
#'
#'
#' @param x An S4 object of class \code{MultiDataSet}
#' @param referenceSampeTypeC Character: sample type to be used as reference for the correction (as indicated in the 'colnameSampleType' column from the
#' pData(x); e.g. 'pool')
#' @param colnameBatchC Character: name of the column from pData(x) containing the batch information (encoded as characters)
#' @param colnameInjectionOrder Character: name of the column from pData(x) containing the injection order information (encoded as numerics)
#' @param colnameSampleTypeC Character: name of the column from pData(x) containing the sample type information (encoded as characters)
#' @param spanN Numeric: smoothing parameter for the loess regression; between 0 and 1; (default set to 1)
#' @param fig.pdfC Character: Name of the file for the graphics from the significant features;
#' if NA, the graphics are displayed on the screen;
#' @param info.txtC Character: File name for the printed results (call to
#' 'sink()'); if NA (default), messages will be printed on the screen; if NULL,
#' no verbose will be generated
#' @return \code{MultiDataSet} including the adjusted p-values in fData (or the vector of adjusted p-values when returnAdjustOnlyL is TRUE)
#' @rdname metCorrect
#' @export
#' @examples
#' # to be provided
setMethod("metCorrect", signature(x = "MultiDataSet"),
function(x,
referenceSampleTypeC = c("pool", "sample")[1],
colnameBatchC = "batch",
colnameInjectionOrderC = "injectionOrder",
colnameSampleTypeC = "sampleType",
spanN = 1,
fig.pdfC = NA,
info.txtC = NA) {
if (!is.null(info.txtC) &&
!is.na(info.txtC))
sink(info.txtC, append = TRUE)
infTxtC <- info.txtC
if (!is.null(infTxtC))
infTxtC <- NA
for (setC in names(x)) {
if (!is.null(info.txtC))
cat("\nCorrecting the '",
setC,
"' dataset:\n",
sep = "")
ese <- metabolis::metCorrect(x[[setC]],
referenceSampleTypeC = referenceSampleTypeC,
colnameBatchC = colnameBatchC,
colnameInjectionOrderC = colnameInjectionOrderC,
colnameSampleTypeC = colnameSampleTypeC,
spanN = spanN,
fig.pdfC = fig.pdfC,
info.txtC = infTxtC)
x <- MultiDataSet::add_eset(x,
ese,
dataset.type = setC,
GRanges = NA,
overwrite = TRUE,
warnings = FALSE)
}
if (!is.null(info.txtC) &&
!is.na(info.txtC))
sink()
return(invisible(x))
})
#### metCorrect (eset) ####
#' Signal drift and batch effect correction
#'
#' Signal drift and batch effect correction
#'
#' @param x An S4 object of class \code{ExpressionSet}
#' @param referenceSampeTypeC Character: sample type to be used as reference for the correction (as indicated in the 'colnameSampleType' column from the
#' pData(x); e.g. 'pool')
#' @param colnameBatchC Character: name of the column from pData(x) containing the batch information (encoded as characters)
#' @param colnameInjectionOrder Character: name of the column from pData(x) containing the injection order information (encoded as numerics)
#' @param colnameSampleTypeC Character: name of the column from pData(x) containing the sample type information (encoded as characters)
#' @param spanN Numeric: smoothing parameter for the loess regression; between 0 and 1; (default set to 1)
#' @param fig.pdfC Character: Name of the file for the graphics from the significant features;
#' if NA, the graphics are displayed on the screen;
#' @param info.txtC Character: File name for the printed results (call to
#' 'sink()'); if NA (default), messages will be printed on the screen; if NULL,
#' no verbose will be generated
#' @return \code{ExpressionSet} including the adjusted p-values in fData (or the vector of adjusted p-values when returnAdjustOnlyL is TRUE)
#' @rdname metCorrect
#' @export
#' @examples
#' sacSet <- metRead(system.file("extdata/sacurine", package = "metabolis"))
#' sacSet <- metCorrect(sacSet)
setMethod("metCorrect", signature(x = "ExpressionSet"),
function(x,
referenceSampleTypeC = c("pool", "sample")[1],
colnameBatchC = "batch",
colnameInjectionOrderC = "injectionOrder",
colnameSampleTypeC = "sampleType",
spanN = 1,
fig.pdfC = NA,
info.txtC = NA) {
if (!is.null(info.txtC) &&
!is.na(info.txtC))
sink(info.txtC, append = TRUE)
x <- .metCorrect(x,
refC = referenceSampleTypeC,
colBatC = colnameBatchC,
colInjC = colnameInjectionOrderC,
colSamC = colnameSampleTypeC,
spnN = spanN,
fig.pdfC = fig.pdfC)
validObject(x)
if (!is.null(info.txtC) &&
!is.na(info.txtC))
sink()
return(invisible(x))
})
.metCorrect <- function(eset,
refC,
colBatC,
colInjC,
colSamC,
spnN,
fig.pdfC) {
rawMN <- t(Biobase::exprs(eset))
samDF <- Biobase::pData(eset)
varDF <- Biobase::fData(eset)
## checking
if (sum(grepl(refC, samDF[, colSamC])) == 0)
stop("No '", refC, "' reference sample type found in the '", colSamC, "' column of the sampleMetadata.")
refMN <- rawMN[samDF[, colSamC] == refC, ]
refNasZerVl <- apply(refMN, 2,
function(refVn)
all(sapply(refVn,
function(refN) {is.na(refN) || refN == 0})))
if (sum(refNasZerVl)) {
cat("The following variables have 'NA' or 0 values in all reference samples; they will be removed from the data:\n", sep = "")
rawMN <- rawMN[, !refNasZerVl, drop = FALSE]
varDF <- varDF[!refNasZerVl, , drop = FALSE]
}
## Computation
## ordering (batch and injection order)
samDF[, "ordIniVi"] <- 1:nrow(rawMN)
ordBatInjVi <- order(samDF[, colBatC],
samDF[, colInjC])
rawMN <- rawMN[ordBatInjVi, ]
samDF <- samDF[ordBatInjVi, ]
## signal drift and batch-effect correction
nrmMN <- .shiftBatchCorrect(rawMN,
samDF,
refC,
spnN,
colBatC,
colSamC)
## figure
if (!is.null(fig.pdfC)) {
if (!is.na(fig.pdfC))
pdf(fig.pdfC,
onefile = TRUE,
width = 11,
height = 7)
.plotBatch(rawMN, samDF, spnN, colBatC, colSamC)
.plotBatch(nrmMN, samDF, spnN, colBatC, colSamC)
if (!is.na(fig.pdfC))
dev.off()
}
## returning to initial order
ordIniVi <- order(samDF[, "ordIniVi"])
nrmMN <- nrmMN[ordIniVi, ]
samDF <- samDF[ordIniVi, ]
samDF <- samDF[, colnames(samDF) != "ordIniVi", drop = FALSE]
## ending
Biobase::exprs(eset) <- t(nrmMN)
Biobase::pData(eset) <- samDF
Biobase::fData(eset) <- varDF
## returning
eset
}
.loess <- function(datVn, qcaVi, preVi, span) {
if (length(qcaVi) < 5) {
return(predict(lm(datVn[qcaVi] ~ qcaVi),
newdata = data.frame(qcaVi = preVi)))
} else {
return(predict(loess(datVn[qcaVi] ~ qcaVi,
control = loess.control(surface = "direct"),
span = span),
newdata = data.frame(qcaVi = preVi)))
}
## Note:
## the surface = 'direct' argument allows extrapolation
}
.plotBatch <- function(datMN,
samDF.arg,
spnN.arg,
colBatC.arg,
colSamC.arg) {
maiC <- switch(gsub("MN", "", deparse(substitute(datMN))),
raw = "Raw",
nrm = "Normalized")
colVc <- c(samp = "green4",
biol = "green4",
pool = "red",
blan = "black",
other = "yellow")
par(font = 2, font.axis = 2, font.lab = 2, lwd = 2, pch = 18)
layout(matrix(c(1, 1, 2, 3), nrow = 2),
widths = c(0.7, 0.3))
obsNamVc <- rownames(datMN)
obsColVc <- sapply(substr(samDF.arg[, colSamC.arg], 1, 4),
function(typC)
ifelse(typC %in% names(colVc), colVc[typC], colVc["other"]))
## Graphic 1: Sum of intensities for each sample
par(mar = c(3.6, 3.6, 3.1, 0.6))
batTab <- table(samDF.arg[, colBatC.arg])
sumVn <- rowSums(datMN, na.rm = TRUE)
plot(sumVn,
cex = 1.2,
col = obsColVc,
pch = 18,
xaxs = "i",
xlab = "",
ylab = "")
mtext("Injection order",
line = 2.2,
side = 1)
mtext("Sum of variable intensities",
line = 2.2,
side = 2)
mtext(maiC, cex = 1.2, line = 1.5, side = 3)
abline(v = cumsum(batTab) + 0.5,
col = "red")
mtext(names(batTab),
at = batTab / 2 + c(0, cumsum(batTab[-length(batTab)])))
obsColVuc <- obsColVc[sort(unique(names(obsColVc)))]
text(rep(batTab[1], times = length(obsColVuc)),
par("usr")[3] + (0.97 - length(obsColVuc) * 0.03 + 1:length(obsColVuc) * 0.03) * diff(par("usr")[3:4]),
col = obsColVuc,
font = 2,
labels = names(obsColVuc),
pos = 2)
for (batC in names(batTab)) {
batSeqVi <- which(samDF.arg[, colBatC.arg] == batC)
batPooVi <- intersect(batSeqVi,
which(samDF.arg[, colSamC.arg] == "pool"))
batSamVi <- intersect(batSeqVi,
which(samDF.arg[, colSamC.arg] == "sample"))
if (length(batPooVi))
lines(batSeqVi,
.loess(sumVn, batPooVi, batSeqVi, spnN.arg),
col = colVc["pool"])
if (length(batSamVi))
lines(batSeqVi,
.loess(sumVn, batSamVi, batSeqVi, spnN.arg),
col = colVc["samp"])
}
## Graphics 2 and 3 (right): PCA score plots of components 1-4
radVn <- seq(0, 2 * pi, length.out = 100)
epsN <- .Machine[["double.eps"]] ## [1] 2.22e-16
pcaMN <- datMN
if (any(is.na(pcaMN))) {
minN <- min(pcaMN, na.rm = TRUE)
pcaMN[is.na(pcaMN)] <- minN
}
pcaLs <- ropls::opls(pcaMN, predI = 4, algoC = "svd", fig.pdfC = NULL, info.txtC = NULL)
tMN <- ropls::getScoreMN(pcaLs)
vRelVn <- pcaLs@modelDF[, "R2X"]
n <- nrow(tMN)
hotN <- 2 * (n - 1) * (n^2 - 1) / (n^2 * (n - 2))
hotFisN <- hotN * qf(0.95, 2, n - 2)
pcsLs <- list(c(1, 2), c(3, 4))
par(mar = c(3.6, 3.6, 0.6, 1.1))
for (pcsN in 1:length(pcsLs)) {
pcsVn <- pcsLs[[pcsN]]
tcsMN <- tMN[, pcsVn]
micMN <- solve(cov(tcsMN))
n <- nrow(tMN)
hotN <- 2 * (n - 1) * (n^2 - 1) / (n^2 * (n - 2))
hotFisN <- hotN * qf(0.95, 2, n - 2)
hotVn <- apply(tcsMN,
1,
function(x) 1 - pf(1 / hotN * t(as.matrix(x)) %*% micMN %*% as.matrix(x), 2, n - 2))
obsHotVi <- which(hotVn < 0.05)
xLabC <- paste("t",
pcsVn[1],
"(",
round(vRelVn[pcsVn[1]] * 100),
"%)",
sep = "")
yLabC <- paste("t",
pcsVn[2],
"(",
round(vRelVn[pcsVn[2]] * 100),
"%)",
sep = "")
xLimVn <- c(-1, 1) * max(sqrt(var(tcsMN[, 1]) * hotFisN), max(abs(tcsMN[, 1])))
yLimVn <- c(-1, 1) * max(sqrt(var(tcsMN[, 2]) * hotFisN), max(abs(tcsMN[, 2])))
plot(tcsMN,
main = "",
type = "n",
xlab = "",
ylab = "",
xlim = xLimVn,
ylim = yLimVn)
mtext(xLabC,
line = 2.2,
side = 1)
mtext(yLabC,
line = 2.2,
side = 2)
par(lwd = 1)
abline(v = axTicks(1),
col = "grey")
abline(h = axTicks(2),
col = "grey")
abline(v = 0)
abline(h = 0)
lines(sqrt(var(tcsMN[, 1]) * hotFisN) * cos(radVn),
sqrt(var(tcsMN[, 2]) * hotFisN) * sin(radVn))
points(tcsMN,
col = obsColVc,
pch = 18)
if (length(obsHotVi))
text(tcsMN[obsHotVi, 1],
tcsMN[obsHotVi, 2],
col = obsColVc[obsHotVi],
labels = obsNamVc[obsHotVi],
pos = 3)
} ## for(pcsN in 1:length(pcsLs)) {
return(invisible(list(sumVn = sumVn,
tcsMN = tcsMN)))
} ## plotBatchF
.shiftBatchCorrect <- function(rawMN.arg,
samDF.arg,
refC.arg,
spnN.arg,
colBatC.arg,
colSamC.arg) {
cat("\nReference observations are: ", refC.arg, "\n")
## computing median off all pools (or samples) for each variable
refMeaVn <- apply(rawMN.arg[samDF.arg[, colSamC.arg] == refC.arg, ],
2,
function(feaRefVn) mean(feaRefVn, na.rm = TRUE))
## splitting data and sample metadata from each batch
batRawLs <- split(as.data.frame(rawMN.arg),
f = samDF.arg[, colBatC.arg])
batRawLs <- lapply(batRawLs, function(inpDF) as.matrix(inpDF))
batSamLs <- split(as.data.frame(samDF.arg),
f = samDF.arg[, colBatC.arg])
## checking extrapolation: are there pools at the first and last observations of each batch
pooExtML <- matrix(FALSE, nrow = 2, ncol = length(batRawLs),
dimnames = list(c("first", "last"), names(batRawLs)))
for (batC in names(batSamLs)) {
batSamTypVc <- batSamLs[[batC]][, colSamC.arg]
pooExtML["first", batC] <- head(batSamTypVc, 1) == refC.arg
pooExtML["last", batC] <- tail(batSamTypVc, 1) == refC.arg
}
if (!all(c(pooExtML))) {
cat("\nWarning: Reference samples are missing at the first and/or last position of the following batches:\n")
pooExtBatVi <- which(!apply(pooExtML, 2, all))
for (i in 1:length(pooExtBatVi))
cat(names(pooExtBatVi)[i], ": ",
paste(rownames(pooExtML)[!pooExtML[, pooExtBatVi[i]]], collapse = ", "), "\n", sep = "")
cat("Extrapolating loess fits for these batches may result in inaccurate modeling!\n")
}
## normalizing
nrmMN <- NULL ## normalized data matrix to be computed
cat("\nProcessing batch:")
for (batC in names(batRawLs)) { ## processing each batch individually
cat("\n", batC)
batRawMN <- batRawLs[[batC]]
batSamDF <- batSamLs[[batC]]
batAllVi <- 1:nrow(batRawMN)
batRefVi <- which(batSamDF[, colSamC.arg] == refC.arg)
if (length(batRefVi) < 5)
cat("\nWarning: less than 5 '", refC.arg,
"'; linear regression will be performed instead of loess regression for this batch\n",
sep = "")
## prediction of the loess fit
batLoeMN <- apply(batRawMN,
2,
function(rawVn) .loess(rawVn,
batRefVi,
batAllVi,
spnN.arg))
## normalization
batLoeMN[batLoeMN <= 0] <- NA
batNrmMN <- batRawMN / batLoeMN
nrmMN <- rbind(nrmMN,
batNrmMN)
}
cat("\n")
nrmMN <- sweep(nrmMN, MARGIN = 2, STATS = refMeaVn, FUN = "*")
return(nrmMN)
} ## shiftBatchCorrectF
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.