inst/doc/MBQNpackage.R

## ----setup, include=FALSE-----------------------------------------------------
knitr::opts_chunk$set(echo = TRUE)

## ----bc-installation, eval = FALSE--------------------------------------------
#  # if (!requireNamespace("BiocManager", quietly = TRUE))
#  #    install.packages("BiocManager")
#  BiocManager::install("MBQN")

## ----dependencies1, eval = FALSE----------------------------------------------
#  # if (!requireNamespace("BiocManager", quietly = TRUE))
#  #    install.packages("BiocManager")
#  # BiocManager::install(pkgs = c("preprocessCore","limma","rpx","SummarizedExperiment"))

## ----example1, eval = TRUE----------------------------------------------------
## basic example
library("MBQN")
set.seed(1234)
# data generation 
mtx <- mbqnSimuData("omics.dep")
# data distortion
mtx <- mbqnSimuDistortion(mtx)$x.mod

## ----figure1, fig.height = 5, fig.width = 6, fig.align = "left", fig.cap = "Fig. 1 Boxplot of the unnormalized, distorted intensity data matrix. The first feature is an RI feature (red line). It has maximum intensity for each sample!"----
plot.new()
mbqnBoxplot(mtx, irow = 1, main = "Unnormalized")

## ---- eval = TRUE-------------------------------------------------------------
res <- mbqnGetNRIfeatures(mtx, low_thr = 0.5)

## ----figure2, fig.height = 5, fig.width = 6, fig.align = 'left', fig.cap = "Fig. 2 Quantile normalized intensities with balanced and unbalanced normalized RI feature. Classical quantile normalization suppresses any intensity variation of the RI feature, while the MBQN preserves its variation while reducing systematic batch effects!"----
plot.new()
mbqn.mtx <- mbqnNRI(x = mtx, FUN = median, verbose = FALSE) # MBQN
qn.mtx <- mbqnNRI(x = mtx, FUN = NULL, verbose = FALSE) # QN
mbqnBoxplot(mbqn.mtx, irow = res$ip, vals = data.frame(QN = qn.mtx[res$ip,]), main = "Normalized")

## ----example2, eval = TRUE----------------------------------------------------
## basic example
library("MBQN")
set.seed(1234)
mtx <- mbqnSimuData("omics.dep") 
# Alternatively: mtx <- matrix(
#            c(5,2,3,NA,2,4,1,4,2,3,1,4,6,NA,1,3,NA,1,4,3,NA,1,2,3),ncol=4)

## ---- eval = TRUE-------------------------------------------------------------
qn.mtx <- mbqn(mtx,FUN=NULL, verbose = FALSE)
mbqn.mtx <- mbqn(mtx,FUN = "median", verbose = FALSE)
qn.nri.mtx <- mbqnNRI(mtx,FUN = "median", low_thr = 0.5, verbose = FALSE)

## ---- eval = TRUE-------------------------------------------------------------
res <- mbqnGetNRIfeatures(mtx, low_thr = 0.5)
# Maximum frequency of RI/NRI feature(s):  100 %

## ----example3, eval = TRUE----------------------------------------------------
#plot.new()
mtx <- mbqnSimuData("omics.dep", show.fig = FALSE)
mod.mtx <- mbqnSimuDistortion(mtx, s.mean = 0.05, s.scale = 0.01)
mtx2 <- mod.mtx
mod.mtx <- mod.mtx$x.mod

res <- mbqnGetNRIfeatures(mod.mtx, low_thr = 0.5)

# undistorted feature
feature1 <- mtx[1,]
# distorted feature
feature1mod = mod.mtx[1,]
# feature after normalization
qn.feature1 = mbqn(mod.mtx, verbose = FALSE)[1,]
qn.mtx = mbqn(mod.mtx,verbose = FALSE)

mbqn.mtx = mbqn(mod.mtx, FUN = "mean",verbose = FALSE)
mbqn.feature1 = mbqn(mod.mtx, FUN = "mean",verbose = FALSE)[1,]

## ---- eval = TRUE-------------------------------------------------------------
# undistorted feature
ttest.res0 <- t.test(feature1[seq_len(9)], feature1[c(10:18)],
                    var.equal =TRUE)
# distorted feature
ttest.res1 <- t.test(feature1mod[seq_len(9)], feature1mod[c(10:18)],
                    var.equal =TRUE)
# mbqn normalized distorted feature
ttest.res <- t.test(mbqn.feature1[seq_len(9)], mbqn.feature1[c(10:18)],
                    var.equal =TRUE)

## ---- eval = TRUE-------------------------------------------------------------

## ----figure3, fig.height = 5, fig.width = 6, fig.align = "left", fig.cap = "Fig. 3 "----
plot.new()
matplot(t(rbind(feature1 = feature1,
    mod.feature1 = (feature1mod-mean(feature1mod))/25+mean(feature1),
    qn.feature1 = (qn.feature1-mean(qn.feature1))+mean(feature1),
    mbqn.feature1 = (
        mbqn.feature1-mean(mbqn.feature1))+mean(feature1))),
    type = "b", lty = c(1,1,1), pch = "o",
    ylab = "intensity",
    xlab = "sample",
    main = "Differentially expressed RI feature",
    ylim = c(34.48,34.85))
legend(x=11,y= 34.86, legend = c("feature","distorted feature/25" ,
                                "QN feature", " MBQN feature"),pch = 1,
        col = c(1,2,3,4), lty= c(1,1,1,1), bty = "n", y.intersp = 1.5,
        x.intersp = 0.2)
legend(x = .1, y = 34.6,
        legend = paste("p-value (t-test) =",round(ttest.res1$p.value,2),
            "\np-value (t-test, mbqn) =", round(ttest.res$p.value,4)),
        bty = "n", x.intersp = 0)


if (ttest.res$p.value<0.05)
    message("H0 (=equal mean) is rejected!")


# print(mtx2$x.mod)
# print(mtx2$mx.offset)
# print(mtx2$mx.scale)
print(paste("ttest.undistorted =",ttest.res0)) 
print(paste("ttest.distorted =", ttest.res1))
print(paste("ttest.mbqndistorted =", ttest.res))

## ----example4, eval = TRUE----------------------------------------------------
library("SummarizedExperiment")
pxd_id <- "PXD001584"   

# Download file from PRIDE to currentdir/PXDxxx
#getPXDfile(pxd_id)

# Load file
out <- mbqnLoadFile(pxd_id, file.pattern = "proteinGroups.txt")

# filter for potential contaminants and identified only by site features
out <- out[!rowData(out)[["ixs"]],] 

# extract data and feature annotation
mtx <- assays(out)[["data"]]
featureAnnotations <- rowData(out)

low_thr <- 0.5
ylim <- NULL
ix <- seq_len(ncol(mtx))

ix <- c(1:9,19:27)
mtx <- mtx[,ix]
ylim.qn <- ylim <- c(22.5,36)

####################################################################################

## ----figure4, fig.height = 3, fig.width = 6, fig.align = "left", fig.cap = "Fig. 4 Correctly detected and identified RI (100 %RI) and a NRI feature (75 %RI) each with a data coverage of 100% across samples. "----
plot.new()
res <- mbqnPlotAll(mtx,
                   FUN = median,
                   low_thr = low_thr,
                   las = 2,
                   type = "l",
                   feature_index = NULL,
                   show_nri_only = TRUE,
                   axis.cex = 0.5,
                   y.intersp= 0.5)
#dev.off()

# get protein name of strongest nri/ri feature
nri_max <- as.numeric(names(which.max(res$nri)))
#featureAnnotations$proteinDescription[nri_max]
#featureAnnotations$proteinName[nri_max]
#featureAnnotations$nbPeptides[nri_max]

colnames(mtx) <- gsub("LFQ intensity","",colnames(mtx))
mbqn.mtx <- mbqn(mtx,FUN = median)
qn.mtx <- mbqn(mtx,FUN = NULL)

# Boxplot of QN intensity features, highlight RI/NRI Features
if(length(ylim)==0) {
  ylim <- c(floor(min(range(mbqn.mtx, na.rm = TRUE))),ceiling(max(range(mbqn.mtx, na.rm = TRUE))))
  ylim.qn <- c(floor(min(range(qn.mtx, na.rm = TRUE))),ceiling(max(range(mbqn.mtx, na.rm = TRUE))))
}

df <- data.frame(qn.mtx[as.numeric(names(res$nri)),])
if(ncol(df)==1) df <- t(df)
rownames(df) <- paste("QN feature",names(res$nri))
df2 <- data.frame(mtx[as.numeric(names(res$nri)),])
if(ncol(df2)==1) df2 <- t(df2)
rownames(df2) <- paste("unnormal. feature",names(res$nri))
colnames(df) <- colnames(df2)
df <- rbind(df,df2)
df <- as.data.frame(t(df))

## ----figure5, fig.height = 5, fig.width = 6, fig.align = "left", fig.cap = "Fig. 5 "----
  mtx.nri <- mbqnNRI(mtx,FUN = median,low_thr = 0.5, verbose = FALSE)

plot.new()
mbqnBoxplot(mtx = mtx.nri,
            irow = as.numeric(names(res$nri)),
            ylim = ylim.qn, xlab= "", las=2,
            ylab = "LFQ intensity",
            vals = df,lwd = 1.,
            main = "QN with RI/NRI balanced",
            cex.axis = 1, cex.lab = .9, cex = .9, y.intersp = 0.5)

Try the MBQN package in your browser

Any scripts or data that you put into this service are public.

MBQN documentation built on Nov. 8, 2020, 8:13 p.m.