inst/doc/synapter.R

## ----environment, echo=FALSE--------------------------------------------------
suppressPackageStartupMessages(library("synapter"))
suppressPackageStartupMessages(library("synapterdata"))
suppressPackageStartupMessages(library("BiocStyle"))
suppressPackageStartupMessages(library("qvalue"))
## preparing some data
data(ups25b)  ## from synapterdata
res <- ups25b ## for synergise and plotRt

## ----install,  eval=FALSE-----------------------------------------------------
#  if (!require("BiocManager"))
#      install.packages("BiocManager")
#  BiocManager::install("synapter")

## ----library,  eval=FALSE-----------------------------------------------------
#  library("synapter")

## ----prepare-synergise--------------------------------------------------------
library("synapterdata")
hdmsefile <- getHDMSeFinalPeptide()[2]
basename(hdmsefile)
msefile <- getMSeFinalPeptide()[2]
basename(msefile)
msepep3dfile <- getMSePep3D()[2]
basename(msepep3dfile)
fas <- getFasta()
basename(fas)
## the synergise input is a (named) list of filenames
input <- list(identpeptide = hdmsefile,
              quantpeptide = msefile,
              quantpep3d = msepep3dfile,
              fasta = fas)
## a report and result files will be stored
## in the 'output' directory
output <- tempdir()
output

## ----synergise, eval=FALSE----------------------------------------------------
#  res <- synergise(filenames = input, outputdir = output)

## ----performance--------------------------------------------------------------
performance(res)

## ----inputfiles, echo=FALSE, eval=TRUE----------------------------------------
inputfiles <- getHDMSeFinalPeptide()

## ----masterFdr, cache=TRUE----------------------------------------------------
## using the full set of 6 HDMSe files and a
## fasta database from the synapterdata package
inputfiles <- getHDMSeFinalPeptide()
fasta <- getFasta()
cmb <- estimateMasterFdr(inputfiles, fasta, masterFdr = 0.02)
cmb

## ----estimateFdrPlot, fig.cap="Figure illustrating the relation between the number of unique peptides in the combined HDMS$^E$ file and the resulting false discovery rate. The symbols on the figure represent the number of files for that particular combination. The dotted line is the user defined threshold for the combined FDR (`masterFdr` parameter). The best combination, i.e the one that maximises the number of unique peptides while keeping the FDR below `masterFdr` is highlighted in red."----
plot(cmb)

## ----bestComb-----------------------------------------------------------------
bestComb(cmb)

## ----master, cache=TRUE, warning=FALSE----------------------------------------
master <- makeMaster(inputfiles[bestComb(cmb)])
master

## ----loadups------------------------------------------------------------------
data(ups25a, ups25b, ups25c, ups50a, ups50b, ups50c)

## ----setas--------------------------------------------------------------------
ms25a <- as(ups25a, "MSnSet")
class(ups25a)[1]
class(ms25a)[1]
ms25a

## ----updatesamplename---------------------------------------------------------
sampleNames(ms25a)
sampleNames(ms25a) <- "ups25a"
sampleNames(ms25a)

## ----accessor-----------------------------------------------------------------
tail(exprs(ms25a))
tail(fData(ms25a)[, c(2,9)])
## all fetaure metadata
fvarLabels(ms25a)

## ----dotop3-------------------------------------------------------------------
ms25a <- topN(ms25a,
              groupBy = fData(ms25a)$protein.Accession,
              n = 3)
nPeps <- nQuants(ms25a,
                 groupBy = fData(ms25a)$protein.Accession)
ms25a <- combineFeatures(ms25a,
                         fData(ms25a)$protein.Accession,
                         "sum", na.rm = TRUE,
                         verbose = FALSE)
head(exprs(ms25a))
head(nPeps)
## scale intensities
exprs(ms25a) <- exprs(ms25a) * (3/nPeps)
## NaN result from the division by 0, when
## no peptide was found for that protein
head(exprs(ms25a))

## ----batch--------------------------------------------------------------------
nms <- c(paste0("ups", 25, c("b", "c")),
         paste0("ups", 50, c("a", "b", "c")))
tmp <- sapply(nms, function(.ups) {
  cat("Processing", .ups, "... ")
  ## get the object from workspace and convert to MSnSet
  x <- get(.ups, envir = .GlobalEnv)
  x <- as(x, "MSnSet")
  sampleNames(x) <- .ups
  ## extract top 3 peptides
  x <- topN(x, groupBy = fData(x)$protein.Accession, n = 3)
  ## calculate the number of peptides that are available
  nPeps <- nQuants(x, fData(x)$protein.Accession)
  ## sum top3 peptides into protein quantitation
  x <- combineFeatures(x, fData(x)$protein.Accession,
                       "sum", na.rm = TRUE, verbose = FALSE)
  ## adjust protein intensity based on actual number of top peptides
  exprs(x) <- exprs(x) * (3/nPeps)
  ## adjust feature variable names for combine
  x <- updateFvarLabels(x, .ups)
  ## save the new MSnExp instance in the workspace
  varnm <- sub("ups", "ms", .ups)
  assign(varnm, x, envir = .GlobalEnv)
  cat("done\n")
})

## ----combine25, echo=TRUE-----------------------------------------------------
ms25 <- combine(ms25a, ms25b)
ms25 <- combine(ms25, ms25c)
dim(ms25)
ms25 <- filterNA(ms25, pNA = 1/3)
dim(ms25)

## ----combine50----------------------------------------------------------------
ms50 <- combine(ms50a, ms50b)
ms50 <- combine(ms50, ms50c)
dim(ms50)
ms50 <- filterNA(ms50, pNA = 1/3)
dim(ms50)

## ----msUps--------------------------------------------------------------------
msUps <- combine(ms25, ms50)
msUps <- filterNA(msUps, pNA = 2/6)
head(exprs(msUps))
table(apply(exprs(msUps), 1,
            function(.x)sum(!is.na(.x))))

## ----scale--------------------------------------------------------------------
ecoli <- -grep("ups$", featureNames(msUps))
meds <- apply(exprs(msUps)[ecoli, ], 2,
              median, na.rm=TRUE)
exprs(msUps) <- t(apply(exprs(msUps), 1,
                        "/", meds))

## ----stats--------------------------------------------------------------------
## use log2 data for t-test
exprs(msUps) <- log2(exprs(msUps))
## apply a t-test and extract the p-value
pv <- apply(exprs(msUps), 1 ,
            function(x)t.test(x[1:3], x[4:6])$p.value)
## calculate q-values
library("qvalue")
qv <- qvalue(pv)$qvalues
## calculate log2 fold-changes
lfc <- apply(exprs(msUps), 1 ,
             function(x) mean(x[1:3], na.rm=TRUE)-mean(x[4:6], na.rm=TRUE))
## create a summary table
res <- data.frame(cbind(exprs(msUps), pv, qv, lfc))
## reorder based on q-values
res <- res[order(res$qv), ]
knitr::kable(head(round(res, 3)))

## ----writecsv, eval=FALSE-----------------------------------------------------
#  write.csv(res, file = "upsResults.csv")

## ----volcano, fig.cap="Volcano plot. On the volcano plot, each protein is represented by a dot and positioned according to its $log_2$ fold-change along the x axis and $-log_{10}$ of its q-value along the y axis. Proteins with large fold-changes are positioned on the sides of the plot, while proteins with low q-values are at the top of the figure. The most promising candidates are this located on the top corners. In our case, the UPS proteins (in blue) have $log2$ fold-changes around -1 (vertical dotted line), as expected. The horizontal dotted line represents a q-value threshold of 0.10."----
plot(res$lfc, -log10(res$qv),
     col = ifelse(grepl("ups$", rownames(res)),
       "#4582B3AA",
       "#A1A1A180"),
     pch = 19,
     xlab = expression(log[2]~fold-change),
     ylab = expression(-log[10]~q-value))
grid()
abline(v = -1, lty = "dotted")
abline(h = -log10(0.1), lty = "dotted")
legend("topright", c("UPS", "E. coli"),
       col = c("#4582B3AA", "#A1A1A1AA"),
       pch = 19, bty = "n")

## ----hmap, fig.keep="last", fig.cap="A heatmap of all UPS1 proteins present in the final data set."----
heatmap(exprs(msUps)[grep("ups",featureNames(msUps)), ])

## ----falsepos, echo=FALSE-----------------------------------------------------
sel1 <- (res$qv < 0.1)
signms <- rownames(res)[sel1]
i <- grep("ups", signms)
n1 <- length(i)
n2 <- length(signms) - n1

## ----upstab, echo=FALSE, results='asis'---------------------------------------
k <- grep("ups", rownames(res))
knitr::kable(round(res[k, ], 3),
             caption = "UPS1 proteins.")

## ----sessioninfo--------------------------------------------------------------
sessionInfo()

Try the synapter package in your browser

Any scripts or data that you put into this service are public.

synapter documentation built on Nov. 8, 2020, 6:25 p.m.