knitr::opts_chunk$set(echo = TRUE)
This is an R Markdown document. Markdown is a simple formatting syntax for authoring HTML, PDF, and MS Word documents. For more details on using R Markdown see http://rmarkdown.rstudio.com.
When you click the Knit button a document will be generated that includes both content as well as the output of any embedded R code chunks within the document. You can embed an R code chunk like this:
# ---------- Preparations ---------- # Load libraries library(MSnbase) # MS features library(xcms) # Swiss army knife for metabolomics library(RColorBrewer) # For colors # Set encoding of files and set locale options(encoding="UTF-8") Sys.setlocale(category="LC_ALL", locale="C") # Data directories #data_dir <- "./inst/mzML" data_dir <- system.file("mzML/", package="mtbls297")
# Multicore parallel nSlaves <- parallel::detectCores(all.tests=FALSE, logical=TRUE) library(doParallel) registerDoParallel(nSlaves) register(DoparParam(), default=TRUE) # Input variables polarity <- "positive" pol <- substr(x=polarity, start=1, stop=3) ms1_intensity_cutoff <- 16000 #approx. 0.01% ms2_fragment_intensity_cutoff <- 100 mz_ppm <- 25
# ---------- Peak detection ---------- # Load files mzml_dir <- data_dir mzml_files <- list.files(mzml_dir, pattern="*.mzML", recursive=T, full.names=T) mzml_names <- gsub('(.*)_.*', '\\1', basename(mzml_files)) # Build phenodata mzml_pheno <- gsub(x=mzml_files, pattern=".*\\d\\d\\d\\d ", replacement="", perl=TRUE) mzml_pheno <- as.factor(gsub(x=mzml_pheno, pattern=" .*", replacement="", perl=TRUE)) mzml_pheno <- data.frame(sample_name=mzml_names, sample_group=mzml_pheno) # Import raw data as MSnbase object raw_data <- readMSData(files=mzml_files, pdata=new("NAnnotatedDataFrame", mzml_pheno), mode="onDisk", centroided=TRUE) table(msLevel(raw_data)) head(isolationWindowLowerMz(raw_data)) head(isolationWindowUpperMz(raw_data)) head(fData(raw_data)[, c("isolationWindowTargetMZ", "isolationWindowLowerOffset", "isolationWindowUpperOffset", "msLevel", "retentionTime")]) # Get base peak chromatograms chromas <- chromatogram(raw_data, aggregationFun="max")
# Plot chromatograms based on phenodata groups sample_group_colors <- brewer.pal(length(unique(mzml_pheno$sample_group)), "Set1") names(sample_group_colors) <- unique(mzml_pheno$sample_group) plot(chromas, col=sample_group_colors[raw_data$sample_group]) # Get TICs tics <- split(tic(raw_data), f=fromFile(raw_data)) boxplot(tics, col=sample_group_colors[raw_data$sample_group], ylab="intensity", main="Total ion current")
# Peak detection in MS1 data ms1_params <- CentWaveParam(ppm=mz_ppm, mzCenterFun="mean", prefilter=c(3, 100), peakwidth=c(10, 20), snthresh=10, integrate=2, firstBaselineCheck=TRUE, verboseColumns=FALSE, fitgauss=FALSE, roiList=list(), roiScales=numeric()) ms1_data <- findChromPeaks(raw_data, param=ms1_params) # Per file summary ms1_summary <- lapply(split.data.frame(chromPeaks(ms1_data), f = chromPeaks(ms1_data)[, "sample"]), FUN = function(z) { c(peak_count = nrow(z), rt = quantile(z[, "rtmax"] - z[, "rtmin"])) } ) ms1_summary <- do.call(rbind, ms1_summary) rownames(ms1_summary) <- basename(fileNames(ms1_data)) print(ms1_summary) table(msLevel(ms1_data)) # To get a global overview of the peak detection we can plot the frequency of identified peaks per file along the retention time axis. This allows to identify time periods along the MS run in which a higher number of peaks was identified and evaluate whether this is consistent across files. plotChromPeakImage(ms1_data, main="Frequency of identified peaks per RT") # Group peaks ms1_data <- groupChromPeaks(ms1_data, param=PeakDensityParam(sampleGroups=ms1_data$sample_group, minFraction=0.5, bw=20)) # RT correction ms1_data <- adjustRtime(ms1_data, param=PeakGroupsParam(minFraction=0.5,smooth="loess",span=0.2,family="gaussian")) # Plot the difference of raw and adjusted retention times par(mfrow=c(2,1), mar=c(4.5,4.2,1,0.5)) plot(chromas, col=sample_group_colors[chromas$sample_group], peakType="none", main="Raw chromatograms") plotAdjustedRtime(ms1_data, col=sample_group_colors[ms1_data$sample_group], main="RT correction") par(mfrow=c(1,1), mar=c(4,4,4,1), oma=c(0,0,0,0), cex.axis=0.9, cex=0.8) # Group peaks ms1_data <- groupChromPeaks(ms1_data, param=PeakDensityParam(sampleGroups=ms1_data$sample_group, minFraction=0.5, bw=20)) # Get integrated peak intensity per feature/sample print(head(featureValues(ms1_data, value="into"))) # Fill peaks #ms1_data <- fillChromPeaks(ms1_data, param=FillChromPeaksParam(ppm=35, fixedRt=0, expandRt=4, diffRt=4)) #head(featureValues(ms1_data)) #head(featureSummary(ms1_data, group=ms1_data$sample_group)) # Evaluate grouping ms1_pca <- prcomp(t(na.omit(log2(featureValues(ms1_data, value="into")))), center=TRUE) plot(ms1_pca$x[, 1], ms1_pca$x[,2], pch=21, main="PCA: Grouping of samples", xlab=paste0("PC1: ", format(summary(ms1_pca)$importance[2, 1] * 100, digits=3), " % variance"), ylab=paste0("PC2: ", format(summary(ms1_pca)$importance[2, 2] * 100, digits=3), " % variance"), col="darkgrey", bg=sample_group_colors[ms1_data$sample_group], cex=2) grid() text(ms1_pca$x[, 1], ms1_pca$x[,2], labels=ms1_data$sample_name, col="darkgrey", pos=3, cex=0.5) # Show peaks tail(chromPeaks(ms1_data)) tail(chromPeakData(ms1_data)) # Show process history processHistory(ms1_data)
# Peak picking in all isolation windows (buckets) of SWATH data ms2_params <- CentWaveParam(ppm=mz_ppm, mzCenterFun="mean", prefilter=c(3, 100), peakwidth=c(10, 20), snthresh=10, integrate=2, firstBaselineCheck=TRUE, verboseColumns=FALSE, fitgauss=FALSE, roiList=list(), roiScales=numeric()) ms2_data <- findChromPeaksIsolationWindow(ms1_data, param=ms2_params) # Count the number of peaks within each isolation window ms2_peaks <- chromPeakData(ms2_data) table(ms2_peaks$isolationWindow) # Filter MS2 spectra swath_data <- filterRt(ms2_data, rt=c(60,1200)) # Inspect number of spectra in MS1 and MS2 table(msLevel(swath_data)) head(fData(swath_data)[, c("isolationWindowTargetMZ", "isolationWindowLowerOffset", "isolationWindowUpperOffset", "msLevel", "retentionTime")]) head(isolationWindowLowerMz(swath_data)) head(isolationWindowUpperMz(swath_data)) table(isolationWindowTargetMz(swath_data)) # Compare MS1 to SWATH sum(chromPeakData(swath_data)$ms_level == 1) sum(chromPeakData(swath_data)$ms_level == 2) table(chromPeakData(swath_data)$isolationWindow) #install_github("sneumann/xcms", ref="02e4aaf80a6e8c5737d8c41d8e83e09d94fd2bef") # Define an MS2 spectra for each MS1 peak swath_spectra <- reconstructChromPeakSpectra(swath_data, minCor=0.5) # Save MS2 spectra info and convert to peak list ms2_spectra <- fData(swath_data)[, c("fileIdx", "originalPeaksCount", "totIonCurrent", "retentionTime", "basePeakMZ")] ms2_spectra$fileIdx <- as.character(unlist(lapply(X=ms2_spectra$fileIdx, FUN=function(x) { mzml_names[x] }))) # ---------- Build feature matrices ---------- # Build feature matrix ms1_matrix <- featureValues(ms1_data, method="medret", value="into") colnames(ms1_matrix) <- mzml_names dim(ms1_matrix) feat_list <- t(ms1_matrix) # Build feature summary ms1_summary <- featureSummary(ms1_data) ms1_def <- featureDefinitions(ms1_data) # Missing value imputation feat_list[is.na(feat_list)] <- 1 # Transform and clean data feat_list <- log2(feat_list) feat_list[is.na(feat_list)] <- 0 feat_list[which(feat_list < 0)] <- 0 feat_list[is.infinite(feat_list)] <- 0 #feat_list <- feat_list[!apply(feat_list, MARGIN=1, function(x) max(x,na.rm=TRUE) == min(x,na.rm=TRUE)),] # Create single 0/1 matrix bina_list <- feat_list bina_list[bina_list < log2(ms1_intensity_cutoff)] <- 0 bina_list[bina_list != 0] <- 1
# ---------- Link MS2 spectra to MS1 matrix ---------- # Merge MS/MS spectra in each sample ms2_spectra <- list() ms2_spectra <- foreach(i=1:nrow(ms1_def)) %dopar% { # Extract spectra for feature feature_of_interest <- ms1_def[i, "mzmed"] peaks_of_interest <- chromPeaks(swath_data, msLevel=1, mz=feature_of_interest, ppm=mz_ppm) swath_spectra_of_interest <- swath_spectra[which(mcols(swath_spectra)$peak_id %in% rownames(peaks_of_interest))] merged_swath_spectrum_of_interest <- combineSpectra(swath_spectra_of_interest, mzd=0.05, ppm=50, intensityFun="max") # Plot single spectra + merged spectrum, i.e. 1010 if (FALSE) { nonempty_spectra <- NULL for (i in 1:length(swath_spectra_of_interest@listData)) { if (length(swath_spectra_of_interest@listData[[i]]@precursorMz) > 0) if (! is.null(names(swath_spectra_of_interest@listData[[i]]@mz))) nonempty_spectra <- c(nonempty_spectra, i) } par(mfrow=c(3,2), mar=c(4.5,4.2,1,0.5)) for (i in nonempty_spectra) { plot(x=swath_spectra_of_interest@listData[[i]]@mz, y=swath_spectra_of_interest@listData[[i]]@intensity, type="h", xlab="m/z", ylab="intensity", main=paste("Precursor m/z",swath_spectrum_of_interest@listData[[i]]@precursorMz)) swath_spectra[which(mcols(swath_spectra)$peak_id %in% names(swath_spectra_of_interest@listData[[i]]@mz))] } par(mfrow=c(1,1), mar=c(4,4,4,1), oma=c(0,0,0,0), cex.axis=0.9, cex=0.8) plot(x=merged_swath_spectrum_of_interest@listData[[1]]@mz, y=merged_swath_spectrum_of_interest@listData[[1]]@intensity, type="h", xlab="m/z", ylab="intensity", main=paste("Precursor m/z",merged_swath_spectrum_of_interest@listData[[1]]@precursorMz)) } return(merged_swath_spectrum_of_interest) } # Assign matching SWATH spectra to MS1 features feat_spectra <- list() for (i in 1:nrow(ms1_def)) { feat_rt_med <- ms1_def[i, "rtmed"] feat_mz_med <- ms1_def[i, "mzmed"] feat_ms2_list <- chromPeaks(swath_data, msLevel=1, mz=feat_mz_med, ppm=mz_ppm) if (nrow(feat_ms2_list) > 0) { feat_spectra <- swath_spectra[which(mcols(swath_spectra)$peak_id %in% rownames(feat_ms2_list))] feat_spectra_merged <- combineSpectra(feat_spectra, mzd=0.05, ppm=50, intensityFun="max") if (feat_spectra_merged@listData[[1]]@peaksCount == 0) { print(paste0("No spectrum found for #", i, " ", rownames(ms1_def)[i])) feat_spectra[[i]] <- list() names(feat_spectra)[i] <- "" } else if (is.na(feat_spectra_merged@listData[[1]]@mz[1])) { print(paste0("Empty spectrum found for #", i, " ", rownames(ms1_def)[i])) feat_spectra[[i]] <- list() names(feat_spectra)[i] <- "" } else { feat_spectra[[i]] <- feat_spectra_merged names(feat_spectra)[i] <- rownames(ms1_def)[i] } } else { print(paste0("No spectra found for #", i, " ", rownames(ms1_def)[i])) feat_spectra[[i]] <- list() names(feat_spectra)[i] <- "" } } print(length(colnames(feat_list))) print(length(which(colnames(feat_list) %in% names(feat_spectra)))) # Filter MS1 matrix to contain only features that have spectra assigned feat_list <- feat_list[, which(colnames(feat_list) %in% names(feat_spectra))] bina_list <- bina_list[, which(colnames(bina_list) %in% names(feat_spectra))]
You can also embed plots, for example:
plot(pressure)
Note that the echo = FALSE
parameter was added to the code chunk to prevent printing of the R code that generated the plot.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.