MetaboAnalystR is designed for comprehensive metabolomic data analysis, visualization, and interpretation. While it does support raw data preprocessing, users may wish to use the XCMS R package (Smith et al. 2006) directly to process their raw LC-MS/GC-MS data, and then use MetaboAnalystR to perform a wide range of metabolomic data analyses (statistical analysis, functional analysis, and multiple omics data integration). This tutorial will guide users through using the XCMS R package to perform peak identification, retention time correction, and peak alignment of an example dataset to create a peak table in a form usable by MetaboAnalystR. Then, with the MetaboAnalystR package, users will perform various statistical analyses including fold-change analysis, PCA, and PLS-DA. The goal of this tutorial is to demonstrate the ease of using the two R packages for a holistic metabolomic data analysis workflow. For users who wish to perform other types of analyses with their created peak table, please refer to the other MetaboAnalystR vignettes.
In the example workflow below, users must have both the XCMS and the MetaboAnalystR packages installed. Further, the tutorial utilizes example data provided by the "faahKO" package (usually installed with XCMS). Please verify first that all of these packages are installed on your computer using installed.packages().
The default settings for the LC-MS data preprocessing with xcms used below might not be suited for other data sets. More information on the different algorithms and their settings is provided in the vignettes of the xcms package.
The export functionality from xcms used below was introduced in xcms version 3.3.3. The code block below checks whether the minimal required version of xcms is installed and, if not, installs the most recent version from GitHub.
# Ensure xcms >= 3.3.3 is available; the export step used later in this
# tutorial requires at least that version.
# requireNamespace() is checked first because packageVersion() raises an error
# when the package is not installed at all, which would prevent the
# install-from-GitHub fallback from ever running.
if (!requireNamespace("xcms", quietly = TRUE) ||
    packageVersion("xcms") < "3.3.3") {
  devtools::install_github("sneumann/xcms")
}
# End-to-end example: preprocess the faahKO example LC-MS data with xcms,
# export the resulting feature table, then analyse it with MetaboAnalystR.
# NOTE: each statement must stay on its own line; a `#` comment runs to the end
# of the line and would otherwise swallow the code that follows it.

# Load necessary libraries
library(xcms)
library(MetaboAnalystR)

# Create file path to example data
cdfpath <- system.file("cdf", package = "faahKO")
cdffiles <- list.files(cdfpath, recursive = TRUE, full.names = TRUE)

# Define the group assignment of the samples
smp_grp <- rep("WT", length(cdffiles))
smp_grp[grep("ko", basename(cdffiles))] <- "KO"

# Define a data.frame with sample descriptions
pd <- data.frame(file = cdffiles, sample_group = smp_grp)

# Read the files (argument name spelled out in full instead of relying on
# partial matching of `pd` to `pdata`)
data <- readMSData(cdffiles, pdata = new("NAnnotatedDataFrame", pd),
                   mode = "onDisk")

# Perform chromatographic peak detection using default settings.
data <- findChromPeaks(data, param = MatchedFilterParam())

# Perform the alignment after a first peak grouping.
data <- groupChromPeaks(data, param = PeakDensityParam(
  sampleGroups = data$sample_group))
data <- adjustRtime(data, param = PeakGroupsParam(family = "symmetric"))

# Perform the correspondence analysis
data <- groupChromPeaks(data, param = PeakDensityParam(
  sampleGroups = data$sample_group, bw = 10))

# At last filling-in missing peak data.
data <- fillChromPeaks(data, param = FillChromPeaksParam())

# Export the feature table in the MetaboAnalyst format. Parameter 'label'
# defines the group assignment of the samples.
exportMetaboAnalyst(data, file = "met_test1.csv", label = data$sample_group)

# Perform data analysis using the MetaboAnalystR package
# First step is to create the mSet Object, specifying that the data to be
# uploaded is a peak table ("pktable") and that statistical analysis will be
# performed ("stat").
mSet <- InitDataObjects("pktable", "stat", FALSE)

# The second step is to read in the processed data (created above)
mSet <- Read.TextData(mSet, "met_test1.csv", "colu", "disc")

# The third step is to perform data processing using MetaboAnalystR
# (filtering/normalization)
mSet <- SanityCheckData(mSet)
mSet <- ReplaceMin(mSet)
mSet <- FilterVariable(mSet, "iqr", "F", 25)
mSet <- PreparePrenormData(mSet)
mSet <- Normalization(mSet, "NULL", "LogNorm", "AutoNorm",
                      ratio = FALSE, ratioNum = 20)
mSet <- PlotNormSummary(mSet, "norm_0_", "png", 72, width = NA)
mSet <- PlotSampleNormSummary(mSet, "snorm_0_", "png", 72, width = NA)

# The fourth step is to perform fold-change analysis
mSet <- FC.Anal.unpaired(mSet, 2.0, 0)
mSet <- PlotFC(mSet, "fc_0_", "png", 72, width = NA)

# The fifth step is to perform t-test analysis
# (FALSE is used instead of the reassignable shorthand F)
mSet <- Ttests.Anal(mSet, FALSE, 0.05, FALSE, TRUE)
mSet <- PlotTT(mSet, "tt_0_", "png", 72, width = NA)

# The sixth step is to perform PCA
mSet <- PCA.Anal(mSet)
mSet <- PlotPCAPairSummary(mSet, "pca_pair_0_", "png", 72, width = NA, 5)
mSet <- PlotPCAScree(mSet, "pca_scree_0_", "png", 72, width = NA, 5)
mSet <- PlotPCA2DScore(mSet, "pca_score2d_0_", "png", 72, width = NA,
                       1, 2, 0.95, 1, 0)
mSet <- PlotPCALoading(mSet, "pca_loading_0_", "png", 72, width = NA,
                       1, 2, "scatter", 1)
mSet <- PlotPCABiplot(mSet, "pca_biplot_0_", "png", 72, width = NA, 1, 2)
mSet <- PlotPCA3DScoreImg(mSet, "pca_score3d_0_", "png", 72, width = NA,
                          1, 2, 3, 40)

# The seventh step is to perform PLS-DA
mSet <- PLSR.Anal(mSet, reg = TRUE)
mSet <- PlotPLSPairSummary(mSet, "pls_pair_0_", "png", 72, width = NA, 5)
mSet <- PlotPLS2DScore(mSet, "pls_score2d_0_", "png", 72, width = NA,
                       1, 2, 0.95, 1, 0)
mSet <- PlotPLS3DScoreImg(mSet, "pls_score3d_0_", "png", 72, width = NA,
                          1, 2, 3, 40)
mSet <- PlotPLSLoading(mSet, "pls_loading_0_", "png", 72, width = NA,
                       1, 2, "scatter", 1)
mSet <- PLSDA.CV(mSet, "L", 5, "Q2")
mSet <- PlotPLS.Classification(mSet, "pls_cv_0_", "png", 72, width = NA)
mSet <- PlotPLS.Imp(mSet, "pls_imp_0_", "png", 72, width = NA,
                    "vip", "Comp. 1", 15, FALSE)

# The last step is to create a summary report of the statistical analysis
PreparePDFReport(mSet, "User Name")
Smith, C.A., Want, E.J., O'Maille, G., Abagyan, R., Siuzdak, G. (2006). “XCMS: Processing mass spectrometry data for metabolite profiling using nonlinear peak alignment, matching and identification.” Analytical Chemistry, 78, 779–787.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.