knitr::opts_chunk$set(echo = TRUE)
The batch effect correction analysis module performs an automatic or specific correction on the peak data table generated by peak picking step. Multiple correction methods have been embeded inside. They include combat, WaveICA, EigenMS, QC_RLSC, ANCOVA, RUV_random, RUV_2, RUVseq series (RUV_s, RUV_r and RUV_g), NOMIS, CCMN. The detailed limitation and mathmatic mechanism of different methods have been illustrated in our manuscript. Here, two cases will be provided to show users a step to step workflow for this moduel.
The peak table generated by FormatPeakList function needs to be manually prepared by hand to supplements the corresponding information below.
·1st column The samples name;
·2nd column Sample groups/classes information, if there are any QC samples, please mark them as QC;
·3rd column Sample injection order, if no related information, please leave them empty;
·4th column Batch information, please provide consistent characters within the same batch;
·other column Features' intensity should be provided. If any internal standards, please mark the feature with "IS".
·NOTE: Please provide as more information as possible. If some information ommitted, please leave the columns empty.
library(knitr) d<-kable(read.csv("vignette_figures/bc.csv",header = T)) d
# Use Google API for data downloading here. # Please "install.packages('googledrive')" and "install.packages('httpuv')"first. library(googledrive); temp <- tempfile(fileext = ".csv") # Please authorize your google account to access the data dl <- drive_download( as_id("1vObn7HfgtZ2E6tPOyQyxRLineUZB1zeG"), path = temp, overwrite = TRUE)
cat('File downloaded: * BC_example.csv Saved locally as: * /tmp/RtmpIBzO2x/file11c61acc1747.csv')
# Load the MetaboAnalystR package library("MetaboAnalystR")
mSet <- InitDataObjects("pktable", "utils", FALSE)
cat('[1] "MetaboAnalyst R objects initialized ..."')
## we set samples in "row" according to the table format. If your samples are in column, set it as "col". mSet <- Read.BatchCSVdata(mSet, dl$local_path, "row")
mSet <- PerformBatchCorrection(mSet)
cat('[1] "Correcting using the Automatically method !" [1] "Correcting with Combat..." Standardizing Data across genes [1] "Correcting with WaveICA..." [1] "Correcting with EigenMS..." [1] "Correcting with QC-RLSC..." [1] "Correcting with ANCOVA..." [1] "Correcting with RUV-random..." [1] "Correcting with RUV-2..." [1] "Correcting with RUVs..." [1] "Correcting with RUVr..." Loading required package: edgeR Loading required package: limma [1] "Correcting with RUVg..." [1] "Correcting with NOMIS..." [1] "Correcting with CCMN..." [1] "Best results generated by combat !"')
# Show the distances between batches mSet$dataSet$interbatch_dis
cat('combat_edata WaveICA_edata EigenMS_edata QC_RLSC_edata ANCOVA_edata RUV_random_edata RUV_2_edata RUV_s_edata RUV_r_edata 9.367183 10.099356 13.105851 10.978753 11.470403 10.163140 10.842479 22.628060 11.146660 RUV_g_edata NOMIS_edata CCMN_edata 11.018191 11.196554 11.104648 ')
knitr::include_graphics("vignette_figures/Example1_BC PCAdpi300.png",dpi = 480) knitr::include_graphics("vignette_figures/Example1_BC Trenddpi300.png",dpi = 480)
This Benchmark data is a large batch data with over 600 samples. The data file size is over 70 MB. This part is designed for repeating our published results. Running of this part may take over 30min to finish. You are strongly recommonded to use your own data instead of this large file to avoid the consuming of patience.
# Use Google API for data downloading peak feature here. # Please "install.packages('googledrive')" and "install.packages('httpuv')"first. library(googledrive); temp <- tempfile(fileext = ".csv") # Please authorize your google account to access the data dl1 <- drive_download( as_id("1wEh2P81J_xFWJs5y4mq98-FsjxJ5wmBO"), path = temp, overwrite = TRUE)
# Use Google API for data downloading meta data here. # Please "install.packages('googledrive')" and "install.packages('httpuv')"first. library(googledrive); temp <- tempfile(fileext = ".csv") # Please authorize your google account to access the data dl2 <- drive_download( as_id("1IOF2pCYrd0sbmFt3x9XOcwYj6XEA2z1n"), path = temp, overwrite = TRUE)
cat('File downloaded: * metaboanalyst_input.csv Saved locally as: * /tmp/RtmpIBzO2x/file11c64ad429b2.csv')
# Load the MetaboAnalystR package library("MetaboAnalystR")
mSet<-InitDataObjects("pktable", "stat", FALSE)
cat('Starting Rserve: /usr/lib/R/bin/R CMD /home/xialab/R/x86_64-pc-linux-gnu-library/3.6/Rserve/libs//Rserve --no-save R version 3.6.3 (2020-02-29) -- "Holding the Windsock" Copyright (C) 2020 The R Foundation for Statistical Computing Platform: x86_64-pc-linux-gnu (64-bit) R is free software and comes with ABSOLUTELY NO WARRANTY. You are welcome to redistribute it under certain conditions.', "Type 'license()' or 'licence()' for distribution details. Natural language support but running in an English locale R is a collaborative project with many contributors. Type 'contributors()' for more information and 'citation()' on how to cite R or R packages in publications. Type 'demo()' for some demos, 'help()' for on-line help, or 'help.start()' for an HTML browser interface to help. Type 'q()' to quit R.", '##> SOCK_ERROR: bind error #98(address already in use) Rserv started in daemon mode. [1] "MetaboAnalyst R objects initialized ..."')
mSet<-Read.TextData(mSet, dl1$local_path, "col", "disc")
mSet<-SanityCheckData(mSet) mSet<-ReplaceMin(mSet); mSet<-FilterVariable(mSet, "iqr", "F", 25) mSet<-PreparePrenormData(mSet) mSet<-Normalization(mSet, "MedianNorm", "LogNorm", "NULL", ratio=FALSE, ratioNum=20)
cat('[1] "Successfully passed sanity check!" [2] "Samples are not paired." [3] "4 groups were detected in samples." [4] "Only English letters, numbers, underscore, hyphen and forward slash (/) are allowed." [5] "<font color=\"orange\">Other special characters or punctuations (if any) will be stripped off.</font>" [6] "All data values are numeric." [7] "A total of 744120 (14.4%) missing values were detected." [8] "<u>By default, these values will be replaced by a small value.</u>" [9] "Click <b>Skip</b> button if you accept the default practice" [10] "Or click <b>Missing value imputation</b> to use other methods" ')
cat('[1] " Further feature filtering based on Interquantile Range Reduced to 5000 features based on Interquantile Range" [1] " Further feature filtering based on Interquantile Range Reduced to 5000 features based on Interquantile Range"')
### Data Orgnization normalized_set <- mSet[["dataSet"]][["norm"]] ordered_normalized_set <- normalized_set[ order(row.names(normalized_set)), ] ### import metadata meta_data <- read.csv(dl2$local_path) new_normalized_set <- cbind(meta_data[,2:4], ordered_normalized_set); write.csv(new_normalized_set,file = "new_normalized_set.csv")
#perform PCA mSet <- PCA.Anal(mSet) mSet <- PlotPCAPairSummary(mSet, "pca_pair_0_", "png", 72, width=NA, 5) mSet <- PlotPCAScree(mSet, "pca_scree_0_", "png", 72, width=NA, 5) mSet <- PlotPCA2DScore(mSet, "pca_score2d_0_", "png", 72, width=NA,style = 2, 1,2,0.95,0,0)
knitr::include_graphics("vignette_figures/IBD_BC_pca_score2d_0_dpi72.png",dpi = 120)
mSet <- InitDataObjects("pktable", "utils", FALSE)
cat('[1] "MetaboAnalyst R objects initialized ..."')
## we set samples in "row" according to the table format. If your samples are in column, set it as "col". mSet <- Read.BatchCSVdata(mSet, "new_normalized_set.csv", "row")
mSet <- PerformBatchCorrection(mSet)
cat('[1] "Correcting using the Automatically method !" [1] "Correcting with Combat..." Standardizing Data across genes [1] "Correcting with WaveICA..." [1] "Correcting with EigenMS..." [1] "Correcting with QC-RLSC..." Cluster size 2216 broken into 551 1665 Done cluster 551 Cluster size 1665 broken into 1025 640 Done cluster 1025 Done cluster 640 Done cluster 1665 [1] "Correcting with ANCOVA..." [1] "Best results generated by EigenMS !"')
# Show the distances between batches mSet$dataSet$interbatch_dis
cat('combat_edata WaveICA_edata EigenMS_edata QC_RLSC_edata ANCOVA_edata 39.42686 38.32145 24.18179 36.58138 38.38796 ')
knitr::include_graphics("vignette_figures/Example2_image_BC PCAdpi300.png",dpi = 480) knitr::include_graphics("vignette_figures/Example2_image_BC Trenddpi300.png",dpi = 480)
## remove the batch column in the corrected result data_corrected <- read.csv("/home/xialab/Documents/MetaboAnalyst_batch_data.csv") data_corrected_new <- data_corrected[,-3] write.csv(data_corrected_new,file = "MetaboAnalyst_batch_data_stats.csv",row.names = F)
mSet<-InitDataObjects("pktable", "stat", FALSE) mSet<-Read.TextData(mSet, "MetaboAnalyst_batch_data_stats.csv", "row", "disc") mSet<-SanityCheckData(mSet) mSet<-ReplaceMin(mSet); mSet<-FilterVariable(mSet, "iqr", "F", 25) mSet<-PreparePrenormData(mSet) mSet<-Normalization(mSet, "NULL", "NULL", "NULL", ratio=FALSE, ratioNum=20) # No need to normalize again mSet <- PCA.Anal(mSet) mSet <- PlotPCAPairSummary(mSet, "pca_pair_0_", "png", 72, width=NA, 5) mSet <- PlotPCAScree(mSet, "pca_scree_0_", "png", 72, width=NA, 5) mSet <- PlotPCA2DScore(mSet, "pca_score2d_0_", "png", 72, width=NA, style = 2, 1,2,0.95,0,0)
knitr::include_graphics("vignette_figures/IBD_BCfree_pca_score2d_0_dpi72.png",dpi = 120)
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.