knitr::opts_chunk$set(fig.width=10, fig.height=6, echo = TRUE, results = TRUE, warning = FALSE, message=FALSE) options(repos=structure(c(CRAN="https://mirror.ibcp.fr/pub/CRAN/"))) setwd("~/Documents/12 - Bacillus cereus/Bacillus_cereus_media_experiment/")
RomicsProcessor is loaded.
#load RomicsProcessor library("RomicsProcessor")
A maxQuant search was performed, iBAQ intensities are imported using the function extractMaxQuant() from RomicsProcessor. Simple import functions similar to this one can be created for other data types. 'extractMaxQuant()' enables to extract only a specific quantitation type from a maxQuant file (here, the iBAQ intensities from the proteinGroups.txt file generated by maxQuant and to format it appropriately for RomicsProcessor. The "iBAQ." is removed from the columns names and the corresponding metadata is loaded.
#read data data<-extractMaxQuant(file= "proteinGroups.txt",quantification_type="iBAQ", cont.rm=TRUE,site.rm=TRUE, rev.rm=TRUE) #remove the iBAQ. from the colname as it is missing in the metadata colname colnames(data)<-sub("iBAQ.","",colnames(data)) #export data as csv file write.csv(data,file="unprocessed_data.csv") #read metadata.csv file metadata<-read.csv("metadata.csv")
The data and the metadata are then placed in an romics_object named "romics_proteins".
romics_proteins<-romicsCreateObject(data,metadata)
A second version of this romics_object that will be kept unprocessed is created ("unprocessed_romics_proteins").
unprocessed_romics_proteins<-romics_proteins
The Zeros of the romics_proteins object are replaced by missing values. The missingness is plotted for each sample.
#replace missing values by NA romics_proteins<-romicsZeroToMissing(romics_proteins) #plot missingness in each sample romicsPlotMissing(romics_proteins)
Only the proteins with a completeness of 70% in at least one factor (here "media") were conserved for quantification.
#filter with a minimum of 70% completness for each cell type romics_proteins<-romicsFilterMissing(romics_proteins, percentage_completeness=70) #plot this missingness after filtering romicsPlotMissing(romics_proteins)
The data is log2 transformed and the distribution boxplot are plotted for each sample as well as the overall frequency distribution of the data (barplot).
#log2 transform the data romics_proteins<-log2transform(romics_proteins) #plot the distribution for the differents samples distribBoxplot(romics_proteins) #plot the data overal distribution distribHistogramGlobal(romics_proteins)
The log transformed intensities are then median centered (RomicsProcessor is able to automatically recognize if an object has previously be log transformed). The normalized boxplot is plotted.
#median center the data romics_proteins<-medianCenterSample(romics_proteins) #plot the distribution for the differents samples distribBoxplot(romics_proteins)
A hierarchical clustering is performed to verify if the samples are clustering by factor
#Display hclust romicsHclust(romics_proteins)
The data contained missing values, in order to perform certain statistics we decided to impute the data using the method described in the paper by Tyranova et al. in 2016 (Nature Methods, PMID: 27348712). The distribution of the imputed data is shown in yellow, the data is in gray.
#the missingvalue imputation is evaluated and plotted imputeMissingEval(romics_proteins,bin=0.5, nb_stdev = 1.8, scale_x=c(-10,10),width_stdev = 0.5) #the missingvalue imputation from Tyranova et al. was applied romics_proteins<-imputeMissing(romics_proteins, nb_stdev = 1.8,width_stdev = 0.5, seed = 42)
A Principal component analysis is performed after this imputation, various plots are displayed.
#store PCA results in an object called pca_results pca_results<-romicsPCA(romics_proteins) #3 PCA plots were plotted : one showing the explained variance for each component, a 2D PCA plot for the component 1 and 2 and a 3D pca plot indPCAplot(romics_proteins,plotType="percentage", label = FALSE) indPCAplot(romics_proteins,plotType="individual", label = FALSE) indPCA3D(romics_proteins)
The T.tests and ANOVA pvalues are calculated using the "media"" as factor (as media is the main factor of romics_proteins it was not necessary to indicate the factor)
romics_proteins<-romicsTtest(romics_proteins) romics_proteins<-romicsANOVA(romics_proteins)
The Mean and stdev by group were added to the statistics layer. The Z-scores were also added to the statistics layer.
romics_proteins<-romicsMean(romics_proteins) romics_proteins<-romicsSd(romics_proteins) romics_proteins<-romicsZscores(romics_proteins)
The heatmap for the proteins with an adjusted pvalue <0.05 was plotted
romicsHeatmap(romics_proteins, variable_hclust_number=8,ANOVA_filter = "padj",p=0.05)
To capture the clusters displayed on the heatmap, the variable hierachical clustering is recorded in the stat layer of the romics_object
romics_proteins<-romicsVariableHclust(romics_proteins, clusters=8, ANOVA_filter="padj", p=0.05, plot=FALSE)
The abundance of few proteins of interest were plotted by factor
#components of the two most important known enterotoxins responsible for the diarrheal syndrome associated with B. cereus #Hbl p1<-singleVariablePlot(romics_proteins, variable="Q7BYC6", type = "jb", factor="main", limits = c(-10,7), title="Hemolysin BL lytic component L1") p2<-singleVariablePlot(romics_proteins, variable="Q7BYC7", type = "jb", factor="main", limits = c(-10,5), title="Hemolysin BL lytic component L2") #Nhe p3<-singleVariablePlot(romics_proteins, variable="Q81EZ7", type = "jb", factor="main", limits = c(-2.5,2.5), title="Non-hemolytic enterotoxin lytic component L1") p4<-singleVariablePlot(romics_proteins, variable="Q81EZ8", type = "jb", factor="main", limits = c(-5,5),title="Non-hemolytic enterotoxin lytic component L2") grid.arrange(p1,p2,p3,p4,ncol=2) #Sporulation related proteins p1 <- singleVariablePlot(romics_proteins, variable="P0A4I3", type = "jb", factor="main", limits = c(-2,6),title="Stage 0 sporulation protein A ") p2 <- singleVariablePlot(romics_proteins, variable="Q81DD1", type = "jb", factor="main", limits = c(-10,0),title="Stage II sporulation protein SA") p3 <- singleVariablePlot(romics_proteins, variable="Q819B3", type = "jb", factor="main", limits = c(-10,6),title="Anti-sigma F factor") grid.arrange(p1,p2,p3,ncol=2)
Using the romicsSteps function we identify the transformative functions that were applied to the romics_proteins object. RomicsProcessor is able to record both the functions applied but also to store the user inputed parameters.
romicsSteps(romics_proteins, show_dates = TRUE, show_details = TRUE)
A pipeline (named romics_pipeline) was created from these steps
romics_pipeline<-romicsCreatePipeline(romics_proteins)
The created pipeline was applied to the unprocessed_romics_proteins object
reprocessed_romics_proteins<-romicsApplyPipeline(unprocessed_romics_proteins, romics_pipeline)
This step verify if the data re-processed using the pipeline is identical to the one generated by the code above
processed_romics_proteins<-romics_proteins if(sum(processed_romics_proteins$data==reprocessed_romics_proteins$data)== nrow(processed_romics_proteins$data)*ncol(processed_romics_proteins$data)){ print("The outcome of the reprocessing gave EXACTLY THE SAME RESULTS as the manually processed data!")}else { print("The outcome of the reprocessing gave DIFFERENT results than the manually processed data") }
The unprocessed_romics_proteins, the processed romics_proteins and the romics_pipeline are exported as .rda files
save(data, file = "Example_romics_data.rda") save(metadata, file = "Example_romics_metadata.rda") save(unprocessed_romics_proteins, file = "Example_unprocessed_romics_proteins.rda") save(processed_romics_proteins, file = "Example_processed_romics_proteins.rda") save(romics_pipeline, file = "Example_romics_pipeline.rda")
The data was exported in a .csv file to create an excel table formated for publication
df<-romicsExportData(romics_proteins, statistics = TRUE) write.csv(df,"b_cereus_processed_data_with_statistics.csv")
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.