# inst/doc/parallel-annotation.R

## ----biocstyle, echo = FALSE, results = "asis"--------------------------------
# Emit the BiocStyle markdown set-up into the document (chunk output is "asis")
BiocStyle::markdown()

## ---- echo = FALSE------------------------------------------------------------
# knitr chunk option defaults: collapse = TRUE and "#>" as the output prefix
knitr::opts_chunk$set(collapse = TRUE, comment = "#>")

## ----init, message = FALSE, echo = FALSE, results = "hide"--------------------
## Silently loading all packages
library(BiocStyle)
library(peakPantheR)
library(faahKO)
library(pander)
library(doParallel)
library(foreach)

## ---- out.width = "700px", echo = FALSE---------------------------------------
# Embed the parallelAnnotation figure
knitr::include_graphics(path = "../man/figures/parallelAnnotation.png")

## ---- out.width = "700px", echo = FALSE---------------------------------------
# Embed the parallelAnnotation_procedure figure
knitr::include_graphics(
    path = "../man/figures/parallelAnnotation_procedure.png"
)

## -----------------------------------------------------------------------------
library(faahKO)
# Resolve the three KO example CDF files inside the installed faahKO package;
# system.file() is called once per relative path and the result is an unnamed
# character vector.
input_spectraPaths <- vapply(
    c("cdf/KO/ko15.CDF", "cdf/KO/ko16.CDF", "cdf/KO/ko18.CDF"),
    system.file,
    character(1),
    package = "faahKO",
    USE.NAMES = FALSE
)
input_spectraPaths

## ---- eval = FALSE------------------------------------------------------------
#  # targetFeatTable
#  input_targetFeatTable <- data.frame(matrix(vector(), 2, 8, dimnames=list(c(),
#                          c("cpdID", "cpdName", "rtMin", "rt", "rtMax", "mzMin",
#                              "mz", "mzMax"))), stringsAsFactors=FALSE)
#  input_targetFeatTable[1,] <- c("ID-1", "Cpd 1", 3310., 3344.888, 3390.,
#                                  522.194778, 522.2, 522.205222)
#  input_targetFeatTable[2,] <- c("ID-2", "Cpd 2", 3280., 3385.577, 3440.,
#                                  496.195038, 496.2, 496.204962)
#  input_targetFeatTable[,c(3:8)] <- sapply(input_targetFeatTable[,c(3:8)],
#                                          as.numeric)

## ---- results = "asis", echo = FALSE------------------------------------------
# Target feature table displayed in the vignette: one row per compound, with
# its identifiers (cpdID, cpdName) stored as character and its retention time
# (rtMin, rt, rtMax) and m/z (mzMin, mz, mzMax) columns stored as numeric.
# Every column is created with its final type directly, rather than coercing
# each row to character through c() and parsing the numbers back with
# sapply(as.numeric); the resulting data.frame is the same.
input_targetFeatTable <- data.frame(
    cpdID   = c("ID-1", "ID-2"),
    cpdName = c("Cpd 1", "Cpd 2"),
    rtMin   = c(3310, 3280),
    rt      = c(3344.888, 3385.577),
    rtMax   = c(3390, 3440),
    mzMin   = c(522.194778, 496.195038),
    mz      = c(522.2, 496.2),
    mzMax   = c(522.205222, 496.204962),
    stringsAsFactors = FALSE
)
# use pandoc for improved readability
pander::pandoc.table(input_targetFeatTable, digits = 9)

## ---- eval=FALSE--------------------------------------------------------------
#  # spectra Metadata
#  input_spectraMetadata  <- data.frame(matrix(c("sample type 1", "sample type 2",
#                              "sample type 1"), 3, 1,
#                              dimnames=list(c(),c("sampleType"))),
#                              stringsAsFactors=FALSE)

## ---- results = "asis", echo = FALSE------------------------------------------
# Spectra metadata: a single character column `sampleType` with three rows
input_spectraMetadata <- data.frame(
    sampleType = c("sample type 1", "sample type 2", "sample type 1"),
    stringsAsFactors = FALSE
)
# use pandoc for improved readability
pander::pandoc.table(input_spectraMetadata)

## -----------------------------------------------------------------------------
library(peakPantheR)
# Gather the spectra paths, the target feature table and the spectra metadata
# in a single peakPantheRAnnotation object
init_annotation <- peakPantheRAnnotation(
    spectraPaths    = input_spectraPaths,
    targetFeatTable = input_targetFeatTable,
    spectraMetadata = input_spectraMetadata
)

## -----------------------------------------------------------------------------
init_annotation

## -----------------------------------------------------------------------------
# annotate files serially
annotation_result <- peakPantheR_parallelAnnotation(
    init_annotation,
    ncores     = 0,
    curveModel = "skewedGaussian",
    verbose    = TRUE
)

# successful fit
data_annotation <- annotation_result$annotation
nbSamples(data_annotation)
data_annotation

# list failed fit
annotation_result$failures

## -----------------------------------------------------------------------------
updated_annotation <- annotationParamsDiagnostic(
    data_annotation,
    verbose = TRUE
)

# uROI now exist
updated_annotation

## ---- eval=FALSE--------------------------------------------------------------
#  # create a colourScale based on the sampleType
#  uniq_sType <- sort(unique(spectraMetadata(updated_annotation)$sampleType),
#                      na.last=TRUE)
#  col_sType  <- unname( setNames(c('blue', 'red'),
#                  c(uniq_sType))[spectraMetadata(updated_annotation)$sampleType] )
#  
#  # create a temporary location to save the diagnostic (otherwise provide the
#  # path to the selected location)
#  output_folder <- tempdir()
#  
#  # output fit diagnostic to disk
#  outputAnnotationDiagnostic(updated_annotation, saveFolder=output_folder,
#                              savePlots=TRUE, sampleColour=col_sType,
#                              verbose=TRUE, ncores=2)

## ---- results = "asis", echo = FALSE------------------------------------------
# use pandoc for improved readability, display the diagnostic results
# Column layout of the displayed table; the three "X" columns hold the "|"
# separators placed between the ROI, uROI and FIR groups.
diag_cols <- c(
    "cpdID", "cpdName",
    "X",
    "ROI_rt", "ROI_mz", "ROI_rtMin", "ROI_rtMax", "ROI_mzMin", "ROI_mzMax",
    "X",
    "uROI_rtMin", "uROI_rtMax", "uROI_mzMin", "uROI_mzMax",
    "uROI_rt", "uROI_mz",
    "X",
    "FIR_rtMin", "FIR_rtMax", "FIR_mzMin", "FIR_mzMax"
)
# Positions of the columns that stay as text (identifiers and separators)
text_cols <- c(1, 2, 3, 10, 17)

tmp_csv <- data.frame(
    matrix(nrow = 2, ncol = 21, dimnames = list(NULL, diag_cols)),
    stringsAsFactors = FALSE
)
tmp_csv[1, ] <- c(
    "ID-1", "Cpd 1", "|",
    3344.888, 522.2, 3310, 3390, 522.194778, 522.205222, "|",
    3305.75893, 3411.436284, 522.194778, 522.205222, 3344.888, 522.2, "|",
    3326.10635, 3407.272648, 522.194778, 522.205222
)
tmp_csv[2, ] <- c(
    "ID-2", "Cpd 2", "|",
    3385.577, 496.2, 3280, 3440, 496.195038, 496.204962, "|",
    3337.376665, 3462.449033, 496.195038, 496.204962, 3385.577, 496.2, "|",
    3365.023857, 3453.404957, 496.195038, 496.204962
)
# Every remaining column is converted back from text to numeric
tmp_csv[-text_cols] <- lapply(tmp_csv[-text_cols], as.numeric)
# data.frame() made the repeated "X" names unique; put the intended names back
colnames(tmp_csv) <- diag_cols
pander::pandoc.table(tmp_csv, digits = 9)

## ---- out.width = "700px", echo = FALSE---------------------------------------
# Embed the parallel_annotation_diagnostic_cpd1 figure
knitr::include_graphics(
    path = "../man/figures/parallel_annotation_diagnostic_cpd1.png"
)

## ---- results="asis", echo=FALSE----------------------------------------------
# Example with constant correction, compound 'ID-1' being the only reference.
rtCorrectionOutput <- retentionTimeCorrection(
    updated_annotation,
    rtCorrectionReferences = "ID-1",
    method = "constant",
    robust = FALSE,
    rtWindowWidth = 15,
    diagnostic = TRUE
)
# Carry the returned annotation forward for the following steps
updated_annotation <- rtCorrectionOutput$annotation
# The ggplot2 plot object
rtCorrectionOutput$plot

# Example with second degree polynomial, without using RANSAC to obtain a
# robust fit
rtCorrectionOutput <- retentionTimeCorrection(
    updated_annotation,
    rtCorrectionReferences = NULL,
    method = "polynomial",
    params = list(polynomialOrder = 2),
    robust = FALSE,
    rtWindowWidth = 15,
    diagnostic = TRUE
)

## ---- eval=FALSE--------------------------------------------------------------
#  update_csv_path <- '/path_to_new_csv/'
#  
#  # load csv
#  new_annotation <- peakPantheR_loadAnnotationParamsCSV(update_csv_path)
#  #> uROIExist set to TRUE
#  #> New peakPantheRAnnotation object initialised for 2 compounds
#  
#  new_annotation
#  #> An object of class peakPantheRAnnotation
#  #>  2 compounds in 0 samples.
#  #>   updated ROI exist (uROI)
#  #>   does not use updated ROI (uROI)
#  #>   does not use fallback integration regions (FIR)
#  #>   is not annotated
#  
#  new_annotation <- resetFIR(new_annotation)
#  #> FIR will be reset with uROI values

## -----------------------------------------------------------------------------
## new files
# KO and WT example files interleaved (ko15, wt15, ko16, wt16, ko18, wt18);
# system.file() is called once per relative path and the result is an unnamed
# character vector.
new_spectraPaths <- vapply(
    c("cdf/KO/ko15.CDF", "cdf/WT/wt15.CDF",
      "cdf/KO/ko16.CDF", "cdf/WT/wt16.CDF",
      "cdf/KO/ko18.CDF", "cdf/WT/wt18.CDF"),
    system.file,
    character(1),
    package = "faahKO",
    USE.NAMES = FALSE
)

new_spectraPaths

## -----------------------------------------------------------------------------
## new spectra metadata
# A single character column `Group`, alternating "KO" and "WT" over six rows
new_spectraMetadata <- data.frame(
    Group = rep(c("KO", "WT"), times = 3),
    stringsAsFactors = FALSE
)

## ---- results = "asis", echo = FALSE------------------------------------------
# use pandoc for improved readability
# new_spectraMetadata is created by the preceding chunk, which is evaluated, so
# it is rendered here directly instead of being rebuilt from a second copy of
# the same literal values (a duplicate that could drift from the shown code).
pander::pandoc.table(new_spectraMetadata)

## ---- echo=FALSE--------------------------------------------------------------
# Reset the annotation with the new spectra paths and metadata, with useUROI
# and useFIR both set to TRUE
new_annotation <- resetAnnotation(
    updated_annotation,
    spectraPaths    = new_spectraPaths,
    spectraMetadata = new_spectraMetadata,
    useUROI         = TRUE,
    useFIR          = TRUE,
    verbose         = FALSE
)

## ---- eval=FALSE--------------------------------------------------------------
#  ## add new samples to the annotation loaded from csv, useUROI, useFIR
#  
#  new_annotation <- resetAnnotation(new_annotation, spectraPaths=new_spectraPaths,
#                                  spectraMetadata=new_spectraMetadata,
#                                  useUROI=TRUE, useFIR=TRUE)
#  #> peakPantheRAnnotation object being reset:
#  #>   Previous "ROI", "cpdID" and "cpdName" value kept
#  #>   Previous "uROI" value kept
#  #>   Previous "FIR" value kept
#  #>   Previous "cpdMetadata" value kept
#  #>   New "spectraPaths" value set
#  #>   New "spectraMetadata" value set
#  #>   Previous "uROIExist" value kept
#  #>   New "useUROI" value set
#  #>   New "useFIR" value set

## -----------------------------------------------------------------------------
new_annotation

## -----------------------------------------------------------------------------
# annotate files serially
new_annotation_result <- peakPantheR_parallelAnnotation(
    new_annotation,
    ncores  = 0,
    verbose = FALSE
)

# successful fit
final_annotation <- new_annotation_result$annotation
nbSamples(final_annotation)

final_annotation

# list failed fit
new_annotation_result$failures

## ---- eval=FALSE--------------------------------------------------------------
#  # create a colourScale based on the Group
#  uniq_group <- sort(unique(spectraMetadata(final_annotation)$Group),na.last=TRUE)
#  col_group  <- unname( setNames(c('blue', 'red'),
#                      c(uniq_group))[spectraMetadata(final_annotation)$Group] )
#  
#  # create a temporary location to save the diagnostic (otherwise provide the
#  # path to the selected location)
#  final_output_folder <- tempdir()
#  
#  # output fit diagnostic to disk
#  outputAnnotationDiagnostic(final_annotation, saveFolder=final_output_folder,
#                          savePlots=TRUE, sampleColour=col_group, verbose=TRUE)

## ---- eval=FALSE--------------------------------------------------------------
#  # peakTables for the first sample
#  peakTables(final_annotation)[[1]]

## ---- results = "asis", echo = FALSE------------------------------------------
# use pandoc for improved readability
# peakTable of the first sample, rendered as a pandoc table
first_peakTable <- peakTables(final_annotation)[[1]]
pander::pandoc.table(first_peakTable)

## ---- eval=FALSE--------------------------------------------------------------
#  # Extract the found peak area for all compounds and all samples
#  annotationTable(final_annotation, column='peakArea')

## ---- results = "asis", echo = FALSE------------------------------------------
# use pandoc for improved readability
# 'peakArea' annotation table, rendered as a pandoc table
peakArea_table <- annotationTable(final_annotation, column = "peakArea")
pander::pandoc.table(peakArea_table)

## ---- eval=FALSE--------------------------------------------------------------
#  # create a temporary location to save the diagnostic (otherwise provide the
#  # path to the selected location)
#  final_output_folder <- tempdir()
#  
#  # save
#  outputAnnotationResult(final_annotation, saveFolder=final_output_folder,
#                          annotationName='ProjectName', verbose=TRUE)
#  #> Compound metadata saved at /final_output_folder/ProjectName_cpdMetadata.csv
#  #> Spectra metadata saved at
#  #>     /final_output_folder/ProjectName_spectraMetadata.csv
#  #> Peak measurement "found" saved at /final_output_folder/ProjectName_found.csv
#  #> Peak measurement "rtMin" saved at /final_output_folder/ProjectName_rtMin.csv
#  #> Peak measurement "rt" saved at /final_output_folder/ProjectName_rt.csv
#  #> Peak measurement "rtMax" saved at /final_output_folder/ProjectName_rtMax.csv
#  #> Peak measurement "mzMin" saved at /final_output_folder/ProjectName_mzMin.csv
#  #> Peak measurement "mz" saved at /final_output_folder/ProjectName_mz.csv
#  #> Peak measurement "mzMax" saved at /final_output_folder/ProjectName_mzMax.csv
#  #> Peak measurement "peakArea" saved at
#  #>     /final_output_folder/ProjectName_peakArea.csv
#  #> Peak measurement "maxIntMeasured" saved at
#  #>     /final_output_folder/ProjectName_maxIntMeasured.csv
#  #> Peak measurement "maxIntPredicted" saved at
#  #>     /final_output_folder/ProjectName_maxIntPredicted.csv
#  #> Peak measurement "is_filled" saved at
#  #>     /final_output_folder/ProjectName_is_filled.csv
#  #> Peak measurement "ppm_error" saved at
#  #>     /final_output_folder/ProjectName_ppm_error.csv
#  #> Peak measurement "rt_dev_sec" saved at
#  #>     /final_output_folder/ProjectName_rt_dev_sec.csv
#  #> Peak measurement "tailingFactor" saved at
#  #>     /final_output_folder/ProjectName_tailingFactor.csv
#  #> Peak measurement "asymmetryFactor" saved at
#  #>     /final_output_folder/ProjectName_asymmetryFactor.csv
#  #> Summary saved at /final_output_folder/ProjectName_summary.csv

# Try the peakPantheR package in your browser
#
# Any scripts or data that you put into this service are public.
#
# peakPantheR documentation built on Nov. 8, 2020, 6:38 p.m.