inst/doc/SIAMCAT_holdout.R

## ----start, message=FALSE, warning=FALSE--------------------------------------
library(SIAMCAT)

# this is data from Zeller et al., Mol. Syst. Biol. 2014
fn.feat.fr  <-
    'https://www.embl.de/download/zeller/FR-CRC/FR-CRC-N141_tax-ab-specI.tsv'
fn.meta.fr  <-
    'https://www.embl.de/download/zeller/FR-CRC/FR-CRC-N141_metadata.tsv'

# this is the external dataset from Yu et al., Gut 2017
fn.feat.cn  <-
    'https://www.embl.de/download/zeller/CN-CRC/CN-CRC-N128_tax-ab-specI.tsv'
fn.meta.cn  <-
    'https://www.embl.de/download/zeller/CN-CRC/CN-CRC-N128_metadata.tsv'

## ----siamcat_fr---------------------------------------------------------------
# features
# be vary of the defaults in R!!!
feat.fr  <- read.table(fn.feat.fr, sep='\t', quote="",
    check.names = FALSE, stringsAsFactors = FALSE)
# the features are counts, but we want to work with relative abundances
feat.fr.rel <- prop.table(as.matrix(feat.fr), 2)

# metadata
meta.fr  <- read.table(fn.meta.fr, sep='\t', quote="",
    check.names=FALSE, stringsAsFactors=FALSE)

# create SIAMCAT object
siamcat.fr <- siamcat(feat=feat.fr.rel, meta=meta.fr,
    label='Group', case='CRC')

## ----siamcat_cn---------------------------------------------------------------
# features
feat.cn  <- read.table(fn.feat.cn, sep='\t', quote="",
    check.names = FALSE)
feat.cn.rel <- prop.table(as.matrix(feat.cn), 2)

# metadata
meta.cn  <- read.table(fn.meta.cn, sep='\t', quote="",
    check.names=FALSE, stringsAsFactors = FALSE)

# SIAMCAT object
siamcat.cn <- siamcat(feat=feat.cn.rel, meta=meta.cn,
        label='Group', case='CRC')

## ----preprocessing_fr---------------------------------------------------------
siamcat.fr <- filter.features(
    siamcat.fr,
    filter.method = 'abundance',
    cutoff = 0.001,
    rm.unmapped = TRUE,
    verbose=2
)

siamcat.fr <- normalize.features(
    siamcat.fr,
    norm.method = "log.std",
    norm.param = list(log.n0 = 1e-06, sd.min.q = 0.1),
    verbose = 2
)

## ----build_model_fr, results='hide'-------------------------------------------
siamcat.fr <-  create.data.split(
    siamcat.fr,
    num.folds = 5,
    num.resample = 2
)

siamcat.fr <- train.model(
    siamcat.fr,
    method = "lasso"
)

## ----predict_evaluate_fr, results='hide'--------------------------------------
siamcat.fr <- make.predictions(siamcat.fr)

siamcat.fr <-  evaluate.predictions(siamcat.fr)

## ----normalize_cn-------------------------------------------------------------

siamcat.cn <- normalize.features(siamcat.cn,
    norm.param=norm_params(siamcat.fr),
    feature.type='original',
    verbose = 2)


## ----predict_cn, results='hide'-----------------------------------------------
siamcat.cn <- make.predictions(
    siamcat = siamcat.fr,
    siamcat.holdout = siamcat.cn,
    normalize.holdout = FALSE)

## ----alternative_pipeline_cn, eval=FALSE--------------------------------------
#  ## Alternative Code, not run here
#  siamcat.cn <- siamcat(feat=feat.cn.rel, meta=meta.cn,
#      label='Group', case='CRC')
#  siamcat.cn <- make.predictions(siamcat = siamcat.fr,
#      siamcat.holdout = siamcat.cn,
#      normalize.holdout = TRUE)

## ----eval_cn, message=FALSE---------------------------------------------------
siamcat.cn <- evaluate.predictions(siamcat.cn)

## ----eval_plot, eval=FALSE----------------------------------------------------
#  model.evaluation.plot('FR-CRC'=siamcat.fr,
#      'CN-CRC'=siamcat.cn,
#      colours=c('dimgrey', 'orange'))

## ----session_info-------------------------------------------------------------
sessionInfo()

Try the SIAMCAT package in your browser

Any scripts or data that you put into this service are public.

SIAMCAT documentation built on Nov. 8, 2020, 5:14 p.m.