# inst/doc/optimalFlow_vignette.R

## ----setup, include=FALSE-----------------------------------------------------
# Set `echo = TRUE` through knitr's chunk-option object.
knitr::opts_chunk$set(echo = TRUE)

## ----ej00, eval = FALSE-------------------------------------------------------
#  if (!requireNamespace("BiocManager", quietly = TRUE))
#      install.packages("BiocManager")
#  
#  BiocManager::install("optimalFlow")

## ----ej0, echo = TRUE, message = FALSE----------------------------------------
library(optimalFlowData)
library(optimalFlow)
library(ellipse)

## ----ej1, echo = TRUE---------------------------------------------------------
# Call buildDatabase() with the dataset names Cytometry2-5, 7-9, 12-17, 19 and
# 21, and with four population ids.
database <- buildDatabase(
  dataset_names = paste0("Cytometry", c(2:5, 7:9, 12:17, 19, 21)),
  population_ids = c("Monocytes", "CD4+CD8-", "Mature SIg Kappa", "TCRgd-")
)

## ----ej1110, eval = FALSE-----------------------------------------------------
#  templates.optimalFlow <-
#    optimalFlowTemplates(
#      database = database
#      )

## ----ej111, echo = TRUE-------------------------------------------------------
# Call optimalFlowTemplates() on `database`, passing templates.number = 5 and
# cl.paral = 1.
templates.optimalFlow <- optimalFlowTemplates(
  database = database,
  templates.number = 5,
  cl.paral = 1
)

## ----ej1112, echo = TRUE------------------------------------------------------
# The number of clusters, and, hence, of templates
length(templates.optimalFlow$templates)
# The number of elements of the first template, it contains four cell types
length(templates.optimalFlow$templates[[1]])
# The first element of the first template
templates.optimalFlow$templates[[1]][[1]]

## ----ej1113, echo = TRUE------------------------------------------------------
# The `clustering` component of the templates object
templates.optimalFlow$clustering

## ----ej1114, echo = TRUE------------------------------------------------------
# The number of elements in the database
length(templates.optimalFlow$database.elliptical)
# The number of cell types in the first element of the database
length(templates.optimalFlow$database.elliptical[[1]])
# The parameters corresponding to the first cell type in the first cytometry
# of the database
templates.optimalFlow$database.elliptical[[1]][[1]]

## ----ej2, echo = TRUE---------------------------------------------------------
# Plot the entries of database.elliptical whose clustering label equals 3,
# using dimensions 4 and 3.
cytoPlotDatabase(
  templates.optimalFlow$database.elliptical[
    which(templates.optimalFlow$clustering == 3)
  ],
  dimensions = c(4, 3),
  xlim = c(0, 8000),
  ylim = c(0, 8000),
  xlab = "",
  ylab = ""
)

## ----ej22, eval = FALSE-------------------------------------------------------
#  cytoPlotDatabase3d(templates.optimalFlow$database.elliptical[which(templates.optimalFlow$clustering == 3)], dimensions = c(4, 3, 9), xlim = c(0, 8000), ylim = c(0, 8000), zlim = c(0, 8000))

## ----ej3, echo = TRUE---------------------------------------------------------
# Plot the third template with the same dimensions and axis limits.
cytoPlot(
  templates.optimalFlow$templates[[3]],
  dimensions = c(4, 3),
  xlim = c(0, 8000),
  ylim = c(0, 8000),
  xlab = "",
  ylab = ""
)

## ----ej32, eval = FALSE-------------------------------------------------------
#  cytoPlot3d(templates.optimalFlow$templates[[3]], dimensions = c(4, 3, 9), xlim = c(0, 8000), ylim = c(0, 8000), zlim = c(0, 8000))

## ----ej4, echo = TRUE---------------------------------------------------------
# Same database subset as in chunk ej2, this time with colour = FALSE.
cytoPlotDatabase(
  templates.optimalFlow$database.elliptical[
    which(templates.optimalFlow$clustering == 3)
  ],
  dimensions = c(4, 3),
  xlim = c(0, 8000),
  ylim = c(0, 8000),
  xlab = "",
  ylab = "",
  colour = FALSE
)

## ----ej42, eval = FALSE-------------------------------------------------------
#  cytoPlotDatabase3d(templates.optimalFlow$database.elliptical[which(templates.optimalFlow$clustering == 3)], dimensions = c(4, 3, 9), xlim = c(0, 8000), ylim = c(0, 8000), zlim = c(0, 8000), colour = FALSE)

## ----ej5, echo = TRUE---------------------------------------------------------
# Templates obtained with consensus.method = "k-barycenter"; the
# barycenter-specific arguments are passed explicitly.
templates.optimalFlow.barycenter <- optimalFlowTemplates(
  database = database,
  templates.number = 5,
  consensus.method = "k-barycenter",
  barycenters.number = 4,
  bar.repetitions = 10,
  alpha.bar = 0.05,
  cl.paral = 1
)

## ----ej52, echo = TRUE--------------------------------------------------------
# Templates obtained with consensus.method = "hierarchical".
templates.optimalFlow.hdbscan <- optimalFlowTemplates(
  database = database,
  templates.number = 5,
  consensus.method = "hierarchical",
  cl.paral = 1
)

## ----ej6, echo = TRUE---------------------------------------------------------
# Plot the third template of the "k-barycenter" result (dimensions 4 and 3).
cytoPlot(
  templates.optimalFlow.barycenter$templates[[3]],
  dimensions = c(4, 3),
  xlim = c(0, 8000),
  ylim = c(0, 8000),
  xlab = "",
  ylab = ""
)

## ----ej62, eval = FALSE-------------------------------------------------------
#  cytoPlot3d(templates.optimalFlow.barycenter$templates[[3]], dimensions = c(4, 3, 9), xlim = c(0, 8000), ylim = c(0, 8000), zlim = c(0, 8000))

## ----ej7, echo=TRUE-----------------------------------------------------------
# Plot the third template of the "hierarchical" result (dimensions 4 and 3).
cytoPlot(
  templates.optimalFlow.hdbscan$templates[[3]],
  dimensions = c(4, 3),
  xlim = c(0, 8000),
  ylim = c(0, 8000),
  xlab = "",
  ylab = ""
)

## ----ej72, eval = FALSE-------------------------------------------------------
#  cytoPlot3d(templates.optimalFlow.hdbscan$templates[[3]], dimensions = c(4, 3, 9), xlim = c(0, 8000), ylim = c(0, 8000), zlim = c(0, 8000))

## ----ej77, echo = TRUE--------------------------------------------------------
# Build templates with hclust.method = "hdbscan"; templates.number is not
# passed in this call.
templates.optimalFlow.unsup <- optimalFlowTemplates(
  database = database,
  hclust.method = "hdbscan",
  cl.paral = 1,
  consensus.method = "hierarchical"
)

# Print the clustering of this run followed by the clustering of the earlier
# templates.optimalFlow run.
print(templates.optimalFlow.unsup$clustering)
print(templates.optimalFlow$clustering)

# NOTE(review): indexing templates[[5]] assumes this run produced at least
# five templates - confirm, since templates.number is not fixed here.
cytoPlot(
  templates.optimalFlow.unsup$templates[[5]],
  dimensions = c(4, 3),
  xlim = c(0, 8000),
  ylim = c(0, 8000),
  xlab = "",
  ylab = ""
)

## ----ej80, echo = TRUE--------------------------------------------------------
# Keep only the rows of Cytometry1 whose `Population ID (name)` is one of the
# four listed populations. `%in%` is the idiomatic spelling of
# `match(x, table, nomatch = 0) > 0` and selects exactly the same rows
# (values of `x` absent from the table, including NA, yield FALSE).
test.cytometry <- Cytometry1[
  Cytometry1$`Population ID (name)` %in%
    c("Monocytes", "CD4+CD8-", "Mature SIg Kappa", "TCRgd-"),
]

## ----ej8, echo = TRUE---------------------------------------------------------
# Classify columns 1 to 10 of test.cytometry, passing the database and
# templates.optimalFlow, with consensus.method = "pooling".
classification.optimalFlow <- optimalFlowClassification(
  test.cytometry[, 1:10],
  database,
  templates.optimalFlow,
  consensus.method = "pooling",
  cl.paral = 1
)

## ----ej82, echo = TRUE--------------------------------------------------------
# First entries of the `cluster` component, then its frequency table.
head(classification.optimalFlow$cluster)
table(classification.optimalFlow$cluster)

## ----ej83, echo = TRUE--------------------------------------------------------
# Length of the `clusterings` component and the frequency table of the
# `cluster` element of its first entry.
length(classification.optimalFlow$clusterings)
table(classification.optimalFlow$clusterings[[1]]$cluster)

## ----ej84, echo = TRUE--------------------------------------------------------
# The assigned template index, shown next to the clustering of the templates.
classification.optimalFlow$assigned.template.index
templates.optimalFlow$clustering

## ----ej85, echo = TRUE--------------------------------------------------------
# NOTE(review): noise.types is not defined anywhere in this script; presumably
# it is supplied by one of the attached packages - confirm.
scoreF1.optimalFlow <- optimalFlow::f1Score(
  classification.optimalFlow$cluster,
  test.cytometry,
  noise.types
)
print(scoreF1.optimalFlow)

## ----ej9, echo = TRUE---------------------------------------------------------
# Classify columns 1 to 10 of test.cytometry, passing the "k-barycenter"
# templates and consensus.method = "k-barycenter".
classification.optimalFlow.barycenter <- optimalFlowClassification(
  test.cytometry[, 1:10],
  database,
  templates.optimalFlow.barycenter,
  consensus.method = "k-barycenter",
  cl.paral = 1
)

## ----ej92, echo = TRUE--------------------------------------------------------
# Frequency table of the `cluster` component, then the `cluster.vote`
# component.
table(classification.optimalFlow.barycenter$cluster)
classification.optimalFlow.barycenter$cluster.vote

## ----ej922, echo = TRUE-------------------------------------------------------
# The assigned template index, shown next to the clustering of the templates.
classification.optimalFlow.barycenter$assigned.template.index
templates.optimalFlow.barycenter$clustering

## ----ej93, echo = TRUE--------------------------------------------------------
# NOTE(review): 1.01 is passed positionally as the fourth argument of
# f1ScoreVoting(); its meaning is not visible in this script - confirm.
# NOTE(review): noise.types is not defined anywhere in this script; presumably
# it is supplied by one of the attached packages - confirm.
scoreF1.optimalFlow.barycenter <- f1ScoreVoting(
  classification.optimalFlow.barycenter$cluster.vote,
  classification.optimalFlow.barycenter$cluster,
  test.cytometry,
  1.01,
  noise.types
)
print(scoreF1.optimalFlow.barycenter$F1_score)

## ----ej10, echo = TRUE--------------------------------------------------------
# Classify columns 1 to 10 of test.cytometry, passing the "hierarchical"
# templates and consensus.method = "hierarchical".
classification.optimalFlow.hdbscan <- optimalFlowClassification(
  test.cytometry[, 1:10],
  database,
  templates.optimalFlow.hdbscan,
  consensus.method = "hierarchical",
  cl.paral = 1
)

# Inspect the result: cluster table, cluster.vote, assigned template index and
# the clustering of the templates that were passed in.
table(classification.optimalFlow.hdbscan$cluster)
classification.optimalFlow.hdbscan$cluster.vote
classification.optimalFlow.hdbscan$assigned.template.index
templates.optimalFlow.hdbscan$clustering

# NOTE(review): 1.01 is passed positionally as the fourth argument of
# f1ScoreVoting(); its meaning is not visible in this script - confirm.
# NOTE(review): noise.types is not defined anywhere in this script; presumably
# it is supplied by one of the attached packages - confirm.
scoreF1.optimalFlow.hdbscan <- f1ScoreVoting(
  classification.optimalFlow.hdbscan$cluster.vote,
  classification.optimalFlow.hdbscan$cluster,
  test.cytometry,
  1.01,
  noise.types
)
print(scoreF1.optimalFlow.hdbscan$F1_score)

## ----ej11, echo = TRUE--------------------------------------------------------
# Classify columns 1 to 10 of test.cytometry, passing templates.optimalFlow
# with classif.method = "matching" and cost.function = "ellipses".
classification.optimalFlow.2 <- optimalFlowClassification(
  test.cytometry[, 1:10],
  database,
  templates.optimalFlow,
  consensus.method = "pooling",
  classif.method = "matching",
  cost.function = "ellipses",
  cl.paral = 1
)

# Inspect the result: cluster tables, cluster.vote, assigned template index
# and the clustering of the templates that were passed in.
table(classification.optimalFlow.2$cluster)
table(classification.optimalFlow.2$clusterings[[1]]$cluster)
classification.optimalFlow.2$cluster.vote
classification.optimalFlow.2$assigned.template.index
templates.optimalFlow$clustering

# NOTE(review): 1.01 is passed positionally as the fourth argument of
# f1ScoreVoting(); its meaning is not visible in this script - confirm.
# NOTE(review): noise.types is not defined anywhere in this script; presumably
# it is supplied by one of the attached packages - confirm.
scoreF1.optimalFlow.2 <- f1ScoreVoting(
  classification.optimalFlow.2$cluster.vote,
  classification.optimalFlow.2$cluster,
  test.cytometry,
  1.01,
  noise.types
)
print(scoreF1.optimalFlow.2$F1_score)

## ----ej12, echo = TRUE--------------------------------------------------------
# Classify columns 1 to 10 of test.cytometry, passing templates.optimalFlow
# with classif.method = "random forest".
classification.optimalFlow.3 <- optimalFlowClassification(
  test.cytometry[, 1:10],
  database,
  templates.optimalFlow,
  consensus.method = "pooling",
  classif.method = "random forest",
  cl.paral = 1
)
table(classification.optimalFlow.3$cluster)

# the cytometry used for learning belongs to the cluster labelled as 1 and is
# the first of the cytometries in that cluster, hence it is the first cytometry
# in the database.
classification.optimalFlow.3$assigned.template.index
templates.optimalFlow$clustering

# NOTE(review): noise.types is not defined anywhere in this script; presumably
# it is supplied by one of the attached packages - confirm.
scoreF1.optimalFlow.3 <- optimalFlow::f1Score(
  classification.optimalFlow.3$cluster,
  test.cytometry,
  noise.types
)
print(scoreF1.optimalFlow.3)

# Try the optimalFlow package in your browser
#
# Any scripts or data that you put into this service are public.
#
# optimalFlow documentation built on Nov. 8, 2020, 6:59 p.m.