Nothing
## ----setup, include=FALSE-----------------------------------------------------
knitr::opts_chunk$set(echo = TRUE)
## ---- out.width="190px", echo=FALSE-------------------------------------------
knitr::include_graphics("diffcyt.png")
## ---- eval=FALSE--------------------------------------------------------------
# # Install Bioconductor installer from CRAN
# install.packages("BiocManager")
#
# # Install 'diffcyt' package from Bioconductor
# BiocManager::install("diffcyt")
## ---- eval=FALSE--------------------------------------------------------------
# BiocManager::install("HDCytoData")
# BiocManager::install("CATALYST")
## -----------------------------------------------------------------------------
suppressPackageStartupMessages(library(HDCytoData))
# Download and load 'Bodenmiller_BCR_XL' dataset in 'flowSet' format
d_flowSet <- Bodenmiller_BCR_XL_flowSet()
suppressPackageStartupMessages(library(flowCore))
# check data format
d_flowSet
# sample names
pData(d_flowSet)
# number of cells
fsApply(d_flowSet, nrow)
# dimensions
dim(exprs(d_flowSet[[1]]))
# expression values
exprs(d_flowSet[[1]])[1:6, 1:5]
## ---- eval=FALSE--------------------------------------------------------------
# # Alternatively: load data from '.fcs' files
# files <- list.files(
# path = "path/to/files", pattern = "\\.fcs$", full.names = TRUE
# )
# d_flowSet <- read.flowSet(
# files, transformation = FALSE, truncate_max_range = FALSE
# )
## -----------------------------------------------------------------------------
# Meta-data: experiment information
# check sample order
filenames <- as.character(pData(d_flowSet)$name)
# sample information
sample_id <- gsub("^PBMC8_30min_", "", gsub("\\.fcs$", "", filenames))
group_id <- factor(
gsub("^patient[0-9]+_", "", sample_id), levels = c("Reference", "BCR-XL")
)
patient_id <- factor(gsub("_.*$", "", sample_id))
experiment_info <- data.frame(
group_id, patient_id, sample_id, stringsAsFactors = FALSE
)
experiment_info
# Meta-data: marker information
# source: Bruggner et al. (2014), Table 1
# column indices of all markers, lineage markers, and functional markers
cols_markers <- c(3:4, 7:9, 11:19, 21:22, 24:26, 28:31, 33)
cols_lineage <- c(3:4, 9, 11, 12, 14, 21, 29, 31, 33)
cols_func <- setdiff(cols_markers, cols_lineage)
# channel and marker names
channel_name <- colnames(d_flowSet)
marker_name <- gsub("\\(.*$", "", channel_name)
# marker classes
# note: using lineage markers for 'cell type', and functional markers for
# 'cell state'
marker_class <- rep("none", ncol(d_flowSet[[1]]))
marker_class[cols_lineage] <- "type"
marker_class[cols_func] <- "state"
marker_class <- factor(marker_class, levels = c("type", "state", "none"))
marker_info <- data.frame(
channel_name, marker_name, marker_class, stringsAsFactors = FALSE
)
marker_info
## -----------------------------------------------------------------------------
suppressPackageStartupMessages(library(diffcyt))
# Create design matrix
# note: selecting columns containing group IDs and patient IDs (for an
# unpaired dataset, only group IDs would be included)
design <- createDesignMatrix(
experiment_info, cols_design = c("group_id", "patient_id")
)
## -----------------------------------------------------------------------------
# Create contrast matrix
contrast <- createContrast(c(0, 1, rep(0, 7)))
# check
nrow(contrast) == ncol(design)
data.frame(parameters = colnames(design), contrast)
## -----------------------------------------------------------------------------
# Test for differential abundance (DA) of clusters
# note: using default method 'diffcyt-DA-edgeR' and default parameters
# note: include random seed for reproducible clustering
out_DA <- diffcyt(
d_input = d_flowSet,
experiment_info = experiment_info,
marker_info = marker_info,
design = design,
contrast = contrast,
analysis_type = "DA",
seed_clustering = 123
)
# display table of results for top DA clusters
topTable(out_DA, format_vals = TRUE)
# calculate number of significant detected DA clusters at 10% false discovery
# rate (FDR)
threshold <- 0.1
res_DA_all <- topTable(out_DA, all = TRUE)
table(res_DA_all$p_adj <= threshold)
## ---- fig.show='hide'---------------------------------------------------------
# Test for differential states (DS) within clusters
# note: using default method 'diffcyt-DS-limma' and default parameters
# note: include random seed for reproducible clustering
out_DS <- diffcyt(
d_input = d_flowSet,
experiment_info = experiment_info,
marker_info = marker_info,
design = design,
contrast = contrast,
analysis_type = "DS",
seed_clustering = 123,
plot = FALSE
)
# display table of results for top DS cluster-marker combinations
topTable(out_DS, format_vals = TRUE)
# calculate number of significant detected DS cluster-marker combinations at
# 10% false discovery rate (FDR)
threshold <- 0.1
res_DS_all <- topTable(out_DS, all = TRUE)
table(res_DS_all$p_adj <= threshold)
## -----------------------------------------------------------------------------
# Prepare data
d_se <- prepareData(d_flowSet, experiment_info, marker_info)
## -----------------------------------------------------------------------------
# Transform data
d_se <- transformData(d_se)
## -----------------------------------------------------------------------------
# Generate clusters
# note: include random seed for reproducible clustering
d_se <- generateClusters(d_se, seed_clustering = 123)
## -----------------------------------------------------------------------------
# Calculate cluster cell counts
d_counts <- calcCounts(d_se)
# Calculate cluster medians
d_medians <- calcMedians(d_se)
## -----------------------------------------------------------------------------
# Test for differential abundance (DA) of clusters
res_DA <- testDA_edgeR(d_counts, design, contrast)
# display table of results for top DA clusters
topTable(res_DA, format_vals = TRUE)
# calculate number of significant detected DA clusters at 10% false discovery
# rate (FDR)
threshold <- 0.1
table(topTable(res_DA, all = TRUE)$p_adj <= threshold)
## ---- fig.show='hide'---------------------------------------------------------
# Test for differential states (DS) within clusters
res_DS <- testDS_limma(d_counts, d_medians, design, contrast, plot = FALSE)
# display table of results for top DS cluster-marker combinations
topTable(res_DS, format_vals = TRUE)
# calculate number of significant detected DS cluster-marker combinations at
# 10% false discovery rate (FDR)
threshold <- 0.1
table(topTable(res_DS, all = TRUE)$p_adj <= threshold)
## -----------------------------------------------------------------------------
# Output object from 'diffcyt()' wrapper function
names(out_DA)
dim(out_DA$d_se)
rowData(out_DA$d_se)
str(assay(out_DA$d_se))
head(assay(out_DA$d_se), 2)
# Extract cell-level table for export as .fcs file
# note: including group IDs, patient IDs, sample IDs, and cluster labels for
# each cell
# note: table must be a numeric matrix (to save as .fcs file)
d_fcs <- assay(out_DA$d_se)
class(d_fcs)
## ---- eval=FALSE--------------------------------------------------------------
# # Save as .fcs file
# filename_fcs <- "exported_data.fcs"
# write.FCS(
# flowFrame(d_fcs), filename = filename_fcs
# )
#
# # Alternatively, save as tab-delimited .txt file
# filename_txt <- "exported_data.txt"
# write.table(
# d_fcs, file = filename_txt, quote = FALSE, sep = "\t", row.names = FALSE
# )
## -----------------------------------------------------------------------------
# Heatmap for top detected DA clusters
# note: use optional argument 'sample_order' to group samples by condition
sample_order <- c(seq(2, 16, by = 2), seq(1, 16, by = 2))
plotHeatmap(out_DA, analysis_type = "DA", sample_order = sample_order)
## ---- eval=FALSE--------------------------------------------------------------
# # Heatmap for top detected DA clusters (alternative code using 'CATALYST')
#
# suppressPackageStartupMessages(library(CATALYST))
#
# plotDiffHeatmap(out_DA$d_se, out_DA$res)
## -----------------------------------------------------------------------------
# Heatmap for top detected DS cluster-marker combinations
# note: use optional argument 'sample_order' to group samples by condition
sample_order <- c(seq(2, 16, by = 2), seq(1, 16, by = 2))
plotHeatmap(out_DS, analysis_type = "DS", sample_order = sample_order)
## ---- eval=FALSE--------------------------------------------------------------
# # Heatmap for top detected DA clusters (alternative code using 'CATALYST')
#
# suppressPackageStartupMessages(library(CATALYST))
#
# plotDiffHeatmap(out_DS$d_se, out_DS$res)
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.