This vignette will demonstrate a general use-case of cyCombine.

knitr::opts_chunk$set(
  strip.white = T, comment = ""
)

Alongside a directory of FCS files, a metadata and a panel file are assumed present. The metadata is in this example assumed to have the following columns:

| Filename | batch | condition | Patient_id | |---------:|:------|-----------|:----------:|


And the panel should contain the columns:

| Channel | Antigen | Type | |---------|---------|------|



Prepare data

First, the FCS files are converted into a work-able tibble.

The recommended workflow

# Load packages
library(cyCombine)
library(tidyverse)

# Directory with FCS files
data_dir <- "~/data"

# Extract markers from panel
panel_file <- file.path(data_dir, "panel.csv") # Can also be .xlsx
metadata_file <- file.path(data_dir, "metadata.csv") # Can also be .xlsx

# Extract markers of interest
markers <- read.csv(panel_file) %>% 
  filter(Type != "none") %>% 
  pull(Antigen)

# Prepare a tibble from directory of FCS files
uncorrected <- prepare_data(
  data_dir = data_dir,
  metadata = metadata_file, 
  filename_col = "Filename",
  batch_ids = "batch",
  condition = "condition",
  down_sample = FALSE,
  markers = markers
)
# Store result
saveRDS(uncorrected, file = file.path(data_dir, "uncorrected.RDS"))


Modular workflow

In case you want more control, you can adjust any step of this approach. Feel free to skip this segment, if the method above worked just fine for you.

This example will include all possible input parameters to give an overview of what can be modified.

# Load packages
library(cyCombine)
library(tidyverse)

# Directory with FCS files
data_dir <- "~/data"

# Extract markers from panel
panel <- read_csv(file.path(data_dir, "panel.csv")) # Can also be .xlsx
metadata <- read_csv(file.path(data_dir, "metadata.csv"))

# Extract markers of interest
markers <- panel %>% 
  filter(Type != "none") %>% 
  pull(Antigen)

# Read FCS files
flowset <- compile_fcs(
  data_dir = data_dir,
  pattern = "\\.fcs" # Read all FCS files
)

# Convert flowset to tibble
df <- convert_flowset(
  flowset = flowset,
  metadata = metadata,
  filename_col = "Filename",
  sample_ids = "Filename", # By default the filename is used to get sample ids
  batch_ids = "batch",
  condition = "condition",
  down_sample = TRUE,
  sample_size = 2000000,
  seed = 101,
  panel = panel, # Can also be the filename. It is solely used to ensure the channel names match what you expect (i.e. what is in the panel_antigen column)
  panel_channel = "Channel",
  panel_antigen = "Antigen"
)

# Transform data - This function also de-randomizes the data
uncorrected <- transform_asinh(
  df = df,
  markers = markers,
  cofactor = 5,
  .keep = TRUE # Lets you keep all columns, in case they are useful to you
)
# Store result
saveRDS(uncorrected, file = file.path(data_dir, "uncorrected.RDS"))



Batch correct

The recommended workflow

Skip this segment if you are interested in more modularity

# Load packages
library(cyCombine)
library(tidyverse)

# Load data (if not already loaded)
# uncorrected <- readRDS("data/uncorrected.RDS")
# markers <- get_markers(uncorrected)

# Batch correct
corrected <- batch_correct(
  df = uncorrected,
  covar = "condition",
  markers = markers,
  norm_method = "scale", # "rank" is recommended when combining data with heavy batch effects
  rlen = 10 # Higher values are recommended when using rank normalization and 10 does not appear to perform well
)

# Save result
saveRDS(corrected, file.path(data_dir, "corrected.RDS"))


Modular method

# Load packages
library(cyCombine)
library(tidyverse)

# Load data (if not already loaded)
# uncorrected <- readRDS("data/uncorrected.RDS")
# markers <- get_markers(uncorrected)

# Create cell type labels using a SOM grid (if you want to use your own labels, they can be added manually and this step should not be run)
labels <- uncorrected %>% 
  normalize(markers = markers,
            norm_method = "rank", # "scale" is recommended in cases with light batch effects (e.g. when combining similar data)
            ties.method = "average") %>% # Can also be minimum
  create_som(markers = markers,
             rlen = 10, # If results are not convincing, consider using a higher value (e.g. 100)
             seed = 101,
             xdim = 8,
             ydim = 8)

# Batch correct
corrected <- correct_data(
  df = uncorrected,
  label = labels # Add custom labels here, if desired
  covar = "condition",
  markers = markers,
  parametric = TRUE
  )

# Save result
saveRDS(corrected, file.path(data_dir, "corrected.RDS"))


Evaluate performance using Earth Mover's Distance

The EMD reduction is implemented as the performance metric; EMDs were computed for both the uncorrected and corrected data, removing those values where both had an EMD < 2.

$$Reduction = \frac{\sum{EMD_{before}} - \sum{EMD_{after}}}{\sum{EMD_{before}}}$$

# Load packages
library(cyCombine)
library(tidyverse)

# Load data (if not already loaded)
# data_dir <- "~/data"
# uncorrected <- readRDS(file.path(data_dir, "uncorrected.RDS"))
# corrected <- readRDS(file.path(data_dir, "corrected.RDS"))
# markers <- get_markers(uncorrected)

# Re-run clustering on corrected data
labels <- corrected %>% 
  create_som(markers = markers,
             rlen = 10)
uncorrected$label <- corrected$label <- labels

# Evaluate EMD
emd <- evaluate_emd(uncorrected, corrected, cell_col = "label")

# Reduction
emd$reduction

# Violin plot
emd$violin

# Scatter plot
emd$scatter


Create UMAPs and density plots

This segment will demonstrate the built-in functions for generating UMAPs and density plots.

# Load packages
library(cyCombine)
library(tidyverse)

# Load data (if not already loaded)
# data_dir <- "~/data"
# uncorrected <- readRDS(file.path(data_dir, "uncorrected.RDS"))
# corrected <- readRDS(file.path(data_dir, "corrected.RDS"))
# markers <- get_markers(uncorrected)

# Create UMAPs
sam <- sample(1:nrow(uncorrected), 30000)
plot1 <- plot_dimred(uncorrected[sam, ], "Uncorrected", type = "umap", plot = "batch", markers = markers)
plot2 <- plot_dimred(corrected[sam, ], "Corrected", type = "umap", plot = "batch", markers = markers)
plot_save_two(plot1, plot2, "figs/umap.png")

# Density plots
plot_density(uncorrected,
             corrected,
             markers = markers,
             filename = "figs/density.png",
             y = "batch",
             ncol = 6,
             xlim = 10)


biosurf/cyCombine documentation built on May 23, 2024, 4:07 a.m.