This vignette will demonstrate a general use-case of cyCombine.
Alongside a directory of FCS files, a metadata file and a panel file are assumed to be present. In this example, the metadata is assumed to have the following columns:
| Filename | batch | condition | Patient_id |
|---------:|:------|-----------|:----------:|
And the panel should contain the columns:
| Channel | Antigen | Type |
|---------|---------|------|
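As a minimal illustration of the expected structure (the file names, batches, channels, and antigens below are made-up placeholders, not values from this dataset), the two files could look like this when read into R:

```r
library(tibble)

# Hypothetical contents of metadata.csv - only the column names matter here
metadata_example <- tribble(
  ~Filename,     ~batch, ~condition, ~Patient_id,
  "sample1.fcs", "1",    "control",  "P1",
  "sample2.fcs", "2",    "treated",  "P2"
)

# Hypothetical contents of panel.csv - markers with Type == "none" are excluded below
panel_example <- tribble(
  ~Channel,  ~Antigen, ~Type,
  "Nd142Di", "CD19",   "type",
  "Ir191Di", "DNA1",   "none"
)
```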
First, the FCS files are converted into a workable tibble.
```r
# Load packages
library(cyCombine)
library(tidyverse)

# Directory with FCS files
data_dir <- "~/data"

# Paths to the panel and metadata files
panel_file <- file.path(data_dir, "panel.csv") # Can also be .xlsx
metadata_file <- file.path(data_dir, "metadata.csv") # Can also be .xlsx

# Extract markers of interest
markers <- read.csv(panel_file) %>%
  filter(Type != "none") %>%
  pull(Antigen)

# Prepare a tibble from the directory of FCS files
uncorrected <- prepare_data(
  data_dir = data_dir,
  metadata = metadata_file,
  filename_col = "Filename",
  batch_ids = "batch",
  condition = "condition",
  down_sample = FALSE,
  markers = markers
)

# Store result
saveRDS(uncorrected, file = file.path(data_dir, "uncorrected.RDS"))
```
If you want more control, you can adjust any step of this approach. Feel free to skip this segment if the method above worked fine for you.
This example will include all possible input parameters to give an overview of what can be modified.
```r
# Load packages
library(cyCombine)
library(tidyverse)

# Directory with FCS files
data_dir <- "~/data"

# Read the panel and metadata files
panel <- read_csv(file.path(data_dir, "panel.csv")) # Can also be .xlsx
metadata <- read_csv(file.path(data_dir, "metadata.csv"))

# Extract markers of interest
markers <- panel %>%
  filter(Type != "none") %>%
  pull(Antigen)

# Read FCS files
flowset <- compile_fcs(
  data_dir = data_dir,
  pattern = "\\.fcs" # Read all FCS files
)

# Convert flowset to tibble
df <- convert_flowset(
  flowset = flowset,
  metadata = metadata,
  filename_col = "Filename",
  sample_ids = "Filename", # By default the filename is used to get sample ids
  batch_ids = "batch",
  condition = "condition",
  down_sample = TRUE,
  sample_size = 2000000,
  seed = 101,
  panel = panel, # Can also be the filename. It is solely used to ensure the channel names match what you expect (i.e. what is in the panel_antigen column)
  panel_channel = "Channel",
  panel_antigen = "Antigen"
)

# Transform data - this function also de-randomizes the data
uncorrected <- transform_asinh(
  df = df,
  markers = markers,
  cofactor = 5,
  .keep = TRUE # Lets you keep all columns, in case they are useful to you
)

# Store result
saveRDS(uncorrected, file = file.path(data_dir, "uncorrected.RDS"))
```
Skip this segment if you are interested in more modularity.
```r
# Load packages
library(cyCombine)
library(tidyverse)

# Load data (if not already loaded)
# uncorrected <- readRDS("data/uncorrected.RDS")
# markers <- get_markers(uncorrected)

# Batch correct
corrected <- batch_correct(
  df = uncorrected,
  covar = "condition",
  markers = markers,
  norm_method = "scale", # "rank" is recommended when combining data with heavy batch effects
  rlen = 10 # Higher values are recommended when using rank normalization, or if 10 does not appear to perform well
)

# Save result
saveRDS(corrected, file.path(data_dir, "corrected.RDS"))
```
```r
# Load packages
library(cyCombine)
library(tidyverse)

# Load data (if not already loaded)
# uncorrected <- readRDS("data/uncorrected.RDS")
# markers <- get_markers(uncorrected)

# Create cell type labels using a SOM grid
# (if you want to use your own labels, they can be added manually and this step should not be run)
labels <- uncorrected %>%
  normalize(markers = markers,
            norm_method = "rank", # "scale" is recommended in cases with light batch effects (e.g. when combining similar data)
            ties.method = "average") %>% # Can also be "minimum"
  create_som(markers = markers,
             rlen = 10, # If results are not convincing, consider using a higher value (e.g. 100)
             seed = 101,
             xdim = 8,
             ydim = 8)

# Batch correct
corrected <- correct_data(
  df = uncorrected,
  label = labels, # Add custom labels here, if desired
  covar = "condition",
  markers = markers,
  parametric = TRUE
)

# Save result
saveRDS(corrected, file.path(data_dir, "corrected.RDS"))
```
The EMD reduction is used as the performance metric: EMDs are computed for both the uncorrected and corrected data, and entries where both have an EMD below 2 are removed before summing.
$$Reduction = \frac{\sum{EMD_{before}} - \sum{EMD_{after}}}{\sum{EMD_{before}}}$$
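As a small sketch of this arithmetic (the EMD values below are made up for illustration; `evaluate_emd()` computes this summary for you, as shown in the next chunk):

```r
# Hypothetical per-marker/per-cluster EMDs before and after correction
emd_before <- c(4.1, 2.6, 7.3, 0.8)
emd_after  <- c(1.2, 1.9, 2.4, 0.7)

# Keep only entries where at least one of the two EMDs is >= 2
keep <- emd_before >= 2 | emd_after >= 2

# EMD reduction as defined above
reduction <- (sum(emd_before[keep]) - sum(emd_after[keep])) / sum(emd_before[keep])
reduction
```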
```r
# Load packages
library(cyCombine)
library(tidyverse)

# Load data (if not already loaded)
# data_dir <- "~/data"
# uncorrected <- readRDS(file.path(data_dir, "uncorrected.RDS"))
# corrected <- readRDS(file.path(data_dir, "corrected.RDS"))
# markers <- get_markers(uncorrected)

# Re-run clustering on corrected data
labels <- corrected %>%
  create_som(markers = markers,
             rlen = 10)
uncorrected$label <- corrected$label <- labels

# Evaluate EMD
emd <- evaluate_emd(uncorrected, corrected, cell_col = "label")

# Reduction
emd$reduction

# Violin plot
emd$violin

# Scatter plot
emd$scatter
```
This segment will demonstrate the built-in functions for generating UMAPs and density plots.
```r
# Load packages
library(cyCombine)
library(tidyverse)

# Load data (if not already loaded)
# data_dir <- "~/data"
# uncorrected <- readRDS(file.path(data_dir, "uncorrected.RDS"))
# corrected <- readRDS(file.path(data_dir, "corrected.RDS"))
# markers <- get_markers(uncorrected)

# Create UMAPs
sam <- sample(1:nrow(uncorrected), 30000)
plot1 <- plot_dimred(uncorrected[sam, ], "Uncorrected", type = "umap", plot = "batch", markers = markers)
plot2 <- plot_dimred(corrected[sam, ], "Corrected", type = "umap", plot = "batch", markers = markers)
plot_save_two(plot1, plot2, "figs/umap.png")

# Density plots
plot_density(uncorrected, corrected,
             markers = markers,
             filename = "figs/density.png",
             y = "batch",
             ncol = 6,
             xlim = 10)
```