In MissionBio/tapestriR: Analysis of Data Generated by the Tapestri Platform

library(TapestriR)

# Packages needed for visualization 
library(tidyverse)

Load data from various sources {.tabset}

TapestriR can load data from several file formats:

H5 is the future format for multi-omics and multi-samples, supporting DNA and DNA + Protein analysis.
Loom supports only DNA mutation analysis.
Insights export only supports DNA analysis.

Load from Loom

Works only for DNA and single samples since loom files only contain 1 sample.

# Locate the example loom file bundled with the TapestriR package.
filename <- system.file("extdata", "PE11.cells.loom", package = "TapestriR")

# Read the variants from the loom file, passing a minimum mutation rate.
variants <- read_loom(filename, min_mutation_rate = 0.05)

# Apply the variant filter, telling it the data came from a loom file.
filtered_variants <- filter_variants(variants, filetype = "loom")

# Build the VAF layer as the ratio of the AD layer to the DP layer, rounded
# to 3 decimals. Undefined ratios (NA/NaN, such as 0 / 0) are replaced by 0.
allele_depth <- filtered_variants@data_layers$AD
total_depth <- filtered_variants@data_layers$DP
vaf <- round(allele_depth / total_depth, 3)
vaf[is.na(vaf)] <- 0

# Store the ratio on the assay as a new data layer named "VAF".
filtered_variants <- add_data_layer(filtered_variants, "VAF", vaf)

filtered_variants

Load multi-omics H5

Works for DNA and DNA + Protein data. Also works for multi-sample. Best practice is to create a multi-assay, multi-sample h5 in Pipeline, and apply filters before loading into R.

Usage: load a multi-sample, multi-omic H5 file, then filter and normalize the assays.

# Path to the example multi-omics H5 file shipped with TapestriR.
filename <- system.file(
  "extdata", "4_cell_line_mix_dna_protein.h5",
  package = "TapestriR"
)

experiment <- read_tap(filename)

# A pre-filtered H5 would be the ideal starting point; for now the variant
# assay is filtered after loading, using filter_variants() with its defaults.
filtered_variants <- filter_variants(experiment$assays$dna_variants)

# Put the filtered variants back into the experiment. keep_common_cells = TRUE
# is intended to subset the remaining assays so every assay has the same cells.
experiment <- add_assay(
  moo = experiment,
  assay = filtered_variants,
  keep_common_cells = TRUE
)

# Protein assay: normalize the raw read counts with the clr method and
# convert the result to a tibble.
protein_counts_norm <- as_tibble(
  clr_by_feature(experiment$assays$protein_read_counts$data_layers$read_counts),
  rownames = NA
)
# Attach the normalized values to the protein assay as the "normalized" layer.
experiment$assays$protein_read_counts <- add_data_layer(
  experiment$assays$protein_read_counts,
  "normalized",
  protein_counts_norm
)

# DNA read-count assay: normalize the raw read counts and attach them as the
# "normalized" layer as well.
normalized_dna_reads <- normalize_dna_reads(
  experiment$assays$dna_read_counts$data_layers$read_counts
)
experiment$assays$dna_read_counts <- add_data_layer(
  experiment$assays$dna_read_counts,
  "normalized",
  normalized_dna_reads
)

experiment

Create your own multiomics object

Build a multiomics object from individual components instead of using the read_tap function.

Load variant assay and filter.
Load Protein assay and normalize.
Load DNA read counts (optional; the example code below loads only the variant and protein assays).
Create a multiomics object and merge the loaded assays. This will ensure each assay has the same cells.

# Names of the assays to read from the H5 file.
ASSAY_NAME_VARIANT <- "dna_variants"
ASSAY_NAME_PROTEIN <- "protein_read_counts"

filename <- system.file(
  "extdata", "4_cell_line_mix_dna_protein.h5",
  package = "TapestriR"
)

# Ideally the H5 would already be filtered; for now a minimum mutation rate is
# passed to the reader to mimic that, followed by the default variant filter.
variants <- read_assay_h5(
  filename = filename,
  assay_name = ASSAY_NAME_VARIANT,
  min_mutation_rate = 0.005
)
filtered_variants <- filter_variants(variants)

# Read the protein assay, normalize its read counts with the clr method, and
# store the result on the assay as the "normalized" layer.
protein <- read_assay_h5(filename = filename, assay_name = ASSAY_NAME_PROTEIN)
protein_counts_norm <- protein@data_layers$read_counts %>%
  clr_by_feature() %>%
  as_tibble(rownames = NA)
protein <- add_data_layer(protein, "normalized", protein_counts_norm)

# Create the multiomics object (moo), named after the input file and seeded
# with the cell annotations of the filtered variants, then add both assays.
# The protein assay is added with keep_common_cells = TRUE.
experiment <- create_moo(
  experiment_name = basename(filename),
  cell_annotations = filtered_variants@cell_annotations
)
experiment <- add_assay(moo = experiment, assay = filtered_variants)
experiment <- add_assay(experiment, protein, keep_common_cells = TRUE)

experiment

X-Y plots

1) Select the proteins to plot on the X and Y axes. 2) Select a set of other feature(s) to color the plot by. If you choose more than one feature, each feature will be plotted in a subplot.

##################
# Select the proteins to plot on X and Y.
# Each selector may be a column name (e.g. "CD34") or a column index into the
# normalized protein layer.
##################

# protein_x <- "CD34"
# protein_y <- "CD38"

protein_x <- 1
protein_y <- 2

# Normalized protein layer, used for both axes in every plot below.
normalized_protein <-
  experiment$assays$protein_read_counts$data_layers$normalized

# Axis labels: when a selector is a numeric index, look up the matching column
# name so the axes show the protein name instead of "1" / "2".
protein_x_label <- if (is.numeric(protein_x)) {
  colnames(normalized_protein)[[protein_x]]
} else {
  protein_x
}
protein_y_label <- if (is.numeric(protein_y)) {
  colnames(normalized_protein)[[protein_y]]
} else {
  protein_y
}

##################
# Select 1 or more features to color by.
# color_by holds one column per feature; if it has more than one column,
# each feature is plotted in its own subplot.
##################

# All proteins
color_by <- normalized_protein

# Select a few proteins.
# color_by <- normalized_protein %>% select("CD110", "CD117")

p <- tapestri_scatterplot(
  x = normalized_protein[[protein_x]],
  y = normalized_protein[[protein_y]],
  color_by = color_by
) +
  scale_colour_gradient2(low = "yellow", mid = "grey", high = "darkred")
p <- p +
  xlab(protein_x_label) +
  ylab(protein_y_label) +
  ggtitle("Color by Protein Expression")
p

# Select a few variants: take the first 10 columns of the NGT layer, convert
# each to a factor, and recode it with recode_genotypes().
color_by <- experiment$assays$dna_variants$data_layers$NGT %>%
  select(1:10) %>%
  mutate(across(everything(), ~ recode_genotypes(as_factor(.x))))

p <- tapestri_scatterplot(
  x = normalized_protein[[protein_x]],
  y = normalized_protein[[protein_y]],
  color_by = color_by
)
p <- p +
  xlab(protein_x_label) +
  ylab(protein_y_label) +
  ggtitle("Color by Genotypes")
p

Load the data from Tapestri Insights export

Works for DNA only. Export can be from a multi-sample project in Tapestri Insights.

Usage:

Extract the exported zip file.
Read directory to create a new Tapestri_Assay object.

# Directory of the extracted Tapestri Insights export bundled with TapestriR.
export_dir <- system.file(
  "extdata", "insights_2.2_export",
  package = "TapestriR"
)

# List the files contained in the export directory.
list.files(export_dir)

# Read the export directory into a new assay object.
variants <- read_insights_export(export_dir = export_dir)

variants