
# Packages needed for visualization 

Load data from various sources {.tabset}

Enable loading from various files.

Load from Loom

Works only for DNA data from a single sample, since a loom file contains only one sample.

# Locate the example loom file bundled with the TapestriR package.
filename <- system.file("extdata", "PE11.cells.loom", package = "TapestriR")

# Read the variants from the loom file, passing a minimum mutation rate of 0.05.
variants <- read_loom(filename, min_mutation_rate = 0.05)

filtered_variants <- filter_variants(variants, filetype = "loom")

# Per-entry ratio of the AD layer to the DP layer, rounded to 3 decimals.
# NOTE(review): presumably AD = alternate-allele depth and DP = total depth,
# making this the variant allele frequency -- confirm against the package docs.
vaf <- round(
  filtered_variants@data_layers$AD / filtered_variants@data_layers$DP,
  3
)

# Entries where the ratio is undefined (e.g. 0 / 0 gives NaN) are set to 0.
# Only the missing entries are replaced; every other computed value is kept.
vaf[is.na(vaf)] <- 0

# Store the result as a new "VAF" data layer on the filtered variants.
filtered_variants <- add_data_layer(filtered_variants, "VAF", vaf)


Load multi-omics H5

Works for DNA and DNA + Protein data. Also works for multi-sample. Best practice is to create a multi-assay, multi-sample h5 in Pipeline, and apply filters before loading into R.

Usage: load a multi-sample, multi-omic h5 file, then filter and normalize the assays.

# Locate the example DNA + protein h5 file bundled with the TapestriR package.
filename <- system.file(
  "extdata",
  "4_cell_line_mix_dna_protein.h5",
  package = "TapestriR"
)

experiment <- read_tap(filename)

# A pre-filtered h5 would be the ideal starting point; for now the variants
# are filtered with the basic defaults after loading.
filtered_variants <- filter_variants(experiment$assays$dna_variants)

# Put the filtered variants back into the experiment. keep_common_cells = TRUE
# is requested so that the other assays are subset to the same cells.
experiment <- add_assay(
  moo = experiment,
  assay = filtered_variants,
  keep_common_cells = TRUE
)

# Normalize the protein read counts with the clr method.
protein_counts_norm <- as_tibble(
  clr_by_feature(experiment$assays$protein_read_counts$data_layers$read_counts),
  rownames = NA
)
# Attach the normalized counts to the protein assay as the "normalized" layer.
experiment$assays$protein_read_counts <- add_data_layer(
  experiment$assays$protein_read_counts,
  "normalized",
  protein_counts_norm
)

# Normalize the DNA read counts and attach them as a "normalized" layer too.
normalized_dna_reads <- normalize_dna_reads(
  experiment$assays$dna_read_counts$data_layers$read_counts
)
experiment$assays$dna_read_counts <- add_data_layer(
  experiment$assays$dna_read_counts,
  "normalized",
  normalized_dna_reads
)


Create your own multiomics object

Build a multiomics object from individual components instead of using the read_tap function.

# Names of the assays to read from the h5 file.
ASSAY_NAME_VARIANT <- "dna_variants"
ASSAY_NAME_PROTEIN <- "protein_read_counts"

filename <- system.file(
  "extdata",
  "4_cell_line_mix_dna_protein.h5",
  package = "TapestriR"
)

# A pre-filtered h5 would be the ideal starting point; for now a minimum
# mutation rate is passed to the reader to mimic one.
variants <- read_assay_h5(
  filename = filename,
  assay_name = ASSAY_NAME_VARIANT,
  min_mutation_rate = 0.005
)

filtered_variants <- filter_variants(variants)

# Read the protein assay, then normalize its read counts with the clr method.
protein <- read_assay_h5(filename = filename, assay_name = ASSAY_NAME_PROTEIN)
protein_counts_norm <- as_tibble(
  clr_by_feature(protein@data_layers$read_counts),
  rownames = NA
)
# Attach the normalized counts to the protein assay as the "normalized" layer.
protein <- add_data_layer(protein, "normalized", protein_counts_norm)

# Assemble the multiomics object (moo): start from the cell annotations of the
# filtered variants, then add the two assays one after the other.
experiment <- create_moo(
  experiment_name = basename(filename),
  cell_annotations = filtered_variants@cell_annotations
)
experiment <- add_assay(moo = experiment, assay = filtered_variants)
experiment <- add_assay(experiment, protein, keep_common_cells = TRUE)


X-Y plots

1) Select the proteins to plot on the X and Y axes. 2) Select a set of other feature(s) to color the plot by. If you choose more than one feature, each feature will be plotted in a subplot.

# Choose the proteins shown on the X and Y axes. Either a column name ...

# protein_x = 'CD34'
# protein_y = 'CD38'

# ... or a column position in the normalized protein layer can be used.
protein_x <- 1
protein_y <- 2

# Choose one or more features to color by.
# color_by holds the column(s) whose values drive the coloring.

# Option: all proteins.
color_by <- experiment$assays$protein_read_counts$data_layers$normalized

# Option: a few proteins.
# color_by =  experiment$assays$protein_read_counts$data_layers$normalized %>% select('CD110','CD117')

# Option: a few variants.
# color_by =  experiment$assays$dna_variants$data_layers$NGT %>% select(1:10) %>% mutate_all(as_factor) %>% mutate_all(recode_genotypes)

# Scatterplot of the two selected proteins, colored by protein expression.
p <- tapestri_scatterplot(
  x = experiment$assays$protein_read_counts$data_layers$normalized[[protein_x]],
  y = experiment$assays$protein_read_counts$data_layers$normalized[[protein_y]],
  color_by = color_by
) +
  scale_colour_gradient2(low = "yellow", mid = "grey", high = "darkred") +
  xlab(protein_x) +
  ylab(protein_y) +
  ggtitle("Color by Protein Expression")

# Switch the coloring to the first 10 variants of the NGT layer, converted to
# factors and passed through recode_genotypes.
color_by <- experiment$assays$dna_variants$data_layers$NGT %>%
  select(1:10) %>%
  mutate_all(as_factor) %>%
  mutate_all(recode_genotypes)

# Same scatterplot, now colored by genotype.
p <- tapestri_scatterplot(
  x = experiment$assays$protein_read_counts$data_layers$normalized[[protein_x]],
  y = experiment$assays$protein_read_counts$data_layers$normalized[[protein_y]],
  color_by = color_by
) +
  xlab(protein_x) +
  ylab(protein_y) +
  ggtitle("Color by Genotypes")

Load the data from Tapestri Insights export

Works for DNA only. Export can be from a multi-sample project in Tapestri Insights.


# Locate the example Tapestri Insights export directory bundled with TapestriR.
export_dir <- system.file(
  "extdata",
  "insights_2.2_export",
  package = "TapestriR"
)

# Read the variants from the export directory.
variants <- read_insights_export(export_dir = export_dir)


MissionBio/tapestriR documentation built on Feb. 25, 2021, 8:29 p.m.