# Default knitr chunk options applied to every code chunk in this document.
knitr::opts_chunk$set(
  collapse = TRUE,
  comment = "#>",
  # Generated figure files share this path prefix.
  fig.path = "man/figures/README-",
  out.width = "100%"
)
library(tapestri.tools)
library(viridis)
library(umap)

tapestri.tools

A toolbox for the analysis of MissionBio's tapestri single cell multi-omics data.

Installation

You can install the development version of tapestri.tools from GitHub with:

# Uncomment the next line first if devtools is not installed yet.
# install.packages("devtools")
# Install tapestri.tools from the northNomad/tapestri.tools GitHub repository.
devtools::install_github("northNomad/tapestri.tools")

Read all the DNA variants

# Open the tapestri multi-omics .h5 file so it can be queried in this session.
h5f <- tapestri.tools::read_h5(
  path = "data_private/mpdxD-18_MB_33_38.dna+protein.h5"
)

# Fetch the DNA variants as a data.table and preview the first rows.
dt_variants <- get_variants(
  h5f = h5f,
  format = "data.table"
)
head(dt_variants)

tapestri.tools data wrangling

In the example experiment, we transplanted a mix of two primary AML samples into one immunodeficient mouse.

Twelve weeks after transplantation, we harvested the bone marrow cells and performed single-cell DNA + protein sequencing using MissionBio's tapestri platform. One of the AML samples carries the IDH1^R132H^ mutation; the other carries the IDH2^R140Q^ mutation. We know these two IDH variants are mutually exclusive.

# Variants of interest, named by mutation; coordinates are based on hg19.
int_variants <- c(
  IDH1_R132H = "chr2:209113112:C/T",
  IDH2_R140Q = "chr15:90631934:C/T"
)

# Upset plot of the cells across the two variants of interest.
tapestri.tools::plot_upset_with_variants(
  h5f,
  variants = int_variants,
  unknown = "unknown_to_ref"
)

Filter or index the cells based on the genotypes of specific variants:

# NOTE(review): int_ngt = c(1, 2) presumably selects the mutated genotype
# calls -- confirm against the get_cells_index() documentation.

# Cells carrying the IDH1 mutation.
index_cells_IDH1 <- get_cells_index(
  h5f,
  variants = int_variants[1],
  int_ngt = c(1, 2)
)

# Cells carrying the IDH2 mutation.
index_cells_IDH2 <- get_cells_index(
  h5f,
  variants = int_variants[2],
  int_ngt = c(1, 2)
)

# Doublets: cells indexed for both variants in a single two-variant query.
x <- get_cells_index(
  h5f,
  variants = int_variants,
  int_ngt = c(1, 2)
)
index_doublets <- intersect(x[["IDH1_R132H"]], x[["IDH2_R140Q"]])

# Remove the doublets from both single-variant index vectors.
index_cells_IDH1 <- index_cells_IDH1[!index_cells_IDH1 %in% index_doublets]
index_cells_IDH2 <- index_cells_IDH2[!index_cells_IDH2 %in% index_doublets]

length(index_cells_IDH1) #2919
length(index_cells_IDH2) #839

Read in the genotype matrix of FLT3-ITD variants for the IDH1-mutated cells

# Table of the FLT3-ITD variants found in the file; printed for inspection.
FLT3ITD <- get_flt3itd(h5f, format = "data.table")
FLT3ITD

# Label the variant ids FLT3ITD1, FLT3ITD2, ...
# seq_along() yields an empty sequence when no FLT3-ITD variant is present,
# whereas 1:length() would yield c(1, 0) and make `names<-` fail.
id_FLT3ITD <- FLT3ITD$id
names(id_FLT3ITD) <- paste0("FLT3ITD", seq_along(id_FLT3ITD))

# Read the AF and NGT assays for the FLT3-ITD variants, restricted to the
# IDH1-mutated cells, as a list with one element per assay.
x <- read_assays_variants(
  h5f,
  included_assays = c("AF", "NGT"),
  index_variants = get_variants_index(h5f, id_FLT3ITD),
  index_cells = index_cells_IDH1,
  format = "list"
)

names(x) #AF, NGT
dim(x$AF)

Protein UMAPs

# Show the protein ids available in the file.
get_protein_ids(h5f)

# Protein counts read with normalization = "clr" and transpose = TRUE, as a
# plain data.frame. The conversion is a direct call rather than a `%>%` step,
# so this chunk does not rely on magrittr being attached.
dt_protein <- as.data.frame(
  read_protein_counts(h5f, normalization = "clr", transpose = TRUE)
)

# Run UMAP on the protein table, then bind the input values to the layout
# coordinates (referenced below as columns V1 and V2).
umap_protein <- umap::umap(dt_protein)
umap_protein <- cbind(umap_protein$data, as.data.frame(umap_protein$layout))

# One UMAP scatter plot per protein, colored by that protein's values, stored
# in a list named by protein id.
# NOTE(review): ggplot2 functions are used here although ggplot2 is not
# attached in the visible code -- presumably attached elsewhere; confirm.
ls_umap <- lapply(
  setNames(nm = get_protein_ids(h5f)),
  function(id) {
    # `.data[[id]]` looks the column up by its exact name, so ids that are not
    # syntactic R names still work (the deprecated aes_string() parsed the id
    # as an expression). `id` is local to this call, so each plot keeps its
    # own protein even though ggplot evaluates aesthetics lazily.
    ggplot(umap_protein, aes(x = V1, y = V2, color = .data[[id]])) +
      geom_point() +
      theme_minimal() +
      # Title the color legend with the protein id.
      labs(x = "UMAP1", y = "UMAP2", color = id) +
      scale_color_viridis_c()
  }
)

# patchwork provides `+` for placing two plots side by side.
library(patchwork)
ls_umap[["CD33"]] + ls_umap[["CD14"]]

More plotting and filtering functions to come...



northNomad/tapestri.tools documentation built on May 31, 2024, 4:44 p.m.