inst/doc/cr-overview.R

## ---- include = FALSE---------------------------------------------------------
knitr::opts_chunk$set(
  collapse = TRUE,
  comment = "#>")

## ---- echo = FALSE------------------------------------------------------------
knitr::include_graphics(system.file('help/figures/contig_schematic.png', package = 'CellaRepertorium'))

## ----  echo = FALSE-----------------------------------------------------------
knitr::include_graphics(system.file('help/figures/table_schematic.png', package = 'CellaRepertorium'))

## -----------------------------------------------------------------------------
library(CellaRepertorium)
library(dplyr)
data("contigs_qc")
cdb = ContigCellDB_10XVDJ(contigs_qc, 
                          contig_pk = c('barcode', 'pop', 'sample', 'contig_id'), 
                          cell_pk = c('barcode', 'pop', 'sample'))

## -----------------------------------------------------------------------------
cdb$contig_tbl$cdr_nt_len = nchar(cdb$contig_tbl$cdr3_nt)

## -----------------------------------------------------------------------------
suppressPackageStartupMessages(library(Biostrings))
cdb = cdb %>% mutate_cdb(cdr3_g_content = alphabetFrequency(DNAStringSet(cdr3_nt))[,'G'], tbl = 'contig_tbl')

head(cdb$contig_tbl, n = 4) %>% 
    select(contig_id, cdr3_nt, cdr_nt_len, cdr3_g_content)

## -----------------------------------------------------------------------------
aa80 = cdhit_ccdb(cdb, 'cdr3', type = 'AA', cluster_pk = 'aa80', 
                  identity = .8, min_length = 5)
aa80 = fine_clustering(aa80, sequence_key = 'cdr3', type = 'AA')

## -----------------------------------------------------------------------------
head(aa80$cluster_tbl)
head(aa80$contig_tbl) %>% select(contig_id, aa80, is_medoid, `d(medoid)`)

## -----------------------------------------------------------------------------
library(ggplot2)

paired_chain = enumerate_pairing(cdb, chain_recode_fun = 'guess')

ggplot(paired_chain, aes(x = interaction(sample, pop), fill = pairing)) + 
    geom_bar() + facet_wrap(~canonical, scale = 'free_x') + 
    coord_flip() + theme_minimal()

## -----------------------------------------------------------------------------
aa80 = canonicalize_cluster(aa80, representative = 'cdr3', 
                            contig_fields = c('cdr3', 'cdr3_nt', 'chain', 'v_gene', 'd_gene', 'j_gene'))
aa80$cluster_pk = 'representative'


## -----------------------------------------------------------------------------
aa80 = rank_chain_ccdb(aa80)

## -----------------------------------------------------------------------------
pairing_list = pairing_tables(aa80, table_order = 2, orphan_level = 1, min_expansion = 3, cluster_keys = c('cdr3', 'representative', 'chain', 'v_gene', 'j_gene', 'avg_distance'))


## ----plot_expanded------------------------------------------------------------
pairs_plt = ggplot(pairing_list$cell_tbl, aes(x = cluster_idx.1_fct, y = cluster_idx.2_fct)) + 
  geom_jitter(aes(color = sample, shape = pop), width = .2, height = .2) + 
  theme_minimal() + xlab('TRB') + ylab('TRA') + 
  theme(axis.text.x = element_text(angle = 45))

pairs_plt = map_axis_labels(pairs_plt, pairing_list$idx1_tbl, pairing_list$idx2_tbl, aes_label  = 'chain')
pairs_plt


## -----------------------------------------------------------------------------
sessionInfo()

Try the CellaRepertorium package in your browser

Any scripts or data that you put into this service are public.

CellaRepertorium documentation built on Nov. 8, 2020, 7:48 p.m.