AIRR-seq)

# Download original data from 10X website
# PBMCs from C57BL/6 mice - TCR enrichment from amplified cDNA
# V3 chemistry
# https://support.10xgenomics.com/single-cell-vdj/datasets/3.0.0/vdj_v1_mm_c57bl6_pbmc_t
download.file('http://cf.10xgenomics.com/samples/cell-vdj/3.0.0/vdj_v1_mm_c57bl6_pbmc_t/vdj_v1_mm_c57bl6_pbmc_t_all_contig_annotations.csv',
              destfile = 'inst/script/vdj_v1_mm_c57bl6_pbmc_t_all_contig_annotations.csv')
download.file('http://cf.10xgenomics.com/samples/cell-vdj/3.0.0/vdj_v1_mm_c57bl6_pbmc_t/vdj_v1_mm_c57bl6_pbmc_t_all_contig_annotations.json',
              destfile = 'inst/script/vdj_v1_mm_c57bl6_pbmc_t_all_contig_annotations.json')
# PBMCs from BALB/c mice - TCR enrichment from amplified cDNA
# https://support.10xgenomics.com/single-cell-vdj/datasets/3.0.0/vdj_v1_mm_balbc_pbmc_t
download.file('http://cf.10xgenomics.com/samples/cell-vdj/3.0.0/vdj_v1_mm_balbc_pbmc_t/vdj_v1_mm_balbc_pbmc_t_all_contig_annotations.csv',
              destfile = 'inst/script/vdj_v1_mm_balbc_pbmc_t_all_contig_annotations.csv')
download.file('http://cf.10xgenomics.com/samples/cell-vdj/3.0.0/vdj_v1_mm_balbc_pbmc_t/vdj_v1_mm_balbc_pbmc_t_all_contig_annotations.json',
              destfile = 'inst/script/vdj_v1_mm_balbc_pbmc_t_all_contig_annotations.json')

# Subsample / split
library(readr)
library(jsonlite)
library(dplyr)
library(purrr)
library(tidyr)

csvs = list.files('inst/script', pattern = '.+?t_all_contig.+?\\.csv$', full.names = TRUE)
jsns = list.files('inst/script', pattern = '.+?t_all_contig.+?\\.json$', full.names = TRUE)

# Pull out sample and population names
contig_map = tibble(file = csvs, strain = factor(c('balbc', 'b6'), levels = c('balbc', 'b6')))
jsn_map = tibble(file = jsns, strain = factor(c('balbc', 'b6'), levels = c('balbc', 'b6')))

# read in CSV
contig_map = contig_map %>% rowwise() %>% mutate(contigs = list(read_csv(file)))

# Split into (potentially overlapping) subsets of 200 barcodes
set.seed(1234)
subsample_split = function(contigs, ncells = 200, nsplit = 3){
    cells = contigs %>% group_by(barcode) %>% summarize()
    contigs_splt = map(seq_len(nsplit), function(i){
          cell_samp = cells[sample(nrow(cells), ncells),]
          contigs %>% semi_join(cell_samp, by = 'barcode') %>% ungroup()
    })
}

contig_map = contig_map %>% rowwise() %>% mutate(subsample_list = list(subsample_split(contigs))) %>% unnest(subsample_list) %>% mutate(sample_idx = seq_along(.$strain)) %>% ungroup()

contig_map %>% rowwise() %>% do({
    fname = sprintf('inst/extdata/all_contig_annotations_%s_%d.csv', .$strain, .$sample_idx)
    write_csv(.$subsample_list, path = fname)
    system2('xz', fname)
    tibble()
})

# Derive json
jsn_map = jsn_map %>% rowwise() %>% mutate(json = list(fromJSON(file(file), flatten = FALSE) %>% as_tibble))

# barcodes present in subsamples
ss_bc = contig_map %>% unnest(subsample_list) %>% select(strain, barcode, sample_idx) %>% split(., .$strain)


jsn_ss = map2_dfr(jsn_map$json, ss_bc, function(.x, .y){
    j = right_join(.x, .y, by = 'barcode')
    nest(j, -strain, -sample_idx, .key = 'jsn')
})

jsn_ss %>% rowwise() %>% do({
    code = toJSON(.$jsn)
    fname = sprintf('inst/extdata/all_contig_annotations_%s_%d.json', .$strain, .$sample_idx)
    write_lines(code, path = fname)
    system2('xz', fname)
    tibble()
})

knitr::purl('vignettes/mouse_tcell_qc.Rmd', output = 'inst/script/mouse_tcells_qc.R')
source('inst/script/mouse_tcells_qc.R')
ccdb_ex = ContigCellDB_10XVDJ(contigs_qc, contig_pk = c('pop',   'sample', 'barcode', 'contig_id'), cell_pk = c('pop',   'sample', 'barcode'))
use_data(contigs_qc, overwrite = TRUE)
use_data(ccdb_ex, overwrite = TRUE)
unlink('inst/script/mouse_tcells_qc.R')

Any scripts or data that you put into this service are public.

CellaRepertorium documentation built on Nov. 8, 2020, 7:48 p.m.

rdrr.io home R language documentation Run R code online

CRAN packages Bioconductor packages R-Forge packages GitHub packages

Note that we can't provide technical support on individual packages. You should contact the package authors for that.

CellaRepertorium
Data structures, clustering and testing for single cell immune receptor repertoires (scRNAseq RepSeq/AIRR-seq)

inst/script/10XMouseTCR_v3_chem.R
In CellaRepertorium: Data structures, clustering and testing for single cell immune receptor repertoires (scRNAseq RepSeq/AIRR-seq)

Try the CellaRepertorium package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

CellaRepertorium Data structures, clustering and testing for single cell immune receptor repertoires (scRNAseq RepSeq/AIRR-seq)

inst/script/10XMouseTCR_v3_chem.R In CellaRepertorium: Data structures, clustering and testing for single cell immune receptor repertoires (scRNAseq RepSeq/AIRR-seq)

Try the CellaRepertorium package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

CellaRepertorium
Data structures, clustering and testing for single cell immune receptor repertoires (scRNAseq RepSeq/AIRR-seq)

inst/script/10XMouseTCR_v3_chem.R
In CellaRepertorium: Data structures, clustering and testing for single cell immune receptor repertoires (scRNAseq RepSeq/AIRR-seq)