Example workflow for RTraCe

Startup

Load libraries

library(ggplot2)
library(biomaRt)
library(GGally)

Load cell metadata

metadata_fastq = read.table("../data/stubbington_2016/E-MTAB-3857.sdrf.txt", sep = "\t", header = T)
metadata_cells = metadata_fastq[!duplicated(metadata_fastq$Comment.ENA_RUN.),
                                c("Comment.ENA_RUN.", "Source.Name", "Factor.Value.infect.", "Factor.Value.time.",
                                  "Characteristics.cell.type.")]
rownames(metadata_cells) = metadata_cells$Comment.ENA_RUN.
metadata_cells$Factor.Value.infect. = as.character(metadata_cells$Factor.Value.infect.)
metadata_cells$Factor.Value.infect.[metadata_cells$Factor.Value.infect.=="Salmonella typhimurium"] = "S. typh"

Load gene info

# Mouse
bm.q_mouse <- read.table("../data/stubbington_2016/GRCm38.p5_gene_info.txt", header = T, sep = "\t")
colnames(bm.q_mouse) = c("ensembl_gene_id", "external_gene_name", "chromosome_name", "strand", "gene_biotype")
rownames(bm.q_mouse) = bm.q_mouse$ensembl_gene_id

Load data

count_data = read.table("../data/stubbington_2016/stubbington_2016_genes_salmon_counts.txt", 
                        header = T, row.names = 1)
count_data = count_data[grepl("ENS", rownames(count_data)), ]
count_data = count_data[rowSums(count_data)>0,rownames(metadata_cells)]

Cell QC

multiqc_tab = read.table("../data/stubbington_2016/multiqc_salmon.txt", 
                         header = T, row.names = 1, sep = "\t")

qc_df = data.frame(row.names = colnames(count_data),
                   "n_genes" = colSums(count_data>0),
                   "perc_mt" = colSums(count_data[rownames(count_data) %in% bm.q_mouse[bm.q_mouse$chromosome_name=="MT",
                                          1],])/colSums(count_data)*100,
                   "log10_reads" = log10(colSums(count_data)),
                   "perc_map" = multiqc_tab[colnames(count_data),"percent_mapped"])

x = cbind(qc_df, metadata_cells)[,c(1:4, 7:8)]
x$Factor.Value.time. = as.factor(x$Factor.Value.time.)
ggpairs(x, mapping = aes_string(colour = "Factor.Value.time.", shape = "Factor.Value.infect."))+
  theme_classic()+
  theme(axis.text = element_text(size = 7.2),
          axis.text.x = element_text(angle = 20))

Filter data

use_cells = rownames(qc_df)[qc_df$n_genes>4000 & 
                              qc_df$perc_mt<=10 &
                              qc_df$log10_reads>=5.5&
                              qc_df$perc_map>=40]

metadata_cells_filt = metadata_cells[use_cells,]
count_data_filt = count_data[,use_cells]
count_data_filt = count_data_filt[rowSums(count_data_filt>0)>2,]

Testing RTraCe package

Load package

library(RTraCe)

Load TraCeR results

tracer_simple = readTracer(summaryPath = "../data/stubbington_2016/filtered_TCRABDG_summary/", get_vdj_segments = T)
tracer_simple_AB = readTracer(summaryPath = "../data/stubbington_2016/filtered_TCRABDG_summary/", 
                clonotypes = list(c("A"), c("B")),
                nameVar = "clAB", get_vdj_segments = T)

Run PCA on data

pca = pcaMethods::pca(apply(log2(count_data_filt/colSums(count_data_filt)*1000000+1), 
                            1, scale), method = "svd",
                          nPcs = 3, scale = "none", center = F)
metadata_cells_filt = cbind(metadata_cells_filt, pca@scores)

Plot clonotypes in data projection

proj_plot = plotProjection(tracer_data = tracer_simple, pheno_data = metadata_cells_filt, 
               dimensions = c("PC1", "PC2"), additional_pheno = "Factor.Value.infect.", plot_out = T)
proj_plot = plotProjection(tracer_data = tracer_simple_AB, pheno_data = metadata_cells_filt, 
               dimensions = c("PC1", "PC2"), clonotypes = "clAB", additional_pheno = "Factor.Value.infect.", plot_out = T)

Barplots of cells with clonotypes per condition(s)

proj_plot = plotCounts(tracer_data = tracer_simple_AB, clonotypes = "clAB", pheno_data = metadata_cells_filt, 
                       category = "Factor.Value.infect.", plot_out = T)
proj_plot = plotCounts(tracer_data = tracer_simple_AB, clonotypes = "clAB", pheno_data = metadata_cells_filt, 
                       category = c("Factor.Value.infect.", "Factor.Value.time."), plot_out = T)

Shared clonotypes

proj_plot = plotSharedClones(tracer_data = tracer_simple_AB, 
                             clonotypes = "tcr_info", 
                             pheno_data = metadata_cells_filt, 
                             category = "Factor.Value.time.", plot_out = T)

Matching chains

proj_plot = plotMatchingChains(tracer_data = tracer_simple_AB, 
                               pheno_data = metadata_cells_filt, 
                               categories = "Factor.Value.time.", plot_out = T)

Clonotype sizes

x = plotClonotypeSize(tracer_data = tracer_simple_AB, 
                               pheno_data = metadata_cells_filt, 
                               category = "Factor.Value.time.", plot_out = T)
y = plotClonotypeSize(tracer_data = tracer_simple_AB, 
                               pheno_data = metadata_cells_filt, 
                               category = NULL, plot_out = T)

VDJ combinations

plotVDJmatrix(tracer_simple_AB, pheno_data = metadata_cells_filt, category = "Factor.Value.time.", chain = "B")

VDJ frequency

plotVDJfrequency(tracer_simple_AB, pheno_data = metadata_cells_filt, category = "Factor.Value.time.", chain = "A")


tomasgomes/RTraCe documentation built on May 29, 2019, 9:55 a.m.