Load libraries
library(ggplot2) library(biomaRt) library(GGally)
Load cell metadata
metadata_fastq = read.table("../data/stubbington_2016/E-MTAB-3857.sdrf.txt", sep = "\t", header = T) metadata_cells = metadata_fastq[!duplicated(metadata_fastq$Comment.ENA_RUN.), c("Comment.ENA_RUN.", "Source.Name", "Factor.Value.infect.", "Factor.Value.time.", "Characteristics.cell.type.")] rownames(metadata_cells) = metadata_cells$Comment.ENA_RUN. metadata_cells$Factor.Value.infect. = as.character(metadata_cells$Factor.Value.infect.) metadata_cells$Factor.Value.infect.[metadata_cells$Factor.Value.infect.=="Salmonella typhimurium"] = "S. typh"
Load gene info
# Mouse bm.q_mouse <- read.table("../data/stubbington_2016/GRCm38.p5_gene_info.txt", header = T, sep = "\t") colnames(bm.q_mouse) = c("ensembl_gene_id", "external_gene_name", "chromosome_name", "strand", "gene_biotype") rownames(bm.q_mouse) = bm.q_mouse$ensembl_gene_id
Load data
count_data = read.table("../data/stubbington_2016/stubbington_2016_genes_salmon_counts.txt", header = T, row.names = 1) count_data = count_data[grepl("ENS", rownames(count_data)), ] count_data = count_data[rowSums(count_data)>0,rownames(metadata_cells)]
Cell QC
multiqc_tab = read.table("../data/stubbington_2016/multiqc_salmon.txt", header = T, row.names = 1, sep = "\t") qc_df = data.frame(row.names = colnames(count_data), "n_genes" = colSums(count_data>0), "perc_mt" = colSums(count_data[rownames(count_data) %in% bm.q_mouse[bm.q_mouse$chromosome_name=="MT", 1],])/colSums(count_data)*100, "log10_reads" = log10(colSums(count_data)), "perc_map" = multiqc_tab[colnames(count_data),"percent_mapped"]) x = cbind(qc_df, metadata_cells)[,c(1:4, 7:8)] x$Factor.Value.time. = as.factor(x$Factor.Value.time.) ggpairs(x, mapping = aes_string(colour = "Factor.Value.time.", shape = "Factor.Value.infect."))+ theme_classic()+ theme(axis.text = element_text(size = 7.2), axis.text.x = element_text(angle = 20))
Filter data
use_cells = rownames(qc_df)[qc_df$n_genes>4000 & qc_df$perc_mt<=10 & qc_df$log10_reads>=5.5& qc_df$perc_map>=40] metadata_cells_filt = metadata_cells[use_cells,] count_data_filt = count_data[,use_cells] count_data_filt = count_data_filt[rowSums(count_data_filt>0)>2,]
Load package
library(RTraCe)
Load TraCeR results
tracer_simple = readTracer(summaryPath = "../data/stubbington_2016/filtered_TCRABDG_summary/", get_vdj_segments = T) tracer_simple_AB = readTracer(summaryPath = "../data/stubbington_2016/filtered_TCRABDG_summary/", clonotypes = list(c("A"), c("B")), nameVar = "clAB", get_vdj_segments = T)
Run PCA on data
pca = pcaMethods::pca(apply(log2(count_data_filt/colSums(count_data_filt)*1000000+1), 1, scale), method = "svd", nPcs = 3, scale = "none", center = F) metadata_cells_filt = cbind(metadata_cells_filt, pca@scores)
Plot clonotypes in data projection
proj_plot = plotProjection(tracer_data = tracer_simple, pheno_data = metadata_cells_filt, dimensions = c("PC1", "PC2"), additional_pheno = "Factor.Value.infect.", plot_out = T) proj_plot = plotProjection(tracer_data = tracer_simple_AB, pheno_data = metadata_cells_filt, dimensions = c("PC1", "PC2"), clonotypes = "clAB", additional_pheno = "Factor.Value.infect.", plot_out = T)
Barplots of cells with clonotypes per condition(s)
proj_plot = plotCounts(tracer_data = tracer_simple_AB, clonotypes = "clAB", pheno_data = metadata_cells_filt, category = "Factor.Value.infect.", plot_out = T) proj_plot = plotCounts(tracer_data = tracer_simple_AB, clonotypes = "clAB", pheno_data = metadata_cells_filt, category = c("Factor.Value.infect.", "Factor.Value.time."), plot_out = T)
Shared clonotypes
proj_plot = plotSharedClones(tracer_data = tracer_simple_AB, clonotypes = "tcr_info", pheno_data = metadata_cells_filt, category = "Factor.Value.time.", plot_out = T)
Matching chains
proj_plot = plotMatchingChains(tracer_data = tracer_simple_AB, pheno_data = metadata_cells_filt, categories = "Factor.Value.time.", plot_out = T)
Clonotype sizes
x = plotClonotypeSize(tracer_data = tracer_simple_AB, pheno_data = metadata_cells_filt, category = "Factor.Value.time.", plot_out = T) y = plotClonotypeSize(tracer_data = tracer_simple_AB, pheno_data = metadata_cells_filt, category = NULL, plot_out = T)
VDJ combinations
plotVDJmatrix(tracer_simple_AB, pheno_data = metadata_cells_filt, category = "Factor.Value.time.", chain = "B")
VDJ frequency
plotVDJfrequency(tracer_simple_AB, pheno_data = metadata_cells_filt, category = "Factor.Value.time.", chain = "A")
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.