View source: R/compute_cell_fractions.R
compute_cell_fractions | R Documentation |
Estimates cell fractions from TPM bulk gene expression using quanTIseq method from Finotello et al., Genome Med, 2019.
compute_cell_fractions(RNA_tpm = NULL, verbose = TRUE)
RNA_tpm |
data.frame containing TPM values with HGNC symbols in rows and samples in columns. |
verbose |
logical value indicating whether to display messages about the number of immune cell signature genes found in the gene expression data provided. |
A numeric matrix of normalized enrichment scores with samples in rows and cell types in columns.
Finotello F, Mayer C, Plattner C, Laschober G, Rieder D, Hackl H, Krogsdam A, Loncova Z, Posch W, Wilflingseder D, Sopper S, Ijsselsteijn M, Brouwer TP, Johnson D, Xu Y, Wang Y, Sanders ME, Estrada MV, Ericsson-Gonzalez P, Charoentong P, Balko J, de Miranda NFDCC, Trajanoski Z. Molecular and pharmacological modulators of the tumor immune contexture revealed by deconvolution of RNA-seq data. Genome Medicine, 2019. 11(1):34. https://doi.org/10.1186/s13073-019-0638-6
# using a SummarizedExperiment object
library(SummarizedExperiment)
# Using example exemplary dataset (Mariathasan et al., Nature, 2018)
# from easierData. Original processed data is available from
# IMvigor210CoreBiologies package.
library("easierData")
dataset_mariathasan <- easierData::get_Mariathasan2018_PDL1_treatment()
RNA_tpm <- assays(dataset_mariathasan)[["tpm"]]
# Select a subset of patients to reduce vignette building time.
pat_subset <- c(
"SAM76a431ba6ce1", "SAMd3bd67996035", "SAMd3601288319e",
"SAMba1a34b5a060", "SAM18a4dabbc557"
)
RNA_tpm <- RNA_tpm[, colnames(RNA_tpm) %in% pat_subset]
# Some genes are causing issues due to approved symbols matching more than one gene
genes_info <- easier:::reannotate_genes(cur_genes = rownames(RNA_tpm))
## Remove non-approved symbols
non_na <- !is.na(genes_info$new_names)
RNA_tpm <- RNA_tpm[non_na, ]
genes_info <- genes_info[non_na, ]## Remove entries that are withdrawn
RNA_tpm <- RNA_tpm[-which(genes_info$new_names == "entry withdrawn"), ]
genes_info <- genes_info[-which(genes_info$new_names == "entry withdrawn"), ]
## Identify duplicated new genes
newnames_dup <- unique(genes_info$new_names[duplicated(genes_info$new_names)])
newnames_dup_ind <- do.call(c, lapply(newnames_dup, function(X) which(genes_info$new_names == X)))
newnames_dup <- genes_info$new_names[newnames_dup_ind]
## Retrieve data for duplicated genes
tmp <- RNA_tpm[genes_info$old_names[genes_info$new_names %in% newnames_dup],]
## Remove data for duplicated genes
RNA_tpm <- RNA_tpm[-which(rownames(RNA_tpm) %in% rownames(tmp)),]
## Aggregate data of duplicated genes
dup_genes <- genes_info$new_names[which(genes_info$new_names %in% newnames_dup)]
names(dup_genes) <- rownames(tmp)
if (anyDuplicated(newnames_dup)){
tmp2 <- stats::aggregate(tmp, by = list(dup_genes), FUN = "mean")
rownames(tmp2) <- tmp2$Group.1
tmp2$Group.1 <- NULL
}
# Put data together
RNA_tpm <- rbind(RNA_tpm, tmp2)
# Computation of cell fractions (Finotello et al., Genome Med, 2019)
cell_fractions <- compute_cell_fractions(RNA_tpm = RNA_tpm)
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.