In waldronlab/subtypeHeterogeneity: Tumor subclonality of expression-based cancer subtypes

Single cell RNA-seq analysis (10x tumors)

library(knitr)
opts_chunk$set(out.width = "100%", cache = TRUE)
knitr::opts_chunk$set(dev = "png", dev.args = list(type = "cairo-png"))
options(repos = c(CRAN = 'https://cloud.r-project.org'))

Setup

Dependencies:

# OVC
library(subtypeHeterogeneity) 
library(consensusOV)

# Single cell
library(scater)
library(scran)
library(DropletUtils)
library(SingleR)

# Annotation
library(EnsDb.Hsapiens.v86)
library(org.Hs.eg.db)
library(EnrichmentBrowser)

# Plotting
library(ComplexHeatmap)
library(ggpubr)

Constants:

SUBTYPES <- c("DIF", "IMR", "MES", "PRO")
CELL.TYPES <- c("EPI", "LYMPH", "MYE", "STROM", "ENDO")
data.dir <- system.file("extdata", package = "subtypeHeterogeneity")

Colors

cb.pink <- "#CC79A7"
cb.red <- "#D55E00"
cb.blue <- "#0072B2"
cb.yellow <- "#F0E442"
cb.green <- "#009E73"
cb.lightblue <- "#56B4E9"
cb.orange <- "#E69F00"

stcols <- c(cb.lightblue, cb.green, cb.orange, cb.pink) 
names(stcols) <- SUBTYPES

Parallel computation:

bp <- BiocParallel::registered()[[1]]

Note: The sections Preprocessing, Dimensionsality reduction, and Clustering describe the processing of the CellRanger output files at the example of Tumor T90. Tumors T59, T76, T77, and T89 were processed analogously. Execution of the code in these sections requires download of the CellRanger output files deposited in GEO (accession number: GSE154600).

Section Annotation, Subsection Visualization of results (all tumors), and subsequent sections describe how Figures 4 and 5 of the main manuscript were created. The code is based on the fully processed and annotated SingleCellExperiments (resulting from processing steps described in Section 2 - Section 5.3).

Preprocessing

Read 10X output (required files: matrix.mtx, barcodes.tsv, genes.tsv)

sce <- DropletUtils::read10xCounts("tumor90")
dim(sce)

Annotating the rows:

rownames(sce) <- scater::uniquifyFeatureNames(rowData(sce)$ID,
                                              rowData(sce)$Symbol)
location <- AnnotationDbi::mapIds(EnsDb.Hsapiens.v86, keys = rowData(sce)$ID, 
                                  column = "SEQNAME", keytype = "GENEID")

Testing for deviations from ambient expression:

bcrank <- DropletUtils::barcodeRanks(counts(sce))
uniq <- !duplicated(bcrank$rank)

plot(bcrank$rank[uniq], bcrank$total[uniq], log="xy",
    xlab="Rank", ylab="Total UMI count", cex.lab=1.2)

abline(h=bcrank$inflection, col="darkgreen", lty=2)
abline(h=bcrank$knee, col="dodgerblue", lty=2)

legend("bottomleft", legend=c("Inflection", "Knee"),
    col=c("darkgreen", "dodgerblue"), lty=2, cex=1.2)

Quality control on the cells:

df <- scater::perCellQCMetrics(sce, 
                       subsets = list(Mito = which(location == "MT")),
                       BPPARAM = bp)
par(mfrow = c(1,3))
hist(df$sum/1e3, xlab="Library sizes (thousands)", main="",
    breaks=20, col="grey80", ylab="Number of cells")
hist(df$detected, xlab="Number of expressed genes", main="",
    breaks=20, col="grey80", ylab="Number of cells")
hist(df$subsets_Mito_percent, xlab="Mitochondrial proportion (%)",
    ylab="Number of cells", breaks=20, main="", col="grey80")
par(mfrow = c(1,1))

high.mito <- scater::isOutlier(df$subsets_Mito_percent, nmads = 3, type = "higher")
libsize.drop <- scater::isOutlier(df$sum, nmads = 1, type = "lower", log = TRUE)
feature.drop <- scater::isOutlier(df$detected, nmads = 1, type = "lower", log = TRUE)
sce <- sce[,!(high.mito | libsize.drop | feature.drop)]

df <- data.frame(ByHighMito = sum(high.mito),
           ByLibSize = sum(libsize.drop),
           ByFeature = sum(feature.drop),
           Remaining = ncol(sce))

df
dim(sce)

Examining gene expression:

ave <- scater::calculateAverage(sce, BPPARAM = bp)
hist(log10(ave), breaks = 100, main = "", col = "grey",
    xlab = expression(Log[10]~"average count"))

Remove genes that have too low average counts

rowData(sce)$AveCount <- ave
to.keep <- ave > 0.001
sce <- sce[to.keep,]

# Number excluded genes
sum(!to.keep)
dim(sce)

Library-size normalization:

clusters <- scran::quickCluster(sce, method = "igraph", 
                                min.mean = 0.1, BPPARAM = bp)
sce <- scran::computeSumFactors(sce, min.mean = 0.1,
                                cluster = clusters, BPPARAM = bp)
plot(scater::librarySizeFactors(sce), sizeFactors(sce), pch = 16,
    xlab = "Library size factors", ylab = "Deconvolution factors", log = "xy")
sce <- scater::logNormCounts(sce)
sce

Dimensionality reduction

Modelling the mean-variance trend:

dec.pbmc <- scran::modelGeneVarByPoisson(sce, BPPARAM = bp)
top.pbmc <- scran::getTopHVGs(dec.pbmc, prop = 0.1)
plot(dec.pbmc$mean, dec.pbmc$total, pch=16, cex=0.5,
    xlab="Mean of log-expression", ylab="Variance of log-expression")
curfit <- metadata(dec.pbmc)
curve(curfit$trend(x), col = 'dodgerblue', add = TRUE, lwd = 2)

Denoise log-expression data by removing principal components corresponding to technical noise (might take a while):

sce <- scran::denoisePCA(sce, subset.row = top.pbmc,
                         technical = dec.pbmc, BPPARAM = bp)
ncol(reducedDim(sce, "PCA"))
plot(attr(reducedDim(sce), "percentVar"), xlab = "PC",
     ylab = "Proportion of variance explained")
abline(v = ncol(reducedDim(sce, "PCA")), lty = 2, col = "red")

tSNE:

sce <- scater::runTSNE(sce, dimred = "PCA", perplexity = 30, BPPARAM = bp)
sce$sizeFactor <- sizeFactors(sce)
scater::plotTSNE(sce, colour_by = "sizeFactor")

Clustering

Clustering with graph-based methods:

snn.gr <- scran::buildSNNGraph(sce, use.dimred = "PCA", BPPARAM = bp, k = 25)
clusters <- igraph::cluster_walktrap(snn.gr)
sce$Cluster <- factor(clusters$membership)
table(sce$Cluster)

cluster.mod <- scran::clusterModularity(snn.gr, sce$Cluster,
                                        get.weights = TRUE)
log.ratio <- log2(cluster.mod$observed / cluster.mod$expected + 1)

ComplexHeatmap::Heatmap(log.ratio,
                        cluster_rows = FALSE, cluster_columns = FALSE)
scater::plotTSNE(sce, colour_by = "Cluster")

Annotation (cell type and consensus subtype)

SingleR cell type annotation

Transfer cell type annotation from reference profiles in the Human Primary Cell Atlas and ENCODE:

hpc <- transferCellType(sce, "hpca")
encode <- transferCellType(sce, "encode")

sce$hpca.celltype <- hpc$labels 
sce$encode.celltype <- encode$labels

maxScore <- function(res)
    vapply(1:nrow(res), function(i) res$scores[i, res[i,"labels"]], numeric(1))

sce$hpca.celltype.score <- maxScore(hpc)
sce$encode.celltype.score <- maxScore(hpc)

Display main cell types present:

plotMainCellTypes(sce, "hpca.celltype")
plotMainCellTypes(sce, "encode.celltype")

Display ambiguity of cell type assignments:

SingleR::plotScoreHeatmap(hpc)
SingleR::plotScoreHeatmap(encode)

Marker genes

Adopted from Shih et al., PLoS One (2018):

epithelial <- c("EPCAM", "KRT8", "KRT18", "KRT19")
lymphocyte <- c("PTPRC", "CD3E", "CD19", "MS4A1")
endothelial <- c("PECAM1", "CD34")
fibroblast <- c("ACTA2", "DCN", "ACTB")
stromal <- c("THY1", "ENG", "VIM", "CD44")

origin <- c("PAX8", "CALB2", "COL11A1")
scater::plotTSNE(sce, colour_by = epithelial[1])

consensusOV subtype annotation

sce.entrez <- EnrichmentBrowser::idMap(sce, org = "hsa", 
                                       from = "SYMBOL", to = "ENTREZID")
am <- as.matrix(assay(sce.entrez, "logcounts"))
cst <- consensusOV::get.consensus.subtypes(am, names(sce.entrez))

sts <- sub("_consensus$", "", as.vector(cst$consensusOV.subtypes))
sce$subtype <- sts
sce$margin <- consensusOV::margin(cst$rf.probs)

summary(sce$margin)
scater::plotTSNE(sce, colour_by = "subtype")
scater::plotTSNE(sce, colour_by = "margin")

Visualization of results (all tumors)

Read the pre-processed 10X data for all tumors as list of SingleCellExperiments.

tumors <- c(59, 76, 77, 89, 90) 
tags <- c("cu0pxp0sepsvhos", "taff3g6nlmf5yk4", "2otdxfunxnlr6ql",
          "sojupsrjxnvnf6d", "ydokzcxgkugo678")
durl <- "https://dl.dropboxusercontent.com/s/tag/sampleXX_sce.rds"

readSCE <- function(i)
{ 
    dl <- sub("XX", tumors[i], durl)
    dl <- sub("tag", tags[i], dl)
    readRDS(file(dl))
}

sces <- lapply(1:5, readSCE)
names(sces) <- paste0("T", tumors)
sces

Summarize cell type annotation:

sces <- lapply(sces, annoCellType)

vapply(sces, function(sce) table(sce$celltype), integer(5))
vapply(sces, function(sce) table(sce$subtype), integer(4))

Display cell type and subtype annotation for one tumor at a time:

p2a <- plotType(sces[[1]])
p2b <- plotType(sces[[1]], type = "subtype")
p2 <- ggarrange(p2b + theme_bw() + theme(plot.margin = margin(r = 1)), 
                p2a + theme_bw() + theme(axis.text.y = element_blank(), 
                                         axis.ticks.y = element_blank(), 
                                         axis.title.y = element_blank(),
                                         plot.margin = margin(l = 1, r=1)), 
                legend = "top", align = "h", labels = c("A", "B"))
p2

Facet cell type and subtype visualization for all tumors:

stp <- facetTumors(sces, col = "subtype", pal = stcols)
pal <- ggpubr::get_palette("npg", 5)
names(pal) <- CELL.TYPES
ctp <- facetTumors(sces, col = "celltype", pal = pal) 
p2 <- ggpubr::ggarrange(stp + theme(axis.text.x = element_blank(),
                                    axis.title.x = element_blank()), 
                        ctp, nrow = 2, align = "v", labels = c("A", "B"),
                        heights = c(1, 1.2))

Barplot percentage of subtype / cell type for all 10X tumors:

cttab <- vapply(sces, getPerc, numeric(5), perc=FALSE)
total <- colSums(cttab)
prop.test(cttab["EPI",], total)
prop.test(cttab["STROM",], total)

stbp <- bpType(sces, "subtype", pal = stcols)
ctbp <- bpType(sces, "celltype", pal = pal)

Cell type vs subtype matrix:

mat <- getCelltypeSubtypeMatrix(sces)
ctstbp <- bpCrossType(mat, pal)
ctstbp

Or as matrix plot:

mat <- round(mat / sum(mat) * 100)
mat <- mat[,CELL.TYPES]
matrixPlot(mat, cnames = c("SUBTYPE", "CELL.TYPE"), high.color = "darkgreen")

p1 <- ggarrange(stbp,
                ctbp + theme(axis.text.y = element_blank(),
                             axis.title.y = element_blank()),
                             # axis.ticks.x = element_blank(),
                             # axis.line.x = element_blank()), 
                ctstbp + theme(axis.text.y = element_blank(),
                             axis.title.y = element_blank()),
                nrow = 1, hjust = c(-0.5, 0.5, 0.5),
                widths = c(1.3,1,1),
                legend = "none", align = "h", labels = c("C", "D", "E"))
p1

Margin scores:

spl.margin <- function(sce, col = "celltype")
    reshape2::melt(split(sce$margin, sce[[col]]))
psces <- lapply(sces, spl.margin)
for(i in tumors) psces[[match(i, tumors)]]$tumor <- paste0("T", i)
df <- do.call(rbind, psces)
colnames(df) <- c("MARGIN", "CELL.TYPE", "TUMOR")
df[[2]] <- factor(df[[2]], levels = CELL.TYPES)

Boxplot of margin scores:

ggboxplot(df, x = "CELL.TYPE", y = "MARGIN", width = 0.8, notch = TRUE,
            fill = "CELL.TYPE", palette = get_palette("lancet", 5),
            facet.by = "TUMOR", nrow = 1,  
            x.text.angle = 45, legend="none", ggtheme = theme_bw(base_size = 12))

Only epithelial cells:

df <- subset(df, CELL.TYPE == "EPI")
bp <- ggboxplot(df, x = "TUMOR", y = "MARGIN", width = 0.8, notch = TRUE,
                fill = "lightgrey", #cb.red,
                legend="none", ggtheme = theme_bw(), ylab = "MARGIN (EPI)",
                font.x = 11, font.y = 11, font.tickslab = c(10, "plain", "black"))
                # ylim = c(0,1),x.text.angle = 45, 
df.points <- data.frame(x=1:5, y=c(0.376, 0.766, 0.182, 0.646, 0.214))
bp <- bp + geom_point(data = df.points, aes(x = x, y = y), shape = 8, size = 3) +
           geom_text(data = df.points[2,], aes(x = x, y = y),
                     label = "Bulk", hjust = 0, nudge_x = 0.1, size = 4)
bp

Bulk RNA-seq

durl <- "https://dl.dropboxusercontent.com/s/lit4wok327u1mps"
dfile <- file.path(durl, "Bulk_RNAseq_March2020.txt")
dat <- read.delim(file(dfile), as.is=TRUE)
cpm.ind <- seq_len(which(dat[,1] == "")[1] - 1)
cpm.dat <- dat[cpm.ind, 1:8]
raw.dat <- dat[,9:16]
cpm.sym <- cpm.dat[,1]
raw.sym <- raw.dat[,1]

Use pre-computed library size-normalized expression values for subtype assignment:

rownames(cpm.dat) <- cpm.dat[,2] 
cpm.dat <- cpm.dat[,3:8]
colnames(cpm.dat) <- paste0("T", c(59, 77, 76, 89, 90, 91)) 
cpm.dat <- as.matrix(cpm.dat)
mode(cpm.dat) <- "numeric"
cpm.se <- SummarizedExperiment(assays = list(cpm = cpm.dat))
cpm.se <- EnrichmentBrowser::idMap(cpm.se, org="hsa", from="ENSEMBL", to="ENTREZID")
cpm.file <- file.path(data.dir, "bulk_cpm_subtypes.rds")

cst <- consensusOV::get.consensus.subtypes(assay(cpm.se), names(cpm.se))
saveRDS(cst, file = cpm.file)

cst <- readRDS(cpm.file)
cst

Sanity check: start from raw read counts

rownames(raw.dat) <- raw.dat[,2]
raw.dat <- raw.dat[,3:8]
colnames(raw.dat) <- paste0("T", c(59, 77, 76, 89, 90, 91))
raw.dat <- as.matrix(raw.dat)
mode(raw.dat) <- "numeric"
raw.dat <- edgeR::cpm(raw.dat, log=TRUE)
raw.se <- SummarizedExperiment(assays = list(raw = raw.dat))
raw.se <- EnrichmentBrowser::idMap(raw.se, org="hsa", from="ENSEMBL", to="ENTREZID")
raw.file <- file.path(data.dir, "bulk_raw_subtypes.rds")

cst2 <- consensusOV::get.consensus.subtypes(assay(raw.se), names(raw.se))
saveRDS(cst2, file = raw.file)

Pretty much the same as when starting from already normalized expression values:

cst2 <- readRDS(raw.file)
cst2

Contrast bulk and single-cell margins:

mat <- t(cst$rf.probs)
rownames(mat) <- sub("_consensus", "", rownames(mat))
mat <- round(mat, digits = 2)
mat <- mat[c(3,4,1,2),c(1,3,2,4)]

mat <- rbind(c(0.15, 0.02, 0.5, 0.13, 0.12),
             c(0.59, 0.03, 0.14, 0.77, 0.07),
             c(0.22, 0.85, 0.03, 0.08, 0.3),
             c(0.04, 0.09, 0.32, 0.02, 0.52))
colnames(mat) <- paste0("T", tumors)
rownames(mat) <- SUBTYPES
mat <- mat[4:1,]
mp <- matrixPlot(t(mat), cnames = c("TUMORS", "CLASS.PROB"),
                    bw.thresh = 0.5, gthresh = 0.2)
mp <- mp + theme(axis.text.x = element_blank(),
                 axis.title.x = element_blank(),
                 axis.ticks.x = element_blank(),
                 axis.line.x = element_blank(),
                 plot.margin = margin(b = 0))
mp <- mp + ylab("BULK")
p0 <- ggarrange(mp, bp, vjust = 0.5, hjust = 0.5,
                nrow = 2, align = "v", heights = c(1,3), labels = c("F", ""))
p0
p3 <- ggarrange(p1, p0, nrow = 1, widths = c(2,1))
ggarrange(p2, p3, heights = c(2,1), nrow = 2)

InferCNV

tumor 59: 3,500 epithelial cells vs. 9,500 reference (non-epithelial) cells
tumor 76: 210 epithelial cells vs. 13,500 reference (non-epithelial) cells
tumor 77: 2,900 epithelial cells vs. 4,000 reference (non-epithelial) cells
tumor 89: 300 epithelial cells vs. 4,600 reference (non-epithelial) cells
tumor 90: 900 epithelial cells vs. 2,700 reference (non-epithelial) cells

Prepare input files and execute InferCNV

Prepare input files from a SingleCellExperiment

obj <- generateInferCNVInput(sces[[1]], "tumor59")

Run inferCNV for one tumor at a time (computational intensive, several hours). Best executed on a cluster (here with 20 cores).

# cutoff = 1 works well for Smart-seq2, 
# cutoff = 0.1 works well for 10x Genomics
res <- infercnv::run(obj,
                     cutoff = 0.1, 
                     out_dir = "tumor59",
                     num_threads = 20,
                     denoise = TRUE,
                     HMM = TRUE)

Cross-tumor heatmap

for(i in seq_along(sces)) colnames(sces[[i]]) <- sces[[i]]$Barcode

Read observation matrix (expression profiles of cancer epithelial cells)

tags <- c("p7r313prargcp3g", "6yunw77zfe4nzgw", "qaop7h1sh2bxm5g",
          "x469lcpm3rp5dxo", "m56zm2zywjcongz")
durl <- "https://dl.dropboxusercontent.com/s/tag/TXX_infercnv.observations.rds"

readObsMat <- function(i)
{ 
    dl <- sub("XX", tumors[i], durl)
    dl <- sub("tag", tags[i], dl)
    readRDS(file(dl))
}

obs.mats <- lapply(1:5, readObsMat)
names(obs.mats) <- paste0("T", tumors)

Subset and annotate cells:

for(i in seq_along(tumors)) 
    obs.mats[[i]] <- subsetObservations(obs.mats[[i]], sces[[i]])
ra <- getCellAnnotation(sces, obs.mats)

Create union of genes featured in the inferCNV output of each tumor, extend individual observations as needed, and create a combined observation matrix storing the information across all tumors:

un <- Reduce(union, lapply(obs.mats, rownames))
obs.mats <- lapply(obs.mats, extendObservations, un = un) 
obs.mat <- do.call(cbind, lapply(obs.mats, function(o) o[un,]))

Obtain genomic coordinates for the genes in the combined observation matrix:

egenes <- symbols2ranges(rownames(obs.mat))
obs.mat <- obs.mat[names(egenes),]
chr <- as.character(seqnames(egenes))

Highlight specific recurrent regions with known driver genes from the TCGA OVC paper:

clabs <- getRecurrentDriverGenes(rownames(obs.mat))

Plot the heatmap:

im <- ComplexHeatmap::Heatmap(t(obs.mat),
    #top_annotation = ta,
    right_annotation = ra,
    name = "Expression",
    col = circlize::colorRamp2(#c(0.895, 1.105), c("white", "black")),
                                c(0.895, 1, 1.105), c("blue", "white", "red")),
    row_title_rot = 0, column_title_rot = 90,
    show_row_names = FALSE, show_column_names = TRUE,
    cluster_rows = FALSE, cluster_columns = FALSE,
    row_split = rep(paste0("T", tumors), sapply(obs.mats, ncol)),
    column_spl = factor(chr, levels = unique(chr)),
    column_gap = unit(0, "mm"),
    column_title_gp = gpar(fontsize = 6),
    column_labels = clabs,
    column_names_gp = gpar(fontsize = 6),
    border = TRUE)
im <- grid.grabExpr(draw(im))
im

Cell Cycle

Cyclins

Tumor 59

for(i in seq_along(sces)) colnames(sces[[i]]) <- NULL
sce59 <- subsetByCellType(sces[[1]], cell.type = "EPI", subtype = c("DIF", "PRO"))
sce59

cyclin.genes <- grep("^CCN[ABDE][0-9]$", names(sce59), value = TRUE)
cyclin.genes
markerHeatmap(sce59, cyclin.genes)
bpCyclins(sce59)

Tumor 77

sce77 <- subsetByCellType(sces[[3]], cell.type = "EPI", subtype = c("DIF", "PRO"))
bpCyclins(sce77)

Tumor 90

sce90 <- subsetByCellType(sces[[5]], cell.type = "EPI", subtype = c("DIF", "PRO"))
bpCyclins(sce90)

Marker genes

Tumor 59

markers59 <- scran::findMarkers(sce59, groups = sce59$subtype)
head(markers59$DIF)
markers59$DIF["CCND1",]
head(markers59$DIF[markers59$DIF$logFC.PRO < 0,])
markerHeatmap(sce59, rownames(markers59$DIF)[1:25])

Contrast with bulk markers

data.dir <- system.file("extdata", package = "subtypeHeterogeneity")
verhaak.file <- file.path(data.dir, "Verhaak_supplementT8A.txt")
bulk.markers <- getExtendedVerhaakSignature(verhaak.file, 
                                            nr.genes.per.subtype = 200,
                                            by.subtype = TRUE)
bmarkers59 <- compileMarkers(markers59, bulk.markers)
bmarkers59
markerHeatmap(sce59, rownames(bmarkers59), row.split = TRUE)

Tumor 77

markers77 <- scran::findMarkers(sce77, groups = sce77$subtype)
markerHeatmap(sce77, rownames(markers77$DIF)[1:25])
bmarkers77 <- compileMarkers(markers77, bulk.markers)
markerHeatmap(sce77, rownames(bmarkers77), row.split = TRUE)

Tumor 90

markers90 <- scran::findMarkers(sce90, groups = sce90$subtype)
markerHeatmap(sce90, rownames(markers90$DIF)[1:25])
bmarkers90 <- compileMarkers(markers90, bulk.markers)
markerHeatmap(sce90, rownames(bmarkers90), row.split = TRUE)

Combined

markers <- list(markers59, markers77, markers90)
dif.markers <- getCommonMarkers(markers, n = 10)
pro.markers <- getCommonMarkers(markers, subtype = "PRO", n = 10)
markers <- c(dif.markers, pro.markers)

capture the heatmap

ml <- markerHeatmapList(sces[c(1,3,5)], markers, row.split = TRUE)
m <- grid.grabExpr(draw(ml))

Cyclone

Tumor 59

Run the cyclone classifier (pre-computed, computationally intensive)

sce.ensembl <- EnrichmentBrowser::idMap(sce, org = "hsa", 
                                        from = "SYMBOL", to = "ENSEMBL")
marker.file <- system.file("extdata", "human_cycle_markers.rds", package="scran")
hm.pairs <- readRDS(marker.file)
cy.assignments <- scran::cyclone(sce.ensembl, hm.pairs)
cy.file <- file.path(data.dir, "T59_cyclone.rds")
saveRDS(cy.assignments, file = cy.file)

Get the computed assignments and plot:

cy.file <- file.path(data.dir, "T59_cyclone.rds")
cy.assignments <- readRDS(cy.file)
plot(cy.assignments$score$G1, cy.assignments$score$G2M,
     xlab = "G1 score", ylab = "G2/M score", pch = 16)
df <- data.frame(cy.assignments$score, subtype = sce59$subtype)
ggscatter(df, x = "G1", y = "G2M", color = "subtype",
          palette = stcols[c("DIF", "PRO")], ggtheme = ggplot2::theme_bw())

tab <- table(cy.assignments$phases, sce59$subtype)
tab
chisq.test(tab)

Figure 5:

bc <- bpCyclins(sces[[1]], top.only = TRUE) + ylim(c(0,4.3))
bc <- bc + geom_signif(annotations = "1.8e-07", 
                  y_position = 4.25, xmin = 4.8, xmax = 5.2) 
gs <- ggscatter(df, x = "G1", y = "G2M", 
                color = "subtype", palette = stcols[c("DIF", "PRO")], 
                ggtheme = ggplot2::theme_bw(), legend = "none")
bc <- ggarrange(bc, gs, ncol = 2, labels = c("B", "C"))
bcd <- ggarrange(bc, m, nrow = 2, labels = c("", "D"))
bcd

ggarrange(im, bcd, ncol = 2, labels = c("A",""))

Tumor 77

cy.file <- sub("59", "77", cy.file)
cy.assignments <- readRDS(cy.file)
df <- data.frame(cy.assignments$score, subtype = sce77$subtype)
ggscatter(df, x = "G1", y = "G2M", color = "subtype",
          palette = stcols[c("DIF", "PRO")], ggtheme = ggplot2::theme_bw())

tab <- table(cy.assignments$phases, sce77$subtype)
tab
chisq.test(tab)

Tumor 90

cy.file <- sub("77", "90", cy.file)
cy.assignments <- readRDS(cy.file)
df <- data.frame(cy.assignments$score, subtype = sce90$subtype)
ggscatter(df, x = "G1", y = "G2M", color = "subtype",
          palette = stcols[c("DIF", "PRO")], ggtheme = ggplot2::theme_bw())

tab <- table(cy.assignments$phases, sce90$subtype)
tab
fisher.test(tab)

Session Info

sessionInfo()

waldronlab/subtypeHeterogeneity documentation built on Jan. 10, 2025, 10:11 a.m.

rdrr.io home R language documentation Run R code online

CRAN packages Bioconductor packages R-Forge packages GitHub packages

Note that we can't provide technical support on individual packages. You should contact the package authors for that.

waldronlab/subtypeHeterogeneity
Tumor subclonality of expression-based cancer subtypes

In waldronlab/subtypeHeterogeneity: Tumor subclonality of expression-based cancer subtypes

Single cell RNA-seq analysis (10x tumors)

Setup

Preprocessing

Dimensionality reduction

Clustering

Annotation (cell type and consensus subtype)

SingleR cell type annotation

Marker genes

consensusOV subtype annotation

Visualization of results (all tumors)

Bulk RNA-seq

InferCNV

Prepare input files and execute InferCNV

Cross-tumor heatmap

Cell Cycle

Cyclins

Tumor 59

Tumor 77

Tumor 90

Marker genes

Tumor 59

Tumor 77

Tumor 90

Combined

Cyclone

Tumor 59

Tumor 77

Tumor 90

Session Info

R Package Documentation

Browse R Packages

We want your feedback!

waldronlab/subtypeHeterogeneity Tumor subclonality of expression-based cancer subtypes

In waldronlab/subtypeHeterogeneity: Tumor subclonality of expression-based cancer subtypes

Single cell RNA-seq analysis (10x tumors)

Setup

Preprocessing

Dimensionality reduction

Clustering

Annotation (cell type and consensus subtype)

SingleR cell type annotation

Marker genes

consensusOV subtype annotation

Visualization of results (all tumors)

Bulk RNA-seq

InferCNV

Prepare input files and execute InferCNV

Cross-tumor heatmap

Cell Cycle

Cyclins

Tumor 59

Tumor 77

Tumor 90

Marker genes

Tumor 59

Tumor 77

Tumor 90

Combined

Cyclone

Tumor 59

Tumor 77

Tumor 90

Session Info

R Package Documentation

Browse R Packages

We want your feedback!

waldronlab/subtypeHeterogeneity
Tumor subclonality of expression-based cancer subtypes