Single cell RNA-seq analysis (10x tumors)

library(knitr)
opts_chunk$set(out.width = "100%", cache = TRUE)
knitr::opts_chunk$set(dev = "png", dev.args = list(type = "cairo-png"))
options(repos = c(CRAN = 'https://cloud.r-project.org')) 

Setup

Dependencies:

# OVC
library(subtypeHeterogeneity) 
library(consensusOV)

# Single cell
library(scater)
library(scran)
library(DropletUtils)
library(SingleR)

# Annotation
library(EnsDb.Hsapiens.v86)
library(org.Hs.eg.db)
library(EnrichmentBrowser)

# Plotting
library(ComplexHeatmap)
library(ggpubr)

Constants:

SUBTYPES <- c("DIF", "IMR", "MES", "PRO")
CELL.TYPES <- c("EPI", "LYMPH", "MYE", "STROM", "ENDO")
data.dir <- system.file("extdata", package = "subtypeHeterogeneity")

Colors

cb.pink <- "#CC79A7"
cb.red <- "#D55E00"
cb.blue <- "#0072B2"
cb.yellow <- "#F0E442"
cb.green <- "#009E73"
cb.lightblue <- "#56B4E9"
cb.orange <- "#E69F00"

stcols <- c(cb.lightblue, cb.green, cb.orange, cb.pink) 
names(stcols) <- SUBTYPES

Parallel computation:

bp <- BiocParallel::registered()[[1]]

Note: The sections Preprocessing, Dimensionsality reduction, and Clustering describe the processing of the CellRanger output files at the example of Tumor T90. Tumors T59, T76, T77, and T89 were processed analogously. Execution of the code in these sections requires download of the CellRanger output files deposited in GEO (accession number: GSE154600).

Section Annotation, Subsection Visualization of results (all tumors), and subsequent sections describe how Figures 4 and 5 of the main manuscript were created. The code is based on the fully processed and annotated SingleCellExperiments (resulting from processing steps described in Section 2 - Section 5.3).

Preprocessing

Read 10X output (required files: matrix.mtx, barcodes.tsv, genes.tsv)

sce <- DropletUtils::read10xCounts("tumor90")
dim(sce)

Annotating the rows:

rownames(sce) <- scater::uniquifyFeatureNames(rowData(sce)$ID,
                                              rowData(sce)$Symbol)
location <- AnnotationDbi::mapIds(EnsDb.Hsapiens.v86, keys = rowData(sce)$ID, 
                                  column = "SEQNAME", keytype = "GENEID")

Testing for deviations from ambient expression:

bcrank <- DropletUtils::barcodeRanks(counts(sce))
uniq <- !duplicated(bcrank$rank)

plot(bcrank$rank[uniq], bcrank$total[uniq], log="xy",
    xlab="Rank", ylab="Total UMI count", cex.lab=1.2)

abline(h=bcrank$inflection, col="darkgreen", lty=2)
abline(h=bcrank$knee, col="dodgerblue", lty=2)

legend("bottomleft", legend=c("Inflection", "Knee"),
    col=c("darkgreen", "dodgerblue"), lty=2, cex=1.2)

Quality control on the cells:

df <- scater::perCellQCMetrics(sce, 
                       subsets = list(Mito = which(location == "MT")),
                       BPPARAM = bp)
par(mfrow = c(1,3))
hist(df$sum/1e3, xlab="Library sizes (thousands)", main="",
    breaks=20, col="grey80", ylab="Number of cells")
hist(df$detected, xlab="Number of expressed genes", main="",
    breaks=20, col="grey80", ylab="Number of cells")
hist(df$subsets_Mito_percent, xlab="Mitochondrial proportion (%)",
    ylab="Number of cells", breaks=20, main="", col="grey80")
par(mfrow = c(1,1))

high.mito <- scater::isOutlier(df$subsets_Mito_percent, nmads = 3, type = "higher")
libsize.drop <- scater::isOutlier(df$sum, nmads = 1, type = "lower", log = TRUE)
feature.drop <- scater::isOutlier(df$detected, nmads = 1, type = "lower", log = TRUE)
sce <- sce[,!(high.mito | libsize.drop | feature.drop)]

df <- data.frame(ByHighMito = sum(high.mito),
           ByLibSize = sum(libsize.drop),
           ByFeature = sum(feature.drop),
           Remaining = ncol(sce))

df
dim(sce)

Examining gene expression:

ave <- scater::calculateAverage(sce, BPPARAM = bp)
hist(log10(ave), breaks = 100, main = "", col = "grey",
    xlab = expression(Log[10]~"average count"))

Remove genes that have too low average counts

rowData(sce)$AveCount <- ave
to.keep <- ave > 0.001
sce <- sce[to.keep,]

# Number excluded genes
sum(!to.keep)
dim(sce)

Library-size normalization:

clusters <- scran::quickCluster(sce, method = "igraph", 
                                min.mean = 0.1, BPPARAM = bp)
sce <- scran::computeSumFactors(sce, min.mean = 0.1,
                                cluster = clusters, BPPARAM = bp)
plot(scater::librarySizeFactors(sce), sizeFactors(sce), pch = 16,
    xlab = "Library size factors", ylab = "Deconvolution factors", log = "xy")
sce <- scater::logNormCounts(sce)
sce

Dimensionality reduction

Modelling the mean-variance trend:

dec.pbmc <- scran::modelGeneVarByPoisson(sce, BPPARAM = bp)
top.pbmc <- scran::getTopHVGs(dec.pbmc, prop = 0.1)
plot(dec.pbmc$mean, dec.pbmc$total, pch=16, cex=0.5,
    xlab="Mean of log-expression", ylab="Variance of log-expression")
curfit <- metadata(dec.pbmc)
curve(curfit$trend(x), col = 'dodgerblue', add = TRUE, lwd = 2)

Denoise log-expression data by removing principal components corresponding to technical noise (might take a while):

sce <- scran::denoisePCA(sce, subset.row = top.pbmc,
                         technical = dec.pbmc, BPPARAM = bp)
ncol(reducedDim(sce, "PCA"))
plot(attr(reducedDim(sce), "percentVar"), xlab = "PC",
     ylab = "Proportion of variance explained")
abline(v = ncol(reducedDim(sce, "PCA")), lty = 2, col = "red")

tSNE:

sce <- scater::runTSNE(sce, dimred = "PCA", perplexity = 30, BPPARAM = bp)
sce$sizeFactor <- sizeFactors(sce)
scater::plotTSNE(sce, colour_by = "sizeFactor")

Clustering

Clustering with graph-based methods:

snn.gr <- scran::buildSNNGraph(sce, use.dimred = "PCA", BPPARAM = bp, k = 25)
clusters <- igraph::cluster_walktrap(snn.gr)
sce$Cluster <- factor(clusters$membership)
table(sce$Cluster)
cluster.mod <- scran::clusterModularity(snn.gr, sce$Cluster,
                                        get.weights = TRUE)
log.ratio <- log2(cluster.mod$observed / cluster.mod$expected + 1)

ComplexHeatmap::Heatmap(log.ratio,
                        cluster_rows = FALSE, cluster_columns = FALSE)
scater::plotTSNE(sce, colour_by = "Cluster")

Annotation (cell type and consensus subtype)

SingleR cell type annotation

Transfer cell type annotation from reference profiles in the Human Primary Cell Atlas and ENCODE:

hpc <- transferCellType(sce, "hpca")
encode <- transferCellType(sce, "encode")

sce$hpca.celltype <- hpc$labels 
sce$encode.celltype <- encode$labels

maxScore <- function(res)
    vapply(1:nrow(res), function(i) res$scores[i, res[i,"labels"]], numeric(1))

sce$hpca.celltype.score <- maxScore(hpc)
sce$encode.celltype.score <- maxScore(hpc)

Display main cell types present:

plotMainCellTypes(sce, "hpca.celltype")
plotMainCellTypes(sce, "encode.celltype")

Display ambiguity of cell type assignments:

SingleR::plotScoreHeatmap(hpc)
SingleR::plotScoreHeatmap(encode)

Marker genes

Adopted from Shih et al., PLoS One (2018):

epithelial <- c("EPCAM", "KRT8", "KRT18", "KRT19")
lymphocyte <- c("PTPRC", "CD3E", "CD19", "MS4A1")
endothelial <- c("PECAM1", "CD34")
fibroblast <- c("ACTA2", "DCN", "ACTB")
stromal <- c("THY1", "ENG", "VIM", "CD44")

origin <- c("PAX8", "CALB2", "COL11A1")
scater::plotTSNE(sce, colour_by = epithelial[1])

consensusOV subtype annotation

sce.entrez <- EnrichmentBrowser::idMap(sce, org = "hsa", 
                                       from = "SYMBOL", to = "ENTREZID")
am <- as.matrix(assay(sce.entrez, "logcounts"))
cst <- consensusOV::get.consensus.subtypes(am, names(sce.entrez))

sts <- sub("_consensus$", "", as.vector(cst$consensusOV.subtypes))
sce$subtype <- sts
sce$margin <- consensusOV::margin(cst$rf.probs)

summary(sce$margin)
scater::plotTSNE(sce, colour_by = "subtype")
scater::plotTSNE(sce, colour_by = "margin")

Visualization of results (all tumors)

Read the pre-processed 10X data for all tumors as list of SingleCellExperiments.

tumors <- c(59, 76, 77, 89, 90) 
tags <- c("cu0pxp0sepsvhos", "taff3g6nlmf5yk4", "2otdxfunxnlr6ql",
          "sojupsrjxnvnf6d", "ydokzcxgkugo678")
durl <- "https://dl.dropboxusercontent.com/s/tag/sampleXX_sce.rds"
readSCE <- function(i)
{ 
    dl <- sub("XX", tumors[i], durl)
    dl <- sub("tag", tags[i], dl)
    readRDS(file(dl))
}
sces <- lapply(1:5, readSCE)
names(sces) <- paste0("T", tumors)
sces

Summarize cell type annotation:

sces <- lapply(sces, annoCellType)
vapply(sces, function(sce) table(sce$celltype), integer(5))
vapply(sces, function(sce) table(sce$subtype), integer(4))

Display cell type and subtype annotation for one tumor at a time:

p2a <- plotType(sces[[1]])
p2b <- plotType(sces[[1]], type = "subtype")
p2 <- ggarrange(p2b + theme_bw() + theme(plot.margin = margin(r = 1)), 
                p2a + theme_bw() + theme(axis.text.y = element_blank(), 
                                         axis.ticks.y = element_blank(), 
                                         axis.title.y = element_blank(),
                                         plot.margin = margin(l = 1, r=1)), 
                legend = "top", align = "h", labels = c("A", "B"))
p2

Facet cell type and subtype visualization for all tumors:

stp <- facetTumors(sces, col = "subtype", pal = stcols)
pal <- ggpubr::get_palette("npg", 5)
names(pal) <- CELL.TYPES
ctp <- facetTumors(sces, col = "celltype", pal = pal) 
p2 <- ggpubr::ggarrange(stp + theme(axis.text.x = element_blank(),
                                    axis.title.x = element_blank()), 
                        ctp, nrow = 2, align = "v", labels = c("A", "B"),
                        heights = c(1, 1.2))

Barplot percentage of subtype / cell type for all 10X tumors:

cttab <- vapply(sces, getPerc, numeric(5), perc=FALSE)
total <- colSums(cttab)
prop.test(cttab["EPI",], total)
prop.test(cttab["STROM",], total)
stbp <- bpType(sces, "subtype", pal = stcols)
ctbp <- bpType(sces, "celltype", pal = pal)

Cell type vs subtype matrix:

mat <- getCelltypeSubtypeMatrix(sces)
ctstbp <- bpCrossType(mat, pal)
ctstbp

Or as matrix plot:

mat <- round(mat / sum(mat) * 100)
mat <- mat[,CELL.TYPES]
matrixPlot(mat, cnames = c("SUBTYPE", "CELL.TYPE"), high.color = "darkgreen")
p1 <- ggarrange(stbp,
                ctbp + theme(axis.text.y = element_blank(),
                             axis.title.y = element_blank()),
                             # axis.ticks.x = element_blank(),
                             # axis.line.x = element_blank()), 
                ctstbp + theme(axis.text.y = element_blank(),
                             axis.title.y = element_blank()),
                nrow = 1, hjust = c(-0.5, 0.5, 0.5),
                widths = c(1.3,1,1),
                legend = "none", align = "h", labels = c("C", "D", "E"))
p1

Margin scores:

spl.margin <- function(sce, col = "celltype")
    reshape2::melt(split(sce$margin, sce[[col]]))
psces <- lapply(sces, spl.margin)
for(i in tumors) psces[[match(i, tumors)]]$tumor <- paste0("T", i)
df <- do.call(rbind, psces)
colnames(df) <- c("MARGIN", "CELL.TYPE", "TUMOR")
df[[2]] <- factor(df[[2]], levels = CELL.TYPES)

Boxplot of margin scores:

ggboxplot(df, x = "CELL.TYPE", y = "MARGIN", width = 0.8, notch = TRUE,
            fill = "CELL.TYPE", palette = get_palette("lancet", 5),
            facet.by = "TUMOR", nrow = 1,  
            x.text.angle = 45, legend="none", ggtheme = theme_bw(base_size = 12)) 

Only epithelial cells:

df <- subset(df, CELL.TYPE == "EPI")
bp <- ggboxplot(df, x = "TUMOR", y = "MARGIN", width = 0.8, notch = TRUE,
                fill = "lightgrey", #cb.red,
                legend="none", ggtheme = theme_bw(), ylab = "MARGIN (EPI)",
                font.x = 11, font.y = 11, font.tickslab = c(10, "plain", "black"))
                # ylim = c(0,1),x.text.angle = 45, 
df.points <- data.frame(x=1:5, y=c(0.376, 0.766, 0.182, 0.646, 0.214))
bp <- bp + geom_point(data = df.points, aes(x = x, y = y), shape = 8, size = 3) +
           geom_text(data = df.points[2,], aes(x = x, y = y),
                     label = "Bulk", hjust = 0, nudge_x = 0.1, size = 4)
bp

Bulk RNA-seq

durl <- "https://dl.dropboxusercontent.com/s/lit4wok327u1mps"
dfile <- file.path(durl, "Bulk_RNAseq_March2020.txt")
dat <- read.delim(file(dfile), as.is=TRUE)
cpm.ind <- seq_len(which(dat[,1] == "")[1] - 1)
cpm.dat <- dat[cpm.ind, 1:8]
raw.dat <- dat[,9:16]
cpm.sym <- cpm.dat[,1]
raw.sym <- raw.dat[,1]

Use pre-computed library size-normalized expression values for subtype assignment:

rownames(cpm.dat) <- cpm.dat[,2] 
cpm.dat <- cpm.dat[,3:8]
colnames(cpm.dat) <- paste0("T", c(59, 77, 76, 89, 90, 91)) 
cpm.dat <- as.matrix(cpm.dat)
mode(cpm.dat) <- "numeric"
cpm.se <- SummarizedExperiment(assays = list(cpm = cpm.dat))
cpm.se <- EnrichmentBrowser::idMap(cpm.se, org="hsa", from="ENSEMBL", to="ENTREZID")
cpm.file <- file.path(data.dir, "bulk_cpm_subtypes.rds")
cst <- consensusOV::get.consensus.subtypes(assay(cpm.se), names(cpm.se))
saveRDS(cst, file = cpm.file)
cst <- readRDS(cpm.file)
cst

Sanity check: start from raw read counts

rownames(raw.dat) <- raw.dat[,2]
raw.dat <- raw.dat[,3:8]
colnames(raw.dat) <- paste0("T", c(59, 77, 76, 89, 90, 91))
raw.dat <- as.matrix(raw.dat)
mode(raw.dat) <- "numeric"
raw.dat <- edgeR::cpm(raw.dat, log=TRUE)
raw.se <- SummarizedExperiment(assays = list(raw = raw.dat))
raw.se <- EnrichmentBrowser::idMap(raw.se, org="hsa", from="ENSEMBL", to="ENTREZID")
raw.file <- file.path(data.dir, "bulk_raw_subtypes.rds")
cst2 <- consensusOV::get.consensus.subtypes(assay(raw.se), names(raw.se))
saveRDS(cst2, file = raw.file)

Pretty much the same as when starting from already normalized expression values:

cst2 <- readRDS(raw.file)
cst2

Contrast bulk and single-cell margins:

mat <- t(cst$rf.probs)
rownames(mat) <- sub("_consensus", "", rownames(mat))
mat <- round(mat, digits = 2)
mat <- mat[c(3,4,1,2),c(1,3,2,4)]
mat <- rbind(c(0.15, 0.02, 0.5, 0.13, 0.12),
             c(0.59, 0.03, 0.14, 0.77, 0.07),
             c(0.22, 0.85, 0.03, 0.08, 0.3),
             c(0.04, 0.09, 0.32, 0.02, 0.52))
colnames(mat) <- paste0("T", tumors)
rownames(mat) <- SUBTYPES
mat <- mat[4:1,]
mp <- matrixPlot(t(mat), cnames = c("TUMORS", "CLASS.PROB"),
                    bw.thresh = 0.5, gthresh = 0.2)
mp <- mp + theme(axis.text.x = element_blank(),
                 axis.title.x = element_blank(),
                 axis.ticks.x = element_blank(),
                 axis.line.x = element_blank(),
                 plot.margin = margin(b = 0))
mp <- mp + ylab("BULK")
p0 <- ggarrange(mp, bp, vjust = 0.5, hjust = 0.5,
                nrow = 2, align = "v", heights = c(1,3), labels = c("F", ""))
p0
p3 <- ggarrange(p1, p0, nrow = 1, widths = c(2,1))
ggarrange(p2, p3, heights = c(2,1), nrow = 2)

InferCNV

Prepare input files and execute InferCNV

Prepare input files from a SingleCellExperiment

obj <- generateInferCNVInput(sces[[1]], "tumor59")

Run inferCNV for one tumor at a time (computational intensive, several hours). Best executed on a cluster (here with 20 cores).

# cutoff = 1 works well for Smart-seq2, 
# cutoff = 0.1 works well for 10x Genomics
res <- infercnv::run(obj,
                     cutoff = 0.1, 
                     out_dir = "tumor59",
                     num_threads = 20,
                     denoise = TRUE,
                     HMM = TRUE)

Cross-tumor heatmap

for(i in seq_along(sces)) colnames(sces[[i]]) <- sces[[i]]$Barcode

Read observation matrix (expression profiles of cancer epithelial cells)

tags <- c("p7r313prargcp3g", "6yunw77zfe4nzgw", "qaop7h1sh2bxm5g",
          "x469lcpm3rp5dxo", "m56zm2zywjcongz")
durl <- "https://dl.dropboxusercontent.com/s/tag/TXX_infercnv.observations.rds"
readObsMat <- function(i)
{ 
    dl <- sub("XX", tumors[i], durl)
    dl <- sub("tag", tags[i], dl)
    readRDS(file(dl))
}
obs.mats <- lapply(1:5, readObsMat)
names(obs.mats) <- paste0("T", tumors)

Subset and annotate cells:

for(i in seq_along(tumors)) 
    obs.mats[[i]] <- subsetObservations(obs.mats[[i]], sces[[i]])
ra <- getCellAnnotation(sces, obs.mats)

Create union of genes featured in the inferCNV output of each tumor, extend individual observations as needed, and create a combined observation matrix storing the information across all tumors:

un <- Reduce(union, lapply(obs.mats, rownames))
obs.mats <- lapply(obs.mats, extendObservations, un = un) 
obs.mat <- do.call(cbind, lapply(obs.mats, function(o) o[un,]))

Obtain genomic coordinates for the genes in the combined observation matrix:

egenes <- symbols2ranges(rownames(obs.mat))
obs.mat <- obs.mat[names(egenes),]
chr <- as.character(seqnames(egenes))

Highlight specific recurrent regions with known driver genes from the TCGA OVC paper:

clabs <- getRecurrentDriverGenes(rownames(obs.mat))

Plot the heatmap:

im <- ComplexHeatmap::Heatmap(t(obs.mat),
    #top_annotation = ta,
    right_annotation = ra,
    name = "Expression",
    col = circlize::colorRamp2(#c(0.895, 1.105), c("white", "black")),
                                c(0.895, 1, 1.105), c("blue", "white", "red")),
    row_title_rot = 0, column_title_rot = 90,
    show_row_names = FALSE, show_column_names = TRUE,
    cluster_rows = FALSE, cluster_columns = FALSE,
    row_split = rep(paste0("T", tumors), sapply(obs.mats, ncol)),
    column_spl = factor(chr, levels = unique(chr)),
    column_gap = unit(0, "mm"),
    column_title_gp = gpar(fontsize = 6),
    column_labels = clabs,
    column_names_gp = gpar(fontsize = 6),
    border = TRUE)
im <- grid.grabExpr(draw(im))
im

Cell Cycle

Cyclins

Tumor 59

for(i in seq_along(sces)) colnames(sces[[i]]) <- NULL
sce59 <- subsetByCellType(sces[[1]], cell.type = "EPI", subtype = c("DIF", "PRO"))
sce59
cyclin.genes <- grep("^CCN[ABDE][0-9]$", names(sce59), value = TRUE)
cyclin.genes
markerHeatmap(sce59, cyclin.genes)
bpCyclins(sce59)

Tumor 77

sce77 <- subsetByCellType(sces[[3]], cell.type = "EPI", subtype = c("DIF", "PRO"))
bpCyclins(sce77)

Tumor 90

sce90 <- subsetByCellType(sces[[5]], cell.type = "EPI", subtype = c("DIF", "PRO"))
bpCyclins(sce90)

Marker genes

Tumor 59

markers59 <- scran::findMarkers(sce59, groups = sce59$subtype)
head(markers59$DIF)
markers59$DIF["CCND1",]
head(markers59$DIF[markers59$DIF$logFC.PRO < 0,])
markerHeatmap(sce59, rownames(markers59$DIF)[1:25])

Contrast with bulk markers

data.dir <- system.file("extdata", package = "subtypeHeterogeneity")
verhaak.file <- file.path(data.dir, "Verhaak_supplementT8A.txt")
bulk.markers <- getExtendedVerhaakSignature(verhaak.file, 
                                            nr.genes.per.subtype = 200,
                                            by.subtype = TRUE)
bmarkers59 <- compileMarkers(markers59, bulk.markers)
bmarkers59
markerHeatmap(sce59, rownames(bmarkers59), row.split = TRUE)

Tumor 77

markers77 <- scran::findMarkers(sce77, groups = sce77$subtype)
markerHeatmap(sce77, rownames(markers77$DIF)[1:25])
bmarkers77 <- compileMarkers(markers77, bulk.markers)
markerHeatmap(sce77, rownames(bmarkers77), row.split = TRUE)

Tumor 90

markers90 <- scran::findMarkers(sce90, groups = sce90$subtype)
markerHeatmap(sce90, rownames(markers90$DIF)[1:25])
bmarkers90 <- compileMarkers(markers90, bulk.markers)
markerHeatmap(sce90, rownames(bmarkers90), row.split = TRUE)

Combined

markers <- list(markers59, markers77, markers90)
dif.markers <- getCommonMarkers(markers, n = 10)
pro.markers <- getCommonMarkers(markers, subtype = "PRO", n = 10)
markers <- c(dif.markers, pro.markers)

capture the heatmap

ml <- markerHeatmapList(sces[c(1,3,5)], markers, row.split = TRUE)
m <- grid.grabExpr(draw(ml))

Cyclone

Tumor 59

Run the cyclone classifier (pre-computed, computationally intensive)

sce.ensembl <- EnrichmentBrowser::idMap(sce, org = "hsa", 
                                        from = "SYMBOL", to = "ENSEMBL")
marker.file <- system.file("extdata", "human_cycle_markers.rds", package="scran")
hm.pairs <- readRDS(marker.file)
cy.assignments <- scran::cyclone(sce.ensembl, hm.pairs)
cy.file <- file.path(data.dir, "T59_cyclone.rds")
saveRDS(cy.assignments, file = cy.file)

Get the computed assignments and plot:

cy.file <- file.path(data.dir, "T59_cyclone.rds")
cy.assignments <- readRDS(cy.file)
plot(cy.assignments$score$G1, cy.assignments$score$G2M,
     xlab = "G1 score", ylab = "G2/M score", pch = 16)
df <- data.frame(cy.assignments$score, subtype = sce59$subtype)
ggscatter(df, x = "G1", y = "G2M", color = "subtype",
          palette = stcols[c("DIF", "PRO")], ggtheme = ggplot2::theme_bw())
tab <- table(cy.assignments$phases, sce59$subtype)
tab
chisq.test(tab)

Figure 5:

bc <- bpCyclins(sces[[1]], top.only = TRUE) + ylim(c(0,4.3))
bc <- bc + geom_signif(annotations = "1.8e-07", 
                  y_position = 4.25, xmin = 4.8, xmax = 5.2) 
gs <- ggscatter(df, x = "G1", y = "G2M", 
                color = "subtype", palette = stcols[c("DIF", "PRO")], 
                ggtheme = ggplot2::theme_bw(), legend = "none")
bc <- ggarrange(bc, gs, ncol = 2, labels = c("B", "C"))
bcd <- ggarrange(bc, m, nrow = 2, labels = c("", "D"))
bcd
ggarrange(im, bcd, ncol = 2, labels = c("A",""))

Tumor 77

cy.file <- sub("59", "77", cy.file)
cy.assignments <- readRDS(cy.file)
df <- data.frame(cy.assignments$score, subtype = sce77$subtype)
ggscatter(df, x = "G1", y = "G2M", color = "subtype",
          palette = stcols[c("DIF", "PRO")], ggtheme = ggplot2::theme_bw())
tab <- table(cy.assignments$phases, sce77$subtype)
tab
chisq.test(tab)

Tumor 90

cy.file <- sub("77", "90", cy.file)
cy.assignments <- readRDS(cy.file)
df <- data.frame(cy.assignments$score, subtype = sce90$subtype)
ggscatter(df, x = "G1", y = "G2M", color = "subtype",
          palette = stcols[c("DIF", "PRO")], ggtheme = ggplot2::theme_bw())
tab <- table(cy.assignments$phases, sce90$subtype)
tab
fisher.test(tab)

Session Info

sessionInfo()


waldronlab/subtypeHeterogeneity documentation built on Oct. 28, 2020, 9:14 a.m.