utils::globalVariables(
names = c('x', 'y', 'lsr'),
package = 'CellMembrane',
add = TRUE
)
#' @title Wrapper around Seurat::CreateSeuratObject
#'
#' @description Create Seurat object from count data (usually from Read10X()). This also sets pct.mito.
#' @param seuratData Seurat input data, usually from Read10X().
#' @param datasetId This will be used as a prefix for barcodes, and stored in metadata. Also used as the project name for the Seurat object.
#' @param datasetName An optional print-friendly name that will be stored in metadata
#' @param minFeatures Include cells where at least this many features are detected.
#' @param minCells Include features detected in at least this many cells.
#' @param mitoGenesPattern The expression to use when identifying mitochondrial genes
#' @param annotateMitoFromReferenceIfNoHitsFound If true, a list of mitochondrial genes, taken from (https://en.wikipedia.org/wiki/Category:Human_mitochondrial_genes) will be used to calculate p.mito
#' @return A Seurat object with p.mito calculated.
#' @export
#' @importFrom Matrix colSums
CreateSeuratObj <- function(seuratData, datasetId, datasetName = NULL, minFeatures = 25, minCells = 0, mitoGenesPattern = "^MT-", annotateMitoFromReferenceIfNoHitsFound = TRUE){
seuratObj <- Seurat::CreateSeuratObject(counts = seuratData, min.cells = minCells, min.features = minFeatures, project = datasetId)
seuratObj <- .PossiblyAddBarcodePrefix(seuratObj, datasetId = datasetId, datasetName = datasetName)
seuratObj<- CalculatePercentMito(seuratObj, mitoGenesPattern = mitoGenesPattern, annotateMitoFromReferenceIfNoHitsFound = annotateMitoFromReferenceIfNoHitsFound)
return(seuratObj)
}
#' @title Calculate Mitochrondrial Percentage
#'
#' @description This will identify mitochrondrial genes and calculate p.mito for each cell
#' @param seuratObj The seurat object
#' @param mitoGenesPattern The expression to use when identifying mitochondrial genes
#' @param annotateMitoFromReferenceIfNoHitsFound If true, a list of mitochondrial genes, taken from (https://www.genedx.com/wp-content/uploads/crm_docs/Mito-Gene-List.pdf) will be used to calculate p.mito
#' @param outputColName The name of the output column to hold p.mito
#' @return A Seurat object with p.mito calculated.
#' @export
CalculatePercentMito <- function(seuratObj, mitoGenesPattern = "^MT-", annotateMitoFromReferenceIfNoHitsFound = TRUE, outputColName = 'p.mito') {
mito.features <- grep(pattern = mitoGenesPattern, x = rownames(x = seuratObj), value = TRUE)
if (length(mito.features) == 0 && annotateMitoFromReferenceIfNoHitsFound) {
print('There were no genes matching mitoGenesPattern, so attempting to identify MT genes using name')
mito.features <- c('ATP6','ATP8','COX1','COX2','COX3','CYTB','ND1','ND2','ND3','ND4','ND4L','ND5','ND6')
mito.features.prefix <- c('MT-', mito.features)
i1 <- length(intersect(mito.features, rownames(seuratObj)))
i2 <- length(intersect(mito.features.prefix, rownames(seuratObj)))
if (i2 > 0 && i2 > i1) {
print('Selecting using reference gene set with MT- prefix')
mito.features <- mito.features.prefix
}
mito.features <- intersect(mito.features, rownames(seuratObj))
}
print(paste0('Total mito features: ', length(mito.features)))
mito.features <- intersect(mito.features, rownames(seuratObj))
print(paste0('Total intersecting with seurat rownames (total: ', length(rownames(seuratObj)),'): ', length(mito.features)))
if (all(is.null(mito.features)) || length(mito.features) == 0) {
print('No mito features found')
seuratObj[[outputColName]] <- 0
} else {
p.mito <- Matrix::colSums(x = GetAssayData(object = seuratObj, layer = 'counts')[mito.features, ]) / Matrix::colSums(x = GetAssayData(object = seuratObj, layer = 'counts'))
seuratObj[[outputColName]] <- p.mito
}
return(seuratObj)
}
#' @importFrom Matrix colSums
.PrintQcPlots <- function(seuratObj) {
if ('p.mito' %in% colnames(seuratObj@meta.data)) {
totalPMito <- length(unique(seuratObj$p.mito))
} else {
totalPMito <- -1
}
assayName <- Seurat::DefaultAssay(seuratObj)
nCountField <- paste0('nCount_', assayName)
nFeatureField <- paste0('nFeature_', assayName)
feats <- c(nFeatureField, nCountField)
if (totalPMito > 1) {
feats <- c(feats, "p.mito")
}
suppressWarnings(print(VlnPlot(object = seuratObj, features = feats, ncol = length(feats))))
if (totalPMito > 1) {
print(FeatureScatter(object = seuratObj, feature1 = nCountField, feature2 = "p.mito"))
} else {
print("p.mito absent or identical across all cells, will not plot")
}
print(FeatureScatter(object = seuratObj, feature1 = nCountField, feature2 = nFeatureField))
#10x-like plot
nUMI <- Matrix::colSums(suppressWarnings(GetAssayData(object = seuratObj, layer = "counts")))
nUMI <- sort(nUMI)
countAbove <-sapply(nUMI, function(x){
sum(nUMI >= x)
})
print(ggplot(data.frame(x = log10(countAbove), y = log(nUMI)), aes(x = x, y = y)) +
geom_point() + ylab("UMI/Cell") + xlab("log10(# Cells)") +
egg::theme_presentation()
)
}
#' @title PerformEmptyDropletFiltering
#'
#' @param seuratRawData Raw data
#' @param fdrThreshold FDR threshold, passed directly to PerformEmptyDrops()
#' @param emptyDropNIters Number of iterations, passed directly to PerformEmptyDrops()
#' @param emptyDropsLower Passed directly to emptyDrops(). The lower bound on the total UMI count, at or below which all barcodes are assumed to correspond to empty droplets.
#' @param useEmptyDropsCellRanger If TRUE, will use DropletUtils emptyDropsCellRanger instead of emptyDrops
#' @param nExpectedCells Only applied if emptyDropsCellRanger is selected. Passed to n.expected.cells argument
#' @return Plot
#' @importFrom DropletUtils barcodeRanks
PerformEmptyDropletFiltering <- function(seuratRawData, fdrThreshold=0.001, emptyDropNIters=10000, emptyDropsLower=200, useEmptyDropsCellRanger = FALSE, nExpectedCells = 8000) {
br.out <- DropletUtils::barcodeRanks(seuratRawData)
plot(br.out$rank, br.out$total+1, log="xy", xlab="Rank", ylab="Total")
o <- order(br.out$rank)
lines(br.out$rank[o], br.out$fitted[o], col="red")
abline(h=br.out@metadata$knee, col="dodgerblue", lty=2)
abline(h=br.out@metadata$inflection, col="forestgreen", lty=2)
legend("bottomleft", lty=2, col=c("dodgerblue", "forestgreen"), legend=c("knee", "inflection"))
print(paste0('Knee: ', br.out@metadata$knee))
print(paste0('Inflection: ', br.out@metadata$inflection))
e.out <- PerformEmptyDrops(seuratRawData, emptyDropNIters = emptyDropNIters, fdrThreshold = fdrThreshold, emptyDropsLower = emptyDropsLower, useEmptyDropsCellRanger = useEmptyDropsCellRanger, nExpectedCells = nExpectedCells)
toPlot <- e.out[is.finite(e.out$LogProb),]
if (nrow(toPlot) > 0) {
plot(toPlot$Total, -toPlot$LogProb, col=ifelse(toPlot$is.cell, "red", "black"), log = "x", xlab="log(Total UMI count)", ylab="-Log Probability")
} else {
print('Probabilities all -Inf, unable to plot')
}
if (nrow(toPlot) != nrow(e.out)) {
print(paste0('Total rows with non-finite probabilities: ', (nrow(e.out) - nrow(toPlot))))
}
print(paste0('Min UMI count in a droplet called a cell: ', min(e.out$Total[e.out$is.cell])))
print(paste0('Max UMI count in a droplet not called a cell: ', max(e.out$Total[!e.out$is.cell])))
passingCells <- rownames(e.out)[e.out$is.cell]
return(seuratRawData[,passingCells])
}
PerformEmptyDrops <- function(seuratRawData, emptyDropNIters, fdrThreshold=0.001, emptyDropsLower = 100, useEmptyDropsCellRanger = FALSE, nExpectedCells = 8000, seed = GetSeed()){
print(paste0('Performing ', ifelse(useEmptyDropsCellRanger, yes = 'emptyDropsCellRanger', no = 'emptyDrops'), ' with ', emptyDropNIters, ' iterations'))
if (!is.null(seed)) {
set.seed(seed)
}
if (useEmptyDropsCellRanger) {
e.out <- DropletUtils::emptyDropsCellRanger(seuratRawData, niters = emptyDropNIters, n.expected.cells = nExpectedCells)
} else {
e.out <- DropletUtils::emptyDrops(seuratRawData, niters = emptyDropNIters, lower = emptyDropsLower)
}
print(paste0('Input cells: ', nrow(e.out)))
badRows <- !is.finite(e.out$FDR)
if (sum(badRows) > 0) {
print(paste0('Cells with non-finite FDR: ', sum(badRows)))
e.out <- e.out[!badRows,]
}
e.out$is.cell <- e.out$FDR <= fdrThreshold
print(paste0('Cells passing FDR (', fdrThreshold, '): ', sum(e.out$is.cell, na.rm=TRUE)))
print(paste0('Cells failing FDR: ', sum(!e.out$is.cell, na.rm=TRUE)))
totalLimited <- 0
if (!useEmptyDropsCellRanger) {
#If there are any entries with FDR above the desired threshold and Limited==TRUE, it indicates that npts should be increased in the emptyDrops call.
print(table(Limited=e.out$Limited, Significant=e.out$is.cell))
totalLimited <- sum(e.out$Limited == T & e.out$Significant == F)
}
if (totalLimited == 0){
return(e.out)
} else {
print('Repeating emptyDrops with more iterations')
return(PerformEmptyDrops(seuratRawData, emptyDropNIters = emptyDropNIters * 2, fdrThreshold = fdrThreshold, emptyDropsLower = emptyDropsLower, seed = seed, useEmptyDropsCellRanger = useEmptyDropsCellRanger, nExpectedCells = nExpectedCells))
}
}
.DoMergeSimple <- function(seuratObjs, projectName, merge.data = FALSE, expectedDefaultAssay = NULL, doGC = FALSE){
seuratObj <- NULL
for (datasetId in names(seuratObjs)) {
if (is.null(seuratObj)) {
seuratObj <- .MergeSplitLayersIfNeeded(seuratObjs[[datasetId]])
if (!is.null(expectedDefaultAssay)) {
DefaultAssay(seuratObj) <- expectedDefaultAssay
}
} else {
assayName <- DefaultAssay(seuratObj)
DefaultAssay(seuratObjs[[datasetId]]) <- assayName
assayObj <- GetAssay(seuratObjs[[datasetId]], assay = assayName)
hasGeneId <- 'GeneId' %in% names(slot(assayObj, GetAssayMetadataSlotName(assayObj)))
if (any(rownames(seuratObj[[assayName]]) != rownames(seuratObjs[[datasetId]][[assayName]]))) {
missing <- rownames(seuratObj[[assayName]])[!(rownames(seuratObj[[assayName]]) %in% rownames(seuratObjs[[datasetId]][[assayName]]))]
missing <- c(missing, rownames(seuratObjs[[datasetId]][[assayName]])[!(rownames(seuratObjs[[datasetId]][[assayName]]) %in% rownames(seuratObj[[assayName]]))])
if (length(missing) > 0) {
stop(paste0('Gene names are not equal! Missing: ', paste0(missing, collapse = ',')))
} else {
sel <- rownames(seuratObj[[assayName]]) != rownames(seuratObjs[[datasetId]][[assayName]])
genes1 <- rownames(seuratObj[[assayName]])[sel]
genes2 <- rownames(seuratObjs[[datasetId]][[assayName]])[sel]
message(paste0('Gene names not identical between objects using assay: ', assayName, '. Problem genes:'))
message(paste0(genes1, sep =', ', collapse = ','))
message(paste0(genes2, sep =', ', collapse = ','))
}
}
if (hasGeneId) {
assayObj <- Seurat::GetAssay(seuratObj, assay = assayName)
# This can occur if the first object lacks GeneId but the second has it.
if (!'GeneId' %in% names(slot(assayObj, GetAssayMetadataSlotName(assayObj)))) {
message('GeneId was present in the second merged object, but not the first. Adding NAs')
slot(assayObj, GetAssayMetadataSlotName(assayObj))$GeneId <- NA
}
geneIds1 <- slot(assayObj, GetAssayMetadataSlotName(assayObj))$GeneId
geneIds2 <- slot(assayObj, GetAssayMetadataSlotName(assayObj))$GeneId
names(geneIds1) <- rownames(seuratObj[[assayName]])
names(geneIds2) <- rownames(seuratObjs[[datasetId]][[assayName]])
}
# NOTE: if collapse = TRUE is ever supported, we should use this.
seuratObj <- merge(x = seuratObj, y = .MergeSplitLayersIfNeeded(seuratObjs[[datasetId]]), project = projectName, merge.data = merge.data)
seuratObj <- .MergeSplitLayersIfNeeded(seuratObj)
seuratObjs[[datasetId]] <- NULL
if (doGC) {
gc()
}
if (hasGeneId) {
if (any(is.na(geneIds1)) & !any(is.na(geneIds2))) {
if (length(geneIds2) != nrow(seuratObj[[assayName]])) {
stop(paste0('Adding geneIds2 of length that differs from assay: ', length(geneIds2), ' vs. ', nrow(seuratObj[[assayName]])))
}
names(geneIds2) <- rownames(seuratObj[[assayName]])
seuratObj[[assayName]] <- AddMetaData(seuratObj[[assayName]], metadata = geneIds2, col.name = 'GeneId')
} else if (!any(is.na(geneIds1)) & any(is.na(geneIds2))) {
if (length(geneIds1) != nrow(seuratObj[[assayName]])) {
stop(paste0('Adding geneIds1 of length that differs from assay: ', length(geneIds1), ' vs. ', nrow(seuratObj[[assayName]])))
}
names(geneIds1) <- rownames(seuratObj[[assayName]])
seuratObj[[assayName]] <- AddMetaData(seuratObj[[assayName]], metadata = geneIds1, col.name = 'GeneId')
} else if (!any(is.na(geneIds1)) & !any(is.na(geneIds2))) {
if (any(geneIds1 != geneIds2)) {
stop('Gene IDs did not match between seurat objects!')
}
names(geneIds1) <- rownames(seuratObj[[assayName]])
seuratObj[[assayName]] <- AddMetaData(seuratObj[[assayName]], metadata = geneIds1, col.name = 'GeneId')
} else {
names(geneIds1) <- rownames(seuratObj[[assayName]])
seuratObj[[assayName]] <- AddMetaData(seuratObj[[assayName]], metadata = geneIds1, col.name = 'GeneId')
}
}
}
}
seuratObj <- .MergeSplitLayersIfNeeded(seuratObj)
print(paste0('Merge complete, layers:'))
for (assayName in names(seuratObj@assays)) {
print(paste0(assayName, ': ', paste0(SeuratObject::Layers(seuratObj, assay = assayName), collapse = ', ')))
}
return(seuratObj)
}
.MergeSplitLayersIfNeeded <- function(seuratObj) {
if (HasSplitLayers(seuratObj)) {
return(MergeSplitLayers(seuratObj))
}
return(seuratObj)
}
#' @title HasSplitLayers
#'
#' @param seuratObj The seurat object
#' @param assaysToTest An option list of assays to test
#' @return A boolean indicating whether the object has split layers
#' @export
HasSplitLayers <- function(seuratObj, assaysToTest = NULL) {
if (all(is.null(assaysToTest))) {
assaysToTest <- Seurat::Assays(seuratObj)
}
for (assayName in assaysToTest) {
if (length(suppressWarnings(SeuratObject::Layers(seuratObj, assay = assayName, search = 'counts'))) > 1) {
return(TRUE)
}
if (length(suppressWarnings(SeuratObject::Layers(seuratObj, assay = assayName, search = 'data'))) > 1) {
return(TRUE)
}
}
return(FALSE)
}
#' @title MergeSplitLayers
#'
#' @param seuratObj The seurat object
#' @return The updated seurat object
#' @export
MergeSplitLayers <- function(seuratObj) {
# NOTE: in Seurat 5.x, the default is to rename layers (i.e. counts.1 and counts.2). Collapse=TRUE avoids this, but this is not supported yet
for (assayName in Seurat::Assays(seuratObj)) {
print(paste0('Inspecting assay layers: ', assayName))
print(paste0('Layers: ', paste0(SeuratObject::Layers(seuratObj[[assayName]]), collapse = ',')))
print(paste0('Class: ', class(seuratObj[[assayName]])[1]))
if (inherits(seuratObj[[assayName]], 'Assay5')) {
toJoin <- .FindLayersToJoin(seuratObj, assayName)
print(paste0('Joining layers: ', assayName, ', ', paste0(toJoin, collapse = ';')))
seuratObj[[assayName]] <- SeuratObject::JoinLayers(seuratObj[[assayName]], layers = toJoin)
print(paste0('After join: ', paste0(SeuratObject::Layers(seuratObj[[assayName]]), collapse = ',')))
} else {
print(paste0('Not an assay5 object, not joining layers: ', assayName))
print(seuratObj)
}
if (HasSplitLayers(seuratObj, assaysToTest = assayName)) {
stop(paste0('Layers were not joined: ', assayName, ', ', paste0(SeuratObject::Layers(seuratObj[[assayName]]), collapse = ',')))
}
}
if (HasSplitLayers(seuratObj)) {
stop(paste0('This seurat object has split layers'))
}
return(seuratObj)
}
.RunSoupX <- function(outsDir) {
sc <- SoupX::load10X(outsDir)
sc <- SoupX::autoEstCont(sc, doPlot = T)
seuratRawData <- SoupX::adjustCounts(sc)
print(paste0('Passing cells: ', ncol(seuratRawData)))
print(SoupX::plotMarkerMap(sc, geneSet = sc$fit$markersUsed$gene))
print(SoupX::plotChangeMap(sc, cleanedMatrix = seuratRawData, geneSet = sc$fit$markersUsed$gene))
# Drop suffixes:
colnames(seuratRawData) <- sapply(colnames(seuratRawData), function(x){
return(unlist(strsplit(x, split = '-'))[1])
})
return(seuratRawData)
}
.InferMatrixDir <- function(dataDir) {
matrixFile <- paste0(dataDir, 'matrix.mtx')
matrixFileGz <- paste0(dataDir, 'matrix.mtx.gz')
if (file.exists(matrixFile) || file.exists(matrixFileGz)) {
return(dataDir)
}
dirWithFeatureMatrix <- paste0(dataDir, 'raw_feature_bc_matrix')
matrixFile <- paste0(dirWithFeatureMatrix, '/matrix.mtx')
matrixFileGz <- paste0(dirWithFeatureMatrix, '/matrix.mtx.gz')
if (file.exists(matrixFile) || file.exists(matrixFileGz)) {
return(dirWithFeatureMatrix)
}
stop(paste0('Unable to find matrix file in: ', dataDir, ' or ', dirWithFeatureMatrix))
}
.FindLayersToJoin <- function(seuratObj, assayName) {
layerNames <- SeuratObject::Layers(seuratObj, assay = assayName)
layerBasenames <- sapply(layerNames, function(x){
x <- unlist(strsplit(x, split = '\\.'))
if (length(x) == 1) {
return(NA)
}
# Drop the final suffix, on the assumption the initial piece is the layer basename, and the final element is projectName:
return(paste0(x[-length(x)], collapse = '.'))
})
# Any duplicated basenames inducate a split layer:
layerBasenames <- layerBasenames[!is.na(layerBasenames)]
layerBasenames <- unique(layerBasenames[duplicated(layerBasenames)])
return(unlist(layerBasenames))
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.