In BimberLab/cellhashR: A Package for Demultiplexing Cell Hashing Data

library(cellhashR)
library(dplyr)

knitr::opts_chunk$set(message=FALSE, warning=FALSE, echo=TRUE, error=FALSE)

requiredVars <- c('rawCountData', 'callFile')
for (v in requiredVars) {
    if (!exists(v)) {
        stop(paste0('Need to define variable: ', v))
    }
}

if (!file.exists(rawCountData)) {
  stop(paste0('Could not find rawCountData: ', rawCountData))
}

optionalVars <- c('barcodeWhitelist', 'barcodeBlacklist', 'cellbarcodeWhitelist', 'minCountPerCell', 'metricsFile', 'rawCountsExport', 'molInfoFile', 'rawFeatureMatrixH5', 'methodsForConsensus', 'majorityConsensusThreshold', 'callerDisagreementThreshold', 'doTSNE', 'datatypeName')
for (v in optionalVars) {
    if (!exists(v)) {
        if (v == 'minCountPerCell') {
            minCountPerCell <- 5
        } else if (v == 'doTSNE') {
            t <- TRUE
        } else {
            assign(v, NULL)
        }
    }
}

# If cellbarcodeWhitelist == inputMatrix, save/restore the set of cellbarcodes for reporting:
saveOriginalCellBarcodeFile <- NULL
if (!is.null(cellbarcodeWhitelist)) {
    if (cellbarcodeWhitelist == 'inputMatrix') {
        saveOriginalCellBarcodeFile <- 'originalBarcodes.txt'
        cellbarcodeWhitelist <- NULL
    }
}

# Truncate metricsFile if provided:
if (!is.null(metricsFile)) {
  file.create(metricsFile)
}

Data Loading / QC

barcodeData <- ProcessCountMatrix(rawCountData = rawCountData, minCountPerCell = minCountPerCell, barcodeWhitelist = barcodeWhitelist, barcodeBlacklist = barcodeBlacklist, cellbarcodeWhitelist = cellbarcodeWhitelist, saveOriginalCellBarcodeFile = saveOriginalCellBarcodeFile, metricsFile = metricsFile, datatypeName = datatypeName)
if (nrow(barcodeData) == 0) {
  stop('No passing barcodes')
}

if (ncol(barcodeData) == 0) {
  stop('No passing cells')
}

if (!is.null(rawCountsExport)) {
  saveRDS(barcodeData, file = rawCountsExport)
}

if (!params$skipNormalizationQc) {
  PlotNormalizationQC(barcodeData)
}

saturationData <- NULL
if (!is.null(molInfoFile)) {
  saturationData <- CalculateSaturationFor10x(barcodeMatrix = barcodeData, molInfoFile = molInfoFile)
}

Generate Hashing Calls

df <- NULL
if (nrow(barcodeData) > 0 && ncol(barcodeData) > 0){

    if (!is.null(saveOriginalCellBarcodeFile)) {
      cellbarcodeWhitelist <- read.table(saveOriginalCellBarcodeFile, header = FALSE, col.names = c('cellbarcode'))
      cellbarcodeWhitelist <- cellbarcodeWhitelist$cellbarcode
  }

    df <- GenerateCellHashingCalls(barcodeMatrix = barcodeData, methods = methods, methodsForConsensus = methodsForConsensus, cellbarcodeWhitelist = cellbarcodeWhitelist, metricsFile = metricsFile, perCellSaturation = saturationData, rawFeatureMatrixH5 = rawFeatureMatrixH5, majorityConsensusThreshold = majorityConsensusThreshold, callerDisagreementThreshold = callerDisagreementThreshold, doTSNE = doTSNE)
    write.table(df, file = callFile, sep = '\t', row.names = FALSE, quote = FALSE)

    if (!is.null(saveOriginalCellBarcodeFile)) {
      unlink(saveOriginalCellBarcodeFile)
  }
} else {
    stop('No passing cels were found in the count matrix')
}

Final Calls

if (!is.null(df)) {
  knitr::kable(head(df, n = 10))
}

Summary of Negative Cells

if (!is.null(df)) {
  SummarizeCellsByClassification(calls = df, barcodeMatrix = barcodeData)
} else {
  print('Something went wrong scoring cells')
}

Summary of Discordant Calls

if (!is.null(df) && sum(df$consensuscall == 'Discordant') > 0) {
  discord <- df[df$consensuscall == 'Discordant',]

  colSelect <- methods
  if (!is.null(methodsForConsensus)) {
    colSelect <- unique(c(methodsForConsensus, methods))
  }

  discord <- discord %>% group_by_at(colSelect) %>% summarize(Total = n()) %>% arrange(desc(Total))
  discord$Percent <- discord$Total / sum(discord$Total)
  discord$Percent <- discord$Total / sum(discord$Total)

  if (!is.null(methodsForConsensus)) {
    isConsensusMethod <- names(discord) %in% methodsForConsensus
    names(discord)[isConsensusMethod] <- paste0(names(discord)[isConsensusMethod], '*')
  }

  knitr::kable(discord, caption = 'All Methods')
}

Metrics

if (!is.null(metricsFile)) {
  metrics <- read.table(metricsFile, sep = '\t')
  names(metrics) <- c('Category', 'Metric', 'Value')
  knitr::kable(metrics[c('Metric', 'Value')], format.args = list(big.mark = ",", scientific = FALSE))
}