R/Common.R

Defines functions SortFactor CreateCommit NamedList CreateRandomSparseMatrix GenerateUUID Write10XSeurat Write10X Meow

Documented in CreateCommit CreateRandomSparseMatrix GenerateUUID NamedList SortFactor Write10X Write10XSeurat

Meow <- function(...) {
  prefix <- paste0("[", Sys.time(), "]")
  cat(prefix, ..., "\n")
}

#' An function that can write a 10X feature-barcode matrix
#'
#' @param expr.mat An expression matrix or a list of epxression matrices
#' @param output.dir Output folder
#' @export
Write10X <- function(expr.mat, output.dir) {
  SummarizeMatrices <- function(expr.mat) {
    ft.type <- "Gene Expression"
    if (is(expr.mat, "list")) {
      if (is.null(names(expr.mat))) {
        stop("List must be named")
      } else {
        ft.type <- unlist(lapply(seq_along(expr.mat), function(i) {
          return(rep(names(expr.mat)[i], nrow(expr.mat[[i]])))
        }))
      }
      expr.mat <- do.call(rbind, lapply(expr.mat, NormalizeMatrixType))
    }
    return(list(mtx=expr.mat, bc=colnames(expr.mat), ft=rownames(expr.mat), ft.type=ft.type))
  }

  NormalizeMatrixType <- function(x) {
    if (is(x, 'matrix')) {
      cl <- if (is.integer(x)) "ngCMatrix" else "dgCMatrix"
      x <- as(x, cl)
    }
    return(x)
  }

  WriteMatrixGZ <- function(info, mtx.path) {
    Meow("Writing matrix.mtx.gz...")
    con <- gzfile(mtx.path, compression=9)
    mtype <- if (is(info$mtx, "ngCMatrix")) "integer" else "real"
    writeLines(c(
      sprintf("%%%%MatrixMarket matrix coordinate %s general", mtype),
      sprintf("%s %s %s", nrow(info$mtx), ncol(info$mtx), length(info$mtx@x))
    ), con)
    data.table::fwrite(summary(info$mtx), mtx.path, sep=" ", append=TRUE, row.names=FALSE, col.names=FALSE)
    return(TRUE)
  }

  WriteBC <- function(info, bc.path) {
    Meow("Writing barcodes.tsv.gz...")
    con <- gzfile(bc.path, compression=9)
    write(info$bc, con)
    return(TRUE)
  }

  WriteFT <- function(info, ft.path) {
    Meow("Writing features.tsv.gz...")
    ft.tb <- data.frame(
      X1 = info$ft, # TODO: Handle gene ID
      X2 = info$ft,
      X3 = info$ft.type,
      stringsAsFactors = FALSE
    )
    data.table::fwrite(ft.tb, ft.path, compress="gzip", col.names=FALSE, row.names=FALSE, quote=FALSE, sep="\t")
  }

  require("Matrix")
  dir.create(output.dir, showWarnings=FALSE, recursive=TRUE)
  mtx.path <- file.path(output.dir, "matrix.mtx.gz")
  bc.path <- file.path(output.dir, "barcodes.tsv.gz")
  ft.path <- file.path(output.dir, "features.tsv.gz")
  info <- SummarizeMatrices(expr.mat)
  WriteMatrixGZ(info, mtx.path)
  WriteBC(info, bc.path)
  WriteFT(info, ft.path)
  Meow(sprintf("Successful: %s cells, %s features, and %s assay(s)",
      ncol(info$mtx), nrow(info$mtx), length(expr.mat)))
  return(TRUE)
}

#' A wrapper of Write10X for Seurat object
#'
#' @param obj A Seurat object
#' @param output.dir Output folder
#' @param rna.assay Name of RNA assay. Default is `RNA`.
#' @param adt.assay Name of ADT assay. Default is `ADT`.
#' @export
Write10XSeurat <- function(obj, output.dir, rna.assay="RNA", adt.assay="ADT") {
  expr.mat <- obj@assays[[rna.assay]]@counts
  if (adt.assay %in% names(obj@assays)) {
    expr.mat <- list("Gene Expression"=expr.mat, "Antibody Capture"=obj@assays[[adt.assay]]@counts)
  }
  Write10X(expr.mat, output.dir)
  return(TRUE)
}

#' Generate a uuid without '-'
#' @export
GenerateUUID <- function() {
  return (gsub('-', '', uuid::UUIDgenerate()))
}

#' For testing purposes
CreateRandomSparseMatrix <- function(n.genes, n.cells) {
  mat <- matrix(0, n.genes, n.cells, dimnames=list(paste0("g", 1:n.genes), paste0("c", 1:n.cells)))
  idx <- sample(length(mat), length(mat) * 0.01) # index of non-zeros
  mat[idx] <- ceiling(runif(length(idx)) * 100)
  require(Matrix)
  mat <- as(mat, "sparseMatrix")
  return(mat)
}

#' Creates an empty named list
NamedList <- function() {
  x <- list()
  names(x) <- character(0)
  return(x)
}

#' Create a commit format for BBrowser
#' @export
CreateCommit <- function(
  hash.id=GenerateUUID(),
  description="Empty",
  created.by="support@bioturing.com",
  created.at=as.numeric(Sys.time()) * 1000
) {
  return(list(
    created_by = created.by,
    created_at = created.at,
    hash_id = hash.id,
    description = description
  ))
}

#' Sort factor by counting labels (descending)
#' @export
SortFactor <- function(x) {
  n <- names(sort(table(x), decreasing=TRUE))
  return(factor(as.character(x), levels=n))
}
bioturing/rBCS documentation built on May 18, 2022, 5:26 p.m.