R/quality_control.R

Defines functions asurat_add_qcmetrics asurat_quick_genefilter

Documented in asurat_add_qcmetrics asurat_quick_genefilter

#' Remove rarely expressed genes.
#'
#' This function removes genes rarely expressed among the cells.
#'
#' @param obj An ASURAT object.
#' @param min_nsamples A non-negative integer.
#'   The genes expressed by at least min_nsamples samples with non-zero
#'   read counts are saved and the others removed.
#'
#' @return An ASURAT object.
#' @export
#'
asurat_quick_genefilter <- function(obj, min_nsamples){
  #-----------------------------------------------
  # Error check
  #-----------------------------------------------
  if(min_nsamples >= length(obj[["sample"]][["identity"]])){
    stop("min_nsamples must be less than the number of samples.")
  }
  #-----------------------------------------------
  # History
  #-----------------------------------------------
  slot_name <- "asurat_quick_genefilter"
  obj[["history"]][[slot_name]][["min_nsamples"]] <- min_nsamples
  #-----------------------------------------------
  # Removing
  #-----------------------------------------------
  tmp <- as.matrix(obj[["data"]][["raw"]])
  inds <- which(apply(tmp, 1, function(x) sum(x > 0)) >= min_nsamples)
  mat <- tmp[inds,]
  obj[["data"]][["raw"]] <- as.data.frame(mat)
  #-----------------------------------------------
  # Definition
  #-----------------------------------------------
  obj[["variable"]] <- data.frame(
    symbol = obj[["variable"]][["symbol"]][inds],
    entrez = obj[["variable"]][["entrez"]][inds]
  )
  #-----------------------------------------------
  # Message
  #-----------------------------------------------
  n <- nrow(tmp) - length(inds)
  message(paste(n, " genes were removed.", sep = ""))

  return(obj)
}


#' Compute QC metrics for each sample.
#'
#' This function computes (i) the number of reads, (ii) number of genes
#' expressed with non-zero read counts, and (iii) percent of reads mapped
#' to the mitochondrial genes.
#'
#' @param obj An ASURAT object.
#' @param mt_gene_symbol A regex pattern to match gene symbols of mitochondrial
#' genes.
#'
#' @return An ASURAT object.
#' @export
#'
#' @examples
#' data(cerv_asurat)
#' cerv <- asurat_make_obj(
#'   mat_expression = cerv_asurat[["mat_expression"]],
#'   gene_symbol = cerv_asurat[["gene_symbol"]],
#'   gene_entrez = cerv_asurat[["gene_entrez"]],
#'   sample_identity = cerv_asurat[["sample_identity"]]
#' )
#' cerv <- qc.addmetrics(obj = cerv, mt_gene_symbol = "^MT-")
#'
asurat_add_qcmetrics <- function(obj, mt_gene_symbol){
  #-----------------------------------------------
  # Error check
  #-----------------------------------------------
  vec <- obj[["variable"]][["symbol"]]
  mt_inds <- which(grepl(mt_gene_symbol, vec))
  if(length(mt_inds) == 0){
    message("Warning: mt_gene_symbol was not found in the gene symbols.")
  }
  #-----------------------------------------------
  # History
  #-----------------------------------------------
  slot_name <- "asurat_add_qcmetrics"
  obj[["history"]][[slot_name]][["mt_gene_symbol"]] <- mt_gene_symbol
  #-----------------------------------------------
  # Definition
  #-----------------------------------------------
  mat <- as.matrix(obj[["data"]][["raw"]])
  obj[["sample"]][["n_reads"]] <-
    as.integer(apply(mat, 2, function(x) sum(x)))
  obj[["sample"]][["n_expressed_genes"]] <-
    as.integer(apply(mat, 2, function(x) sum(x > 0)))
  obj[["sample"]][["percent_mt"]] <-
    as.numeric(apply(mat[mt_inds,], 2, sum) / apply(mat, 2, sum) * 100)
  #-----------------------------------------------
  # Message
  #-----------------------------------------------

  return(obj)
}
johannesnicolaus/ASURAT_source documentation built on Dec. 21, 2021, 2:11 a.m.