#' Make an ASURAT object.
#'
#' This function creates an ASURAT object.
#'
#' An ASURAT object consists of history, variable, sample, and data slots
#' at the beginning, and several others in the future.
#' Here, the parameters of ASURAT's functions that users run are recorded in
#' history, while information of genes, samples, and gene expression matrices
#' are written in variable, sample, and data, respectively.
#'
#' @param mat_expression A numeric matrix or data frame of raw read counts,
#' where rows and columns stand for genes and samples, respectively.
#' @param gene_symbol A one-dimensional vector of gene symbols.
#' @param gene_entrez A one-dimensional vector of Entrez gene IDs.
#' @param sample_identity A one-dimensional vector of sample identities.
#'
#' @return An ASURAT object in the form of a list.
#' @export
#'
#' @examples
#' mat_expression <- matrix(1:12, nrow = 3, ncol = 4)
#' gene_symbol <- data.frame(c("gene_A", "gene_B", "gene_C"))
#' gene_entrez <- data.frame(c("1", "2", "3"))
#' sample_identity <- data.frame(c("cell_1", "cell_2", "cell_3", "cell_4"))
#' myobj <- asurat_make_obj(
#' mat_expression = mat_expression,
#' gene_symbol = gene_symbol,
#' gene_entrez = gene_entrez,
#' sample_identity = sample_identity
#' )
#'
make_asurat_obj <- function(
mat_expression, gene_symbol, gene_entrez, sample_identity
){
#-----------------------------------------------
# Error check
#-----------------------------------------------
if(anyNA(gene_symbol)){
stop("gene_symbol is not permitted to contain NAs.")
}else if(anyNA(sample_identity)){
stop("sample_identity is not permitted to contain NAs.")
}else if(anyNA(mat_expression)){
stop("mat_expression is not permitted to contain NAs.")
}
#-----------------------------------------------
# Definition
#-----------------------------------------------
obj <- list(
history = list(),
variable = data.frame(
symbol = gene_symbol,
entrez = gene_entrez
),
sample = data.frame(
identity = sample_identity
),
data = c()
)
obj[["data"]][["raw"]] <- as.data.frame(mat_expression)
#-----------------------------------------------
# Names
#-----------------------------------------------
names(obj[["variable"]]) <- c("symbol", "entrez")
names(obj[["sample"]]) <- c("identity")
rownames(obj[["data"]][["raw"]]) <- obj[["variable"]][["symbol"]]
colnames(obj[["data"]][["raw"]]) <- obj[["sample"]][["identity"]]
#-----------------------------------------------
# Message
#-----------------------------------------------
return(obj)
}
#' Filter samples by previously calculated single-cell metrics
#'
#' @param obj ASURAT object
#' @param min_nReads Minimal number of total read counts per cell.
#' @param max_nReads Maximal number of total read counts per cell.
#' @param min_nGenes Minimal number of detected genes per cell.
#' @param max_nGenes Maximal number of detected genes per cell.
#' @param min_percent_MT Minimal percentage of mitochondrial gene expression per cell.
#' @param max_percent_MT Maximal percentage of mitochondrial gene expression per cell.
#'
#' @return ASURAT object.
#' @export
#'
#' @examples trim_samples(obj = pbmc_4000,
#' min_nReads = 2606, max_nReads = 30000,
#' min_nGenes = 993, max_nGenes = 1e+10,
#' min_percent_MT = 0, max_percent_MT = 14)
trim_samples <- function(obj,
min_nReads = 0, max_nReads = 1e+10,
min_nGenes = 0, max_nGenes = 1e+10,
min_percent_MT = 0, max_percent_MT = 100
){
#--------------------------------------------------
# History
#--------------------------------------------------
slot_name <- "trim_samples"
obj[["history"]][[slot_name]][["min_nReads"]] <- min_nReads
obj[["history"]][[slot_name]][["max_nReads"]] <- max_nReads
obj[["history"]][[slot_name]][["min_nGenes"]] <- min_nGenes
obj[["history"]][[slot_name]][["max_nGenes"]] <- max_nGenes
obj[["history"]][[slot_name]][["min_percent_MT"]] <- min_percent_MT
obj[["history"]][[slot_name]][["max_percent_MT"]] <- max_percent_MT
#--------------------------------------------------
# Reduce count matrices
#--------------------------------------------------
inds_1 <- which(
(obj[["sample"]][["nReads"]] >= min_nReads) &
(obj[["sample"]][["nReads"]] <= max_nReads)
)
inds_2 <- which(
(obj[["sample"]][["nGenes"]] >= min_nGenes) &
(obj[["sample"]][["nGenes"]] <= max_nGenes)
)
inds_3 <- which(
(obj[["sample"]][["percent_MT"]] >= min_percent_MT) &
(obj[["sample"]][["percent_MT"]] <= max_percent_MT)
)
inds <- intersect(intersect(inds_1, inds_2), inds_3)
obj[["data"]][["raw"]] <- as.data.frame(obj[["data"]][["raw"]][,inds])
#--------------------------------------------------
# The other information
#--------------------------------------------------
obj[["sample"]] <- data.frame(
barcode = obj[["sample"]][["barcode"]][inds],
nReads = as.integer(obj[["sample"]][["nReads"]][inds]),
nGenes = as.integer(obj[["sample"]][["nGenes"]][inds]),
percent_MT = obj[["sample"]][["percent_MT"]][inds]
)
return(obj)
}
#' Remove low quality genes
#'
#' Remove low quality genes based on the number of mean read counts across all cells
#'
#' @param obj ASURAT object.
#' @param min_meanReads Minimal mean read counts per gene across all cells.
#'
#' @return ASURAT object.
#' @export
#'
#' @examples trim_variables(obj = pbmc_4000, min_meanReads = 0.05)
trim_variables <- function(obj, min_meanReads){
#--------------------------------------------------
# History
#--------------------------------------------------
slot_name <- "trim_variables"
obj[["history"]][[slot_name]][["min_meanReads"]] <- min_meanReads
#--------------------------------------------------
# Reduce count matrix
#--------------------------------------------------
tmp <- as.matrix(obj[["data"]][["raw"]])
inds <- which(apply(tmp, 1, mean) >= min_meanReads)
mat <- tmp[inds, ]
obj[["data"]][["raw"]] <- as.data.frame(mat)
#--------------------------------------------------
# The others
#--------------------------------------------------
genes <- data.frame(
symbol = obj[["variable"]][["symbol"]][inds],
entrez = obj[["variable"]][["entrez"]][inds]
)
obj[["variable"]] <- genes
return(obj)
}
#' Perform quick QC
#'
#' Perform quick quality control by removing variables for which the number of
#' non-zero expressing samples is less than min_nsamples. ***Explanation unclear***
#'
#' @param obj ASURAT object.
#' @param min_nsamples ***Need clarification***
#' @param mitochondria_symbol Regex for mitochondrial genes (e.g. "^MT-").
#'
#' @return ASURAT object.
#' @export
#'
#' @examples do_quickQC(obj = pbmc_4000, min_nsamples = 10,
#' mitochondria_symbol = "^MT-")
do_quickQC <- function(obj, min_nsamples, mitochondria_symbol){
#--------------------------------------------------
# History
#--------------------------------------------------
slot_name <- "do_quickQC"
obj[["history"]][[slot_name]][["min_nsamples"]] <- min_nsamples
#--------------------------------------------------
# Reduce variables
#--------------------------------------------------
tmp <- as.matrix(obj[["data"]][["raw"]])
inds <- which(apply(tmp, 1, function(x) sum(x>0)) >= min_nsamples)
mat <- tmp[inds,]
obj[["data"]][["raw"]] <- as.data.frame(mat)
#--------------------------------------------------
# Other information
#--------------------------------------------------
obj[["variable"]] <- data.frame(
symbol = obj[["variable"]][["symbol"]][inds],
entrez = obj[["variable"]][["entrez"]][inds]
)
obj[["sample"]] <- data.frame(
barcode = colnames(mat),
nReads = as.integer(apply(mat, 2, function(x) sum(x))),
nGenes = as.integer(apply(mat, 2, function(x) sum(x > 0))),
percent_MT = apply(
mat[grepl(mitochondria_symbol, rownames(mat)),], 2, function(x) 100 * sum(x)
) / apply(mat, 2, function(x) sum(x))
)
return(obj)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.