R/qc.R

Defines functions remove_outliers plot_UMI_dup plot_demultiplex plot_mapping plot_QC_pairs calculate_QC_metrics detect_outlier .qq_outliers_robust

Documented in calculate_QC_metrics detect_outlier plot_demultiplex plot_mapping plot_QC_pairs plot_UMI_dup .qq_outliers_robust remove_outliers

#' Detect outliers based on robust linear regression of QQ plot
#' @param x a vector of mahalanobis distance
#' @param df degree of freedom for chi-square distribution
#' @param conf confidence for linear regression
#'
#' @return cell names of outliers
#' @importFrom stats ppoints qchisq coef qnorm 
.qq_outliers_robust <- function(x, df, conf) {
    n <- length(x)
    P <- stats::ppoints(n)
    z <- stats::qchisq(P, df=df)
    ord.x <- x[order(x)]
    coef <- stats::coef(MASS::rlm(ord.x ~ z, maxit=2000))
    a <- coef[1]
    b <- coef[2]
    zz <- stats::qnorm(1 - (1 - conf)/2)
    SE <- (b/stats::dchisq(z, df))*sqrt(P*(1 - P)/n)
    fit.value <- a + b*z
    upper <- fit.value + zz*SE
    thr <- max(ord.x[ord.x < upper])
    return(names(x[x > thr]))
}


#' Detect outliers based on QC metrics
#'
#' @description This algorithm will try to find \code{comp} number of components
#'   in quality control metrics using a Gaussian mixture model. Outlier
#'   detection is performed on the component with the most genes detected. The
#'   rest of the components will be considered poor quality cells. More cells
#'   will be classified low quality as you increase \code{comp}.
#
#' @param sce a \code{SingleCellExperiment} object containing QC metrics.
#' @param comp the number of component used in GMM. Depending on the quality of
#'  the experiment.
#' @param sel_col a vector of column names which indicate the columns to use
#'  for QC.
#' By default it will be the statistics generated by `calculate_QC_metrics()`
#' @param type only looking at low quality cells (`low`) or possible
#' doublets (`high`) or both (`both`)
#' @param conf confidence interval for linear regression at lower and
#' upper tails.Usually, this is smaller for lower tail because we hope
#' to pick out more low quality cells than doublets.
#' @param batch whether to perform quality control separately for each batch.
#' Default is FALSE. If set to TRUE then you should have a column called
#' `batch` in the `colData(sce)`.
#'
#' @details detect outlier using Mahalanobis distances
#'
#' @return an updated \code{SingleCellExperiment} object with an `outlier`
#' column in \code{colData}
#'
#' @importFrom stats cov pchisq mahalanobis complete.cases qchisq
#' @importFrom robustbase covMcd
#' @importFrom mclust Mclust mclustBIC
#' @importFrom glue glue glue_collapse
#'
#' @export
#' @examples
#' data("sc_sample_data")
#' data("sc_sample_qc")
#' sce = SingleCellExperiment(assays = list(counts = as.matrix(sc_sample_data)))
#' organism(sce) = "mmusculus_gene_ensembl"
#' gene_id_type(sce) = "ensembl_gene_id"
#' QC_metrics(sce) = sc_sample_qc
#' demultiplex_info(sce) = cell_barcode_matching
#' UMI_dup_info(sce) = UMI_duplication
#' # the sample qc data already run through function `calculate_QC_metrics`
#' # for a new sce please run `calculate_QC_metrics` before `detect_outlier`
#' sce = detect_outlier(sce)
#' table(QC_metrics(sce)$outliers)
#'
detect_outlier <- function(sce,
                        comp=1,
                        sel_col=NULL,
                        type=c("low", "both", "high"),
                        conf=c(0.9, 0.99),
                        batch=FALSE) {
    type <- match.arg(type)
    sce <- validObject(sce)
    # check format:
    if (is.null(sel_col)) {
        sel_col <- c("number_of_genes", "total_count_per_cell",
                    "non_mt_percent", "non_ERCC_percent")
    }
    if (!all(sel_col %in% colnames(QC_metrics(sce)))) {
        missing <- sel_col[!(sel_col %in% colnames(QC_metrics(sce)))]

        missing_vars <- glue_collapse(glue("'{missing}'"), sep = ",", last = " and ")
        message(glue("the following QC metrics not found in colData from sce: {missing_vars}"))

        if (any(c("number_of_genes", "total_count_per_cell") %in% missing_vars)) {
            stop("the quality control metrics should at least contain the number of
                genes(`number_of_genes`) and counts per cell(`total_count_per_cell`)!")
        }
    }
    qc_cols <- colnames(QC_metrics(sce)) %in% sel_col
    x_all <- as.data.frame(QC_metrics(sce)[, qc_cols])
    x_all$total_count_per_cell <- log2(x_all$total_count_per_cell + 1)

    if (!all(stats::complete.cases(x_all))) {
        stop("NAs found in the selected columns, check the quality control matrix")
    }
    if (is.null(dim(x_all))) {
        QC_metrics(sce)$outliers <- FALSE
        return(sce)
    }
    if (!batch) {
        batch_info <- rep(1, nrow(x_all))
    } else {
        if ("batch" %in% colnames(colData(sce))) {
            batch_info <- colData(sce)$batch
        } else {
            stop("did not find column `batch` in colData(sce).")
        }
    }

    if(any(rowSums(x_all) == 0)){
        stop("Some cells have zero counts. remove these cells before detecting outlier.")
    }else if(any(colSums(x_all) == 0)){
        stop("Some QC metrics are all zeros, check your quality control matrix by QC_metrics(sce).")
    }
    outliers <- rep(FALSE, nrow(x_all))

    for (a_batch in unique(batch_info)) {
        x <- x_all[batch_info == a_batch, ]
        dist <- mahalanobis(x, center=colMeans(x), cov=cov(x))
        keep <- !(dist > qchisq(0.99, ncol(x)))
        mod <- Mclust(x[keep, ],
                    G=comp,
                    modelNames=c("EEE","EVV", "VVV"),
                    verbose=FALSE)

        if (comp == 1) {
            covr <- covMcd(x, alpha=0.7)
            dist <- mahalanobis(x,
                                center=covr$center,
                                cov=covr$cov)
            mean_diff <- sign(t(x) - covr$center)
            QC_sign <- c(-1, 1)[as.factor(apply(mean_diff, 2, function(t) {sum(t) > 0}))]
            neg_dist <- dist[QC_sign == -1]
            pos_dist <- dist[QC_sign == 1]
            if (type == "both") {
                outlier_cells <- .qq_outliers_robust(neg_dist, ncol(x), conf[1])
                outlier_cells <- c(outlier_cells,
                                .qq_outliers_robust(pos_dist, ncol(x), conf[2]))
            } else if (type == "low") {
                outlier_cells <- .qq_outliers_robust(neg_dist, ncol(x), conf[1])
            } else if (type == "high") {
                outlier_cells <- .qq_outliers_robust(pos_dist, ncol(x), conf[2])
            }
        } else {
            ord_fst <- c(seq_len(comp))[order(mod$parameters$mean[1, ], decreasing = TRUE)]
            poor_comp <- ord_fst[2:comp]
            good_comp <- ord_fst[1]
            keep1 <- rep(TRUE, nrow(x))
            keep1[keep][mod$classification %in% poor_comp] <- FALSE
            keep1[!keep] <- FALSE
            sub_x <- x[keep1, ]
            covr <- covMcd(sub_x, alpha=0.7)
            sub_dist <- mahalanobis(sub_x,
                                    center=covr$center,
                                    cov=covr$cov)

            mean_diff <- sign(t(sub_x) - covr$center)
            QC_sign <- c(-1, 1)[as.factor(apply(mean_diff, 2, function(t) {sum(t) > 0}))]
            neg_dist <- sub_dist[QC_sign == -1]
            pos_dist <- sub_dist[QC_sign == 1]

            outlier_cells <- .qq_outliers_robust(neg_dist, ncol(sub_x), conf[1])
            outlier_cells <- c(outlier_cells,
                                .qq_outliers_robust(pos_dist, ncol(sub_x), conf[2]))
            outlier_cells <- c(outlier_cells, rownames(x[!keep1, ]))

            if (!(type == "both")) {
                mean_diff <- sign(t(x)-mod$parameters$mean[, good_comp])
                QC_sign <- c(-1, 1)[as.factor(apply(mean_diff, 2, function(t) {sum(t) > 0}))]
                is_outlier_cell <- rownames(x) %in% outlier_cells
                if (type == "low") {
                    outlier_cells <- rownames(x)[is_outlier_cell & (QC_sign == -1)]
                } else if (type == "high") {
                    outlier_cells <- rownames(x)[is_outlier_cell & (QC_sign == 1)]
                }
            }
        }

        if (any(rownames(x) %in% outlier_cells)) {
            outliers[batch_info == a_batch][rownames(x)  %in% outlier_cells] <- TRUE
        }
    }
    QC_metrics(sce)$outliers <- as.factor(outliers)
    return(sce)
}


#' Calculate QC metrics from gene count matrix
#'
#' @param sce a \code{SingleCellExperiment} object containing gene counts
#' @details get QC metrics using gene count matrix.
#' The QC statistics added are \itemize{
#'  \item{number_of_genes} number of genes detected.
#'  \item{total_count_per_cell} sum of read number after UMI deduplication.
#'  \item{non_mt_percent} 1 - percentage of mitochondrial gene counts.
#'  Mitochondrial genes are retrived by GO term GO:0005739
#'  \item{non_ERCC_percent} ratio of exon counts to ERCC counts
#'  \item{non_ribo_percent} 1 - percentage of ribosomal gene counts
#' ribosomal genes are retrived by GO term GO:0005840.
#' }
#' @return an \code{SingleCellExperiment} with updated QC metrics
#'
#' @importFrom SummarizedExperiment assay assay<- assays assays<- assayNames rowData rowData<- colData colData<-
#'
#' @export
#' @examples
#' data("sc_sample_data")
#' data("sc_sample_qc")
#' sce <- SingleCellExperiment(assays = list(counts = as.matrix(sc_sample_data)))
#' organism(sce) <- "mmusculus_gene_ensembl"
#' gene_id_type(sce) <- "ensembl_gene_id"
#' QC_metrics(sce) <- sc_sample_qc
#' demultiplex_info(sce) <- cell_barcode_matching
#' UMI_dup_info(sce) <- UMI_duplication
#'
#' # The sample qc data already run through function `calculate_QC_metrics`.
#' # So we delete these columns and run `calculate_QC_metrics` to get them again:
#' colnames(colnames(QC_metrics(sce)))
#' QC_metrics(sce) <- QC_metrics(sce)[,c("unaligned","aligned_unmapped","mapped_to_exon")]
#' sce = calculate_QC_metrics(sce)
#' colnames(QC_metrics(sce))
#'
calculate_QC_metrics <- function(sce) {
    sce <- validObject(sce) # check the sce object
    if (!("counts" %in% names(assays(sce)))){
        stop("counts not in names(assays(sce)). Cannot find count data.")
    }

    # get gene number
    gene_number <- colSums(assay(sce,"counts")>0)
    if (all(gene_number == 0)) {
        stop("all genes have zero count. Check the expression matrix.")
    }else{
        if (!("scPipe" %in% names(sce@metadata))){
            sce@metadata[["scPipe"]] <- list(QC_cols=c("number_of_genes"))
        } else if (!("QC_cols" %in% names(sce@metadata$scPipe))){
            sce@metadata[["scPipe"]] <- list(QC_cols=c("number_of_genes"))
            QC_metrics(sce) <- DataFrame(row.names = colnames(sce))
            # create a empty QC metrics if not exists
        }
        QC_metrics(sce)$number_of_genes <- gene_number
    }

    # get total count per cell
    QC_metrics(sce)$total_count_per_cell <- colSums(assay(sce,"counts"))

    # get ERCC ratio
    if(any(grepl("^ERCC-", rownames(sce)))){
        exon_count <- colSums(assay(sce,"counts")[!grepl("^ERCC-", rownames(sce)), , drop = FALSE])
        ERCC_count <- colSums(assay(sce,"counts")[grepl("^ERCC-", rownames(sce)), , drop = FALSE])
        QC_metrics(sce)$non_ERCC_percent <- exon_count/(ERCC_count+exon_count+1e-5)
    }else{
        message("no ERCC spike-in. Skip `non_ERCC_percent`")
        }

    # get mt percentage
    if (!is.na(gene_id_type(sce))) {
        mt_genes <- get_genes_by_GO(returns=gene_id_type(sce),
                                    dataset=organism(sce),
                                    go=c("GO:0005739"))
        if (length(mt_genes)>0) {
            if (sum(rownames(sce) %in% mt_genes)>1) {
                mt_count <- colSums(assay(sce,"counts")[rownames(sce) %in% mt_genes, , drop = FALSE])
                QC_metrics(sce)$non_mt_percent <- (colSums(assay(sce,"counts"))-
                                                    mt_count)/(colSums(assay(sce,"counts"))+1e-5)
                # add 1e-5 to make sure they are not NA
            }
        }
    }
    else {
        message("no gene_id_type, skip `non_mt_percent`")
    }

    # get ribosomal percentage
    if (!is.na(gene_id_type(sce))) {
        ribo_genes <- get_genes_by_GO(returns=gene_id_type(sce),
                                dataset=organism(sce),
                                go=c("GO:0005840"))
        if (length(ribo_genes)>0) {
            if (sum(rownames(sce) %in% ribo_genes)>1) {
                ribo_count <- colSums(assay(sce,"counts")[rownames(sce) %in% ribo_genes, ])
                QC_metrics(sce)$non_ribo_percent <- (colSums(assay(sce,"counts"))-
                                                    ribo_count)/(colSums(assay(sce,"counts"))+0.01)
            }
        }
    }
    else {
        message("no gene_id_type, skip `non_ribo_percent`")
    }
    return(sce)
}


#' Plot GGAlly pairs plot of QC statistics from \code{SingleCellExperiment}
#' object
#'
#' @param sce a \code{SingleCellExperiment} object
#' @param sel_col a vector of column names which indicate the columns to use for
#'   plot. By default it will be the statistics generated by
#'   `calculate_QC_metrics()`
#' @export
#' @return a ggplot2 object
#' @examples
#' data("sc_sample_data")
#' data("sc_sample_qc")
#' sce = SingleCellExperiment(assays = list(counts = as.matrix(sc_sample_data)))
#' organism(sce) = "mmusculus_gene_ensembl"
#' gene_id_type(sce) = "ensembl_gene_id"
#' QC_metrics(sce) = sc_sample_qc
#' demultiplex_info(sce) = cell_barcode_matching
#' UMI_dup_info(sce) = UMI_duplication
#' sce = detect_outlier(sce)
#'
#' plot_QC_pairs(sce)
#'
plot_QC_pairs <- function(sce, sel_col=NULL) {
    sce <- validObject(sce) # check the sce object
    if (is.null(sel_col)) {
        sel_col <- c("number_of_genes", "total_count_per_cell", "non_mt_percent",
                    "non_ERCC_percent", "non_ribo_percent", "outliers")
    }
    if (!any(sel_col %in% colnames(QC_metrics(sce)))){
        stop("`sel_col` not in colnames(QC_metrics(sce))).")
    }
    x <- as.data.frame(QC_metrics(sce)[, colnames(QC_metrics(sce)) %in% sel_col])


    if ("outliers" %in% colnames(x)) {
        plot_output <- GGally::ggpairs(
        x,
        mapping = ggplot2::aes_string(colour = "outliers"),
        upper = list(continuous = GGally::wrap("cor", size=3, hjust=0.8))
        )
    }
    else {
        plot_output <- GGally::ggpairs(x)
    }

    return(plot_output)
}

#' Plot mapping statistics for \code{SingleCellExperiment} object.
#'
#' @param sce a \code{SingleCellExperiment} object
#' @param sel_col a vector of column names, indicating the columns to use for
#'   plot. by default it will be the mapping result.
#' @param percentage TRUE to convert the number of reads to percentage
#' @param dataname the name of this dataset, used as plot title
#'
#' @importFrom reshape melt
#' @importFrom stats prcomp reorder
#' @return a ggplot2 object
#' @export
#'
#' @examples
#' data("sc_sample_data")
#' data("sc_sample_qc")
#' sce = SingleCellExperiment(assays = list(counts = as.matrix(sc_sample_data)))
#' organism(sce) = "mmusculus_gene_ensembl"
#' gene_id_type(sce) = "ensembl_gene_id"
#' QC_metrics(sce) = sc_sample_qc
#' demultiplex_info(sce) = cell_barcode_matching
#' UMI_dup_info(sce) = UMI_duplication
#'
#' plot_mapping(sce,percentage=TRUE,dataname="sc_sample")
#'
plot_mapping <- function(sce,
                        sel_col=NULL,
                        percentage=FALSE,
                        dataname="") {
    sce <- validObject(sce) # check the sce object
    if (is.null(sel_col)) {
        sel_col <- c("unaligned", "aligned_unmapped", "ambiguous_mapping",
                    "mapped_to_ERCC", "mapped_to_intron", "mapped_to_exon")
    }
    x <- as.data.frame(QC_metrics(sce)[, sel_col])


    mapping_stat <- x
    mapping_stat$sample_name <- stats::reorder(rownames(mapping_stat), mapping_stat$mapped_to_exon)
    mapping_stat_prop <- as.data.frame(prop.table(as.matrix(mapping_stat[, sapply(mapping_stat, is.numeric)]), 1))
    mapping_stat_prop$sample_name <- mapping_stat$sample_name
    dat.m <- melt(mapping_stat, id.vars="sample_name")
    dat.m1 <- melt(mapping_stat_prop, id.vars="sample_name")

    if (!percentage) {
        p <- ggplot2::ggplot(dat.m, ggplot2::aes_string(x="sample_name", y="value", fill="variable")) + ggplot2::scale_fill_brewer(palette="Set1")+
        ggplot2::geom_bar(stat="identity", width=1)+
        ggplot2::ylab("number of reads")+
        ggplot2::xlab("cell sorted by number of reads mapped to exon")+
        ggplot2::theme(axis.title.x=ggplot2::element_blank(), axis.text.x=ggplot2::element_blank())

        if (dataname != "") {
            p <- p + ggplot2::ggtitle(paste0("Overall mapping statistics of ", dataname, " (number of reads)"))
        } else {
            p <- p + ggplot2::ggtitle(paste0("Overall mapping statistics (number of reads)"))
        }
    }
    else {
        p <- ggplot2::ggplot(dat.m1, ggplot2::aes_string(x="sample_name", y="value", fill="variable")) + ggplot2::scale_fill_brewer(palette="Set1")+
        ggplot2::geom_bar(stat="identity", width=1)+
        ggplot2::ylab("percentage of reads")+
        ggplot2::xlab("cell sorted by number of reads mapped_to_exon")+
        ggplot2::scale_y_continuous(labels=scales::percent_format())+
        ggplot2::theme(axis.title.x=ggplot2::element_blank(), axis.text.x=ggplot2::element_blank())

        if (dataname != "") {
            p <- p + ggplot2::ggtitle(paste0("Overall mapping statistics of ", dataname, " (percentage)"))
        } else {
            p <- p + ggplot2::ggtitle(paste0("Overall mapping statistics (percentage)"))
        }
    }

    return(p)
}



#' plot_demultiplex
#'
#'@description Plot cell barcode demultiplexing result for the
#'  \code{SingleCellExperiment}. The barcode demultiplexing result is shown
#'  using a barplot, with the bars indicating proportions of total reads.
#'  Barcode matches and mismatches are summarised along with whether or not the
#'  read mapped to the genome. High proportion of genome aligned reads with no
#'  barcode match may indicate barcode integration failure.
#'
#' @param sce a \code{SingleCellExperiment} object
#'
#' @return a ggplot2 bar chart
#'
#' @importFrom rlang .data
#' @export
#'
#' @examples
#' data("sc_sample_data")
#' data("sc_sample_qc")
#' sce = SingleCellExperiment(assays = list(counts = as.matrix(sc_sample_data)))
#' organism(sce) = "mmusculus_gene_ensembl"
#' gene_id_type(sce) = "ensembl_gene_id"
#' QC_metrics(sce) = sc_sample_qc
#' demultiplex_info(sce) = cell_barcode_matching
#' UMI_dup_info(sce) = UMI_duplication
#'
#' plot_demultiplex(sce)
#'
plot_demultiplex <- function(sce){
    sce <- validObject(sce) # check the sce object
    demultiplex_stat <- demultiplex_info(sce)
    demultiplex_stat[,"count"] <- demultiplex_stat[,"count"]/sum(demultiplex_stat[,"count"])
    if (is.null(demultiplex_stat)){
        stop("`demultiplex_stat` does not exists in sce. demultiplex_info(sce) == NULL)")
    }
    demultiplex_stat$label_y <- demultiplex_stat[,"count"]+0.05
    demultiplex_stat$label_tx <- scales::percent(demultiplex_stat[,"count"])
    #kp = demultiplex_stat[,"count"]/sum(demultiplex_stat[,"count"])>0.2
    #label_tx[!kp] = ""

    blank_theme <- ggplot2::theme_minimal()+
        ggplot2::theme(
        axis.text.x = ggplot2::element_text(angle = 30, hjust=1,size=12),
        panel.border = ggplot2::element_blank(),
        plot.title = ggplot2::element_text(size=14, face="bold"),
        legend.position = "none"
        )

    p <- ggplot2::ggplot(data=demultiplex_stat, ggplot2::aes(x=.data$status, y=.data$count,
                                            fill=.data$status))+
        ggplot2::geom_bar(width = 1, stat = "identity")+
        #coord_polar("y", start=0)+
        ggplot2::scale_fill_brewer(palette="Dark2")+
        blank_theme+
        ggplot2::geom_text(ggplot2::aes(y = .data$label_y, label = .data$label_tx))+
        ggplot2::labs(title="Cell barcode demultiplexing statistics", y="percentage",x="")
    return(p)
}


#' Plot UMI duplication frequency
#'
#' @description Plot the UMI duplication frequency.
#'
#' @param sce a \code{SingleCellExperiment} object
#' @param log10_x whether to use log10 scale for x axis
#'
#' @return a line chart of the UMI duplication frequency
#' @export
#'
#' @examples
#' data("sc_sample_data")
#' data("sc_sample_qc")
#' sce = SingleCellExperiment(assays = list(counts = as.matrix(sc_sample_data)))
#' organism(sce) = "mmusculus_gene_ensembl"
#' gene_id_type(sce) = "ensembl_gene_id"
#' QC_metrics(sce) = sc_sample_qc
#' demultiplex_info(sce) = cell_barcode_matching
#' UMI_dup_info(sce) = UMI_duplication
#'
#' plot_UMI_dup(sce)
#'
plot_UMI_dup <- function(sce, log10_x = TRUE){
    sce <- validObject(sce) # check the sce object
    tmp <- UMI_dup_info(sce)
    tmp <- tmp[-nrow(tmp),] # remove 1000
    the_sum <- cumsum(tmp)
    for (i in seq_len(nrow(tmp)-1)){ # cut the zero counts
        if (the_sum$count[nrow(tmp)] - the_sum$count[i]<10){
            tmp <- tmp[seq_len(i),]
            break
        }
    }
    dup_col <- ""
    if ("duplication_number" %in% colnames(tmp)){
        dup_col <- "duplication_number"
    }else if ("duplication.number" %in% colnames(tmp)){
        dup_col <- "duplication.number"
    }else{
        message("cannot find column containing UMI duplication number.")
        return(NULL)
    }
    p <- ggplot2::ggplot(data=tmp, ggplot2::aes(x=tmp[, dup_col], y=tmp[,"count"])) +
        ggplot2::geom_smooth(method = "loess",se = FALSE)+ggplot2::theme_bw()
    if (log10_x){
        p <- p + ggplot2::scale_x_log10()+
            ggplot2::labs(title="Distribution of UMI duplication numbers(log10).",
                x="log10(UMI duplication number)",
                y="frequency")
    }else{
        p <- p + ggplot2::labs(title="Distribution of UMI duplication numbers.",
                x="UMI duplication number",
                y="frequency")
    }
    return(p)
}



#' Remove outliers in \code{SingleCellExperiment}
#'
#' @description Removes outliers flagged by \code{detect_outliers()}
#'
#' @param sce a \code{SingleCellExperiment} object
#' @export
#' @return a \code{SingleCellExperiment} object without outliers
#' @examples
#' data("sc_sample_data")
#' data("sc_sample_qc")
#' sce = SingleCellExperiment(assays = list(counts = as.matrix(sc_sample_data)))
#' organism(sce) = "mmusculus_gene_ensembl"
#' gene_id_type(sce) = "ensembl_gene_id"
#' QC_metrics(sce) = sc_sample_qc
#' demultiplex_info(sce) = cell_barcode_matching
#' UMI_dup_info(sce) = UMI_duplication
#' sce = detect_outlier(sce)
#' dim(sce)
#' sce = remove_outliers(sce)
#' dim(sce)
#'
remove_outliers <- function(sce) {
    sce <- validObject(sce) # check the sce object
    if (!("outliers" %in% colnames(QC_metrics(sce)))) {
      stop("No outlier information. Please run `detect_outlier()` to get the outliers.")
    }
    out_cell <- QC_metrics(sce)$outliers == FALSE
    return(sce[, out_cell])
}
LuyiTian/scPipe documentation built on Dec. 11, 2023, 8:21 p.m.