R/QCfunctions.R

Defines functions funMixModel getThreshold withinSampleDoublets plotHTOSingle plotHTO crossSampleDoublets .clr .ziMinMax .minMax normaliseExprs readFrom10X preprocessing

Documented in crossSampleDoublets normaliseExprs plotHTO plotHTOSingle preprocessing readFrom10X withinSampleDoublets

#' A function to preprocess the list of expression matrix
#'
#' @description  This function will keep the samples that are
#' common across the list of expression matrix,
#' and filter the features that are all zeros across samples,
#' and finally construct a \code{SingleCellExperiment} object
#'
#' @param exprsMat A list or a matrix indicates the expression matrices of the
#' testing datasets (each matrix must be \code{matrix} or
#' \code{dgCMatrix} class)
#' @param return_sce A logical input indicates whether
#' a \code{SingleCellExperiment} object will be return
#' @param assay_matrix A integer indicates which list will be
#' used as `assay` input of `SingleCellExperiment`
#' @param filter_features A logical input indicates
#' whether the features with all zeros will be removed
#' @param rowData A DataFrame indicates the rowData to be stored
#' in the sce object
#' @param colData A DataFrame indicates the colData to be stored
#' in the sce object
#'
#' @return either a SingleCellExperiment object or
#' a preprocessed expression matrix
#'
#' @examples
#' data(CITEseq_example, package = "CiteFuse")
#' sce_citeseq <- preprocessing(CITEseq_example)
#'
#' @importFrom SingleCellExperiment SingleCellExperiment altExp
#' @importFrom Matrix rowSums
#' @importFrom SummarizedExperiment SummarizedExperiment colData rowData
#' @importFrom methods as is
#' @importFrom S4Vectors DataFrame
#'
#' @export



preprocessing <- function(exprsMat = NULL,
                          return_sce = TRUE,
                          assay_matrix = 1,
                          filter_features = TRUE,
                          rowData = NULL,
                          colData = NULL) {


    if (!any("matrix" %in% methods::is(exprsMat),
             "dgCMatrix" %in% methods::is(exprsMat),
             "list" %in% methods::is(exprsMat))) {
        stop("exprsMat need to be a matrix or a list")
    }

    if ("list" %in% methods::is(exprsMat)) {
        if (any(!unlist(lapply(exprsMat, function(x) "dgCMatrix" %in% is(x))) &
                !unlist(lapply(exprsMat, function(x) "matrix" %in% is(x))))) {
            stop("Please make sure every expression matrix in
           the list are matrix or sparse matrix")
        }
    }

    if (any("matrix" %in% methods::is(exprsMat),
            "dgCMatrix" %in% methods::is(exprsMat))) {
        exprsMat <- list(exprsMat)
    }

    common_cells <- Reduce(intersect, lapply(exprsMat, colnames))

    if (length(common_cells) == 0) {
        stop("There is no common cells in this list...
         please check the matrix input")
    }




    # only keep the samples that are common across the list of exprsMat
    exprsMat <- lapply(exprsMat, function(exprs) {
        exprs <- exprs[, common_cells]
        if (!"dgCMatrix" %in% methods::is(exprs)) {
            exprs <- methods::as(exprs, "dgCMatrix")
        }
        exprs
    })

    # filter the features with all zeros

    if (filter_features) {
        exprsMat <- lapply(exprsMat, function(exprs) {
            if ("dgCMatrix" %in% methods::is(exprsMat)) {
                rowsums <- Matrix::rowSums(exprs)
                exprs <- exprs[rowsums != 0, ]

            }
            exprs
        })
    }



    if (return_sce) {

        sce <- SingleCellExperiment(assays = list(counts =
                                                      exprsMat[[assay_matrix]]))

        list_idx <- seq_len(length(exprsMat))[-assay_matrix]

        for (i in list_idx) {
            if (is.null(names(exprsMat)[i])) {
                name_exprs <- paste("altExp", i, sep = "_")
            } else {
                name_exprs <- names(exprsMat)[i]
            }
            SingleCellExperiment::altExp(sce, name_exprs) <-
                SummarizedExperiment(assays = list(counts = exprsMat[[i]]))
        }

        if (!is.null(rowData)) {

            if (!all(rownames(sce) %in% rownames(rowData))) {
                stop("Some rownames of the assay matrix
             does not have info in rowData (rownames of rowData)")
            }

            SummarizedExperiment::rowData(sce) <-
                DataFrame(rowData[rownames(sce), ])
        }

        if (!is.null(colData)) {

            if (!all(colnames(sce) %in% rownames(colData))) {
                stop("Some colnames of the assay matrix
             does not have info in colData (rownames of colData)")
            }

            SummarizedExperiment::colData(sce) <-
                DataFrame(colData[colnames(sce), ])
        }

        return(sce)

    } else {
        return(exprsMat)
    }

}



#' readFrom10X
#'
#' A function to read the data from 10X
#'
#'
#' @param dir A character indicates the directory of the 10X files
#' @param type A character indicates the format of the data, sparse or HDF5
#' @param feature_named_by A character indicates whehter the genes
#' will be named by gene_id or gene_symbol
#' @param filter_features A logical input indicates whether the features
#' with all zeros will be removed
#'
#' @return a SingleCellExperiment object
#'
#' @importFrom rhdf5 h5read
#' @importFrom Matrix readMM sparseMatrix
#' @importFrom SummarizedExperiment SummarizedExperiment
#' @importFrom methods as
#' @importFrom utils read.delim
#'
#' @examples
#'
#' \dontrun{
#' tmpdir <- tempdir()
#' tenXdata <- "http://cf.10xgenomics.com/samples/cell-exp/3.1.0/connect_5k_pbmc_NGSC3_ch1/"
#' file <- "connect_5k_pbmc_NGSC3_ch1_filtered_feature_bc_matrix.tar.gz"
#' download.file(paste0(tenXdata, file),file.path(tmpdir, file))
#' untar(file.path(tmpdir,file),
#'       exdir = tmpdir)
#' sce_citeseq_10X <- readFrom10X(file.path(tmpdir,
#' "filtered_feature_bc_matrix/"))
#' sce_citeseq_10X
#'}
#'
#' @export
#'


readFrom10X <- function(dir,
                        type = c("auto", "sparse", "HDF5"),
                        feature_named_by = c("gene_id", "gene_symbol"),
                        filter_features = TRUE) {





    type <- match.arg(type, c("auto", "sparse", "HDF5"))
    feature_named_by <- match.arg(feature_named_by, c("gene_id", "gene_symbol"))

    all_files <- list.files(dir)


    if (type == "auto") {

        if (sum(grepl(c("barcodes.tsv|features.tsv|matrix.mtx"),
                      all_files)) == 3) {
            type <- "sparse"
        } else if (any(grepl("\\.h5", all_files))) {
            type <- "HDF5"
        } else {
            stop("No avaliable files are found in the given dir.")
        }

    }

    if (type == "sparse") {
        if (sum(grepl(c("barcodes.tsv|features.tsv|matrix.mtx"),
                      all_files)) != 3) {
            stop("No avaliable files are found in the given dir.")
        }
    }


    if (type == "HDF5") {
        if (!any(grepl("\\.h5", all_files))) {
            stop("No avaliable files are found in the given dir.")
        }
    }



    if (type == "HDF5") {

        h5_path <- paste0(dir, list.files(dir, pattern = "\\.h5"))

        #rhdf5::h5ls(h5_path, all = TRUE)

        barcodes  <- as.character(h5read(h5_path,
                                         paste0("matrix", "/barcodes")))


        gene_id  <- as.character(h5read(h5_path,
                                        paste0("matrix",
                                               "/features/id")))
        gene_symbol  <- as.character(h5read(h5_path,
                                            paste0("matrix",
                                                   "/features/name")))
        gene_type  <- as.character(h5read(h5_path,
                                          paste0("matrix",
                                                 "/features/feature_type")))

        features <- data.frame(gene_id = gene_id,
                               gene_symbol = gene_symbol,
                               gene_type = gene_type)

        counts <- rhdf5::h5read(h5_path, paste0("matrix", "/data"))
        indices <- rhdf5::h5read(h5_path, paste0("matrix", "/indices"))
        indptr <- rhdf5::h5read(h5_path, paste0("matrix", "/indptr"))
        shape <- rhdf5::h5read(h5_path, paste0("matrix", "/shape"))

        exprs_mat <- Matrix::sparseMatrix(i = indices + 1,
                                          p = indptr,
                                          x = as.numeric(x = counts),
                                          dims = shape,
                                          giveCsparse = TRUE)

    }


    if (type == "sparse") {

        mat_path <- paste0(dir, "matrix.mtx.gz")
        exprs_mat <- Matrix::readMM(mat_path)
        exprs_mat <- methods::as(exprs_mat, "dgCMatrix")

        barcodes_path <- paste0(dir, "barcodes.tsv.gz")
        barcodes <- utils::read.delim(barcodes_path, header = FALSE,
                                      stringsAsFactors = FALSE)
        barcodes <- barcodes[, 1]



        feature_path <- paste0(dir, "features.tsv.gz")
        features <- utils::read.delim(feature_path, header = FALSE,
                                      stringsAsFactors = FALSE)
        colnames(features) <- c("gene_id", "gene_symbol", "gene_type")




    }

    rownames(exprs_mat) <- features[, feature_named_by]
    colnames(exprs_mat) <- barcodes


    gene_idx <- features$gene_type == "Gene Expression"
    adt_idx <- features$gene_type == "Antibody Capture"

    exprs_rna <- exprs_mat[gene_idx, ]
    exprs_adt <- exprs_mat[adt_idx, ]

    rownames(features) <- features$gene_id


    sce <- preprocessing(list(RNA = exprs_rna, ADT = exprs_adt),
                         rowData = features[gene_idx, ],
                         filter_features = filter_features)




    return(sce)

}




#' normaliseExprs
#'
#' A function that perform normalisation for alternative expression
#'
#' @param sce A \code{SingleCellExperiment} object
#' @param altExp_name Name of alternative expression
#' that will be used to perform normalisation
#' @param exprs_value A character indicates
#' which expression value in assayNames is used.
#' @param transform type of transformation,
#' either log or clr (Centered log ratio transform)
#' @param log_offset Numeric scalar specifying the pseudo-count
#' to add when log-transforming expression values. Default is 1
#'
#' @examples
#' data(CITEseq_example, package = "CiteFuse")
#' sce_citeseq <- preprocessing(CITEseq_example)
#' sce_citeseq <- normaliseExprs(sce = sce_citeseq,
#' altExp_name = "ADT",
#' transform = "log")
#' @importFrom SummarizedExperiment assay assayNames
#' @importFrom SingleCellExperiment altExpNames altExp
#' @importFrom Matrix rowMeans
#'
#' @return a SingleCellExperiment object
#'
#' @export

normaliseExprs <- function(sce,
                           altExp_name = NULL,
                           exprs_value = "counts",
                           transform = c("log", "clr", "zi_minMax", "minMax"),
                           log_offset = NULL

) {

    transform <- match.arg(transform, c("log", "clr", "zi_minMax", "minMax"),
                           several.ok = TRUE)

    if (altExp_name != "none") {
        if (!altExp_name %in% altExpNames(sce)) {
            stop("sce does not contain altExp_name as altExpNames")
        }

        assaynames <- SummarizedExperiment::assayNames(altExp(sce, altExp_name))
        if (!exprs_value %in% assaynames) {
            stop("sce does not contain exprs_value as assayNames for altExp")
        }

        exprs <- SummarizedExperiment::assay(altExp(sce, altExp_name),
                                             exprs_value)

    } else {

        # if altExp_name is "none", then the assay in SingleCellExperiment
        # is extracted (RNA in most of the cases)

        assaynames <- SummarizedExperiment::assayNames(sce)
        if (!exprs_value %in% assaynames) {
            stop("sce does not contain exprs_value as assayNames")
        }

        exprs <- SummarizedExperiment::assay(sce, exprs_value)
    }

    if (is.null(log_offset)) {
        log_offset <- 1
    }

    if (transform %in% "log") {

        exprs_norm <- log(exprs + log_offset)

    }

    if (transform %in% "clr") {

        exprs_norm <- .clr(exprs)

    }

    if (transform %in% "zi_minMax") {

        exprs_norm <- apply(exprs, 1, .ziMinMax)
        exprs_norm <- t(exprs_norm)

    }

    if (transform %in% "minMax") {


        exprs_norm <- apply(exprs, 1, .minMax)
        exprs_norm <- t(exprs_norm)

    }


    if (transform == "log") {
        new_assay_name <- "logcounts"
    } else {
        new_assay_name <- transform
    }

    colnames(exprs_norm) <- colnames(exprs)
    rownames(exprs_norm) <- rownames(exprs)

    if (altExp_name != "none") {
        SummarizedExperiment::assay(SingleCellExperiment::altExp(sce,
                                                                 altExp_name),
                                    new_assay_name) <- exprs_norm
    } else {
        SummarizedExperiment::assay(sce, new_assay_name) <- exprs_norm
    }

    return(sce)
}


.minMax <- function(x) {
    if (max(x) != 0) {
        res <- (x - min(x))/(max(x) - min(x))
    } else {
        res <- rep(0, length(x))
    }
    return(res)
}

.ziMinMax <- function(x) {
    res <- .minMax(x)
    res <- ifelse(res < 0.5, 0, res)
    return(res)
}

.clr <- function(X) {

    X[X == 0] <- min(X[X != 0])
    logX <- log(X)
    logSet <- logX[, seq_len(ncol(X)), drop = FALSE]
    ref <- Matrix::rowMeans(logSet)
    res <- sweep(logX, 1, ref, "-")

    return(res)
}


#' crossSampleDoublets
#'
#' A function that perform normalisation for alternative expression
#'
#' @param sce A \code{SingleCellExperiment} object
#' @param altExp_name Name of alternative expression that will be
#' used to perform normalisation.
#' If it is NULL, it will set to HTO.
#' @param totalExp_threshold the threshold indicates for the HTO
#' less than this threshold
#' will be filtered from the analysis
#'
#'
#' @return A SingleCellExperiment Object
#'
#' @examples
#' data(CITEseq_example, package = "CiteFuse")
#' sce_citeseq <- preprocessing(CITEseq_example)
#' sce_citeseq <- normaliseExprs(sce = sce_citeseq,
#' altExp_name = "HTO",
#' transform = "log")
#' sce_citeseq <- crossSampleDoublets(sce_citeseq)
#'
#' @importFrom SummarizedExperiment assay assayNames
#' @importFrom SingleCellExperiment altExpNames altExp
#' @importFrom Matrix rowSums
#' @importFrom mixtools normalmixEM
#' @importFrom S4Vectors metadata
#' @importFrom stats kmeans
#' @importFrom methods is
#'
#'
#' @export


crossSampleDoublets <- function(sce,
                                altExp_name = NULL,
                                totalExp_threshold = 10) {

    if (is.null(altExp_name)) {
        if (!"HTO" %in% SingleCellExperiment::altExpNames(sce)) {
            stop("There is no HTO data in the object")
        } else {
            altExp_name <- "HTO"
        }
    }

    if (!"logcounts" %in% assayNames(altExp(sce, altExp_name))) {
        warning("HTO does not contain logcounts...
            we will perform normaliseExprs() to get logcounts")
        sce <- normaliseExprs(sce, altExp_name, "log")
    }

    hto_cellHash_log <- assay(altExp(sce, altExp_name), "logcounts")

    hto_cellHash_log <- hto_cellHash_log[Matrix::rowSums(hto_cellHash_log) >
                                             totalExp_threshold, ]

    hto_threshold <- list()
    for (i in seq_len(nrow(hto_cellHash_log))) {

        vec <- hto_cellHash_log[i,][hto_cellHash_log[i,] > 0]
        mixmdl <- try(mixtools::normalmixEM(vec,
                                            fast = TRUE, maxrestarts = 1000,
                                            k = 2, maxit = 10000,
                                            mu = c(0, 10),
                                            lambda = c(1/2),
                                            sigma = rep(2, 2),
                                            ECM = TRUE, verb = FALSE),
                      silent = TRUE)



        if ("try-error" %in% methods::is(mixmdl)) {
            km <- stats::kmeans(vec, centers = 2)
            hto_threshold[[i]] <- min(max(vec[km$cluster == 1]),
                                      max(vec[km$cluster == 2]))
        } else {
            hto_threshold[[i]] <- getThreshold(mixmdl)
        }
    }

    names(hto_threshold) <- NULL
    hto_threshold <- unlist(hto_threshold)


    hto_cellHash_pass <- vapply(seq_len(nrow(hto_cellHash_log)),
                                function(x) {
                                    hto_cellHash_log[x,] > hto_threshold[x]
                                },
                                logical(ncol(hto_cellHash_log)))


    colnames(hto_cellHash_pass) <- rownames(hto_cellHash_log)


    hto_cellHash_mix_label <- apply(hto_cellHash_pass, 1, function(x) {
        if (sum(x) == 0) {
            "negative"
        } else if (sum(x) == 1) {
            which(x)
        } else {
            "doublet/multiplet"
        }
    })

    doubletClassify_between_class <- ifelse(!hto_cellHash_mix_label %in%
                                                c("negative",
                                                  "doublet/multiplet"),
                                            "Singlet", hto_cellHash_mix_label)


    sce$doubletClassify_between_label <- hto_cellHash_mix_label
    sce$doubletClassify_between_class <- doubletClassify_between_class
    doublet_res <- list(doubletClassify_between_threshold = hto_threshold,
                        doubletClassify_between_resultsMat = hto_cellHash_pass)
    S4Vectors::metadata(sce) <- c(S4Vectors::metadata(sce),
                                  doublet_res)


    return(sce)

}



#' plotHTO
#'
#' A function to plot HTO expression
#'
#' @param sce sce
#' @param which_idx which_idx
#' @param altExp_name altExp_name
#' @param ncol ncol
#'
#' @return A plot visualising the HTO expression
#'
#' @examples
#' data(CITEseq_example, package = "CiteFuse")
#' sce_citeseq <- preprocessing(CITEseq_example)
#' sce_citeseq <- normaliseExprs(sce = sce_citeseq,
#' altExp_name = "HTO",
#' transform = "log")
#' plotHTO(sce_citeseq, 1:4)
#'
#' @importFrom SummarizedExperiment assay assayNames
#' @importFrom SingleCellExperiment altExpNames altExp
#' @importFrom gridExtra grid.arrange
#' @importFrom cowplot axis_canvas insert_xaxis_grob insert_yaxis_grob
#' @importFrom grid unit
#' @importFrom utils combn
#' @import ggplot2
#'
#' @export


plotHTO <- function(sce,
                    which_idx = seq_len(2),
                    altExp_name = NULL,
                    ncol = 2) {

    combination <- utils::combn(which_idx, 2)

    ggList <- apply(combination, 2, function(x) {
        plotHTOSingle(sce, which_idx = x, altExp_name = altExp_name)
    })



    do.call(gridExtra::grid.arrange,
            c(ggList, ncol = min(length(ggList), ncol)))
}


#' plotHTOSingle
#'
#' A function to plot HTO expression
#'
#' @param sce sce
#' @param which_idx which_idx
#' @param altExp_name altExp_name
#'
#' @return A plot visualising the HTO expression
#'
#' @importFrom SummarizedExperiment assay assayNames
#' @importFrom SingleCellExperiment altExpNames altExp
#' @importFrom cowplot axis_canvas insert_xaxis_grob insert_yaxis_grob
#' @importFrom grid unit




plotHTOSingle <- function(sce,
                          which_idx = seq_len(2),
                          altExp_name = NULL
) {

    if (is.null(altExp_name)) {
        if (!"HTO" %in% altExpNames(sce)) {
            stop("There is no HTO data in the object")
        } else {
            altExp_name <- "HTO"
        }
    }

    if (!"logcounts" %in% assayNames(altExp(sce, altExp_name))) {
        warning("HTO does not contain logcounts...
            we will perform normaliseExprs() to get logcounts")
        sce <- normaliseExprs(sce, altExp_name, "log")
    }

    noThreshold <- FALSE
    if (!"doubletClassify_between_label" %in% colnames(colData(sce))) {
        warning("Haven't performed doubletClassify() yet!")
        noThreshold <- TRUE
    }

    hto_cellHash_log <- assay(SingleCellExperiment::altExp(sce, altExp_name),
                              "logcounts")


    df <- data.frame(t(as.matrix(hto_cellHash_log[which_idx, ])))

    colnames(df) <- lapply(strsplit(rownames(hto_cellHash_log[which_idx, ]),
                                    "\\."), "[[", 1)


    if (!noThreshold) {
        hto_cellHash_pass <- metadata(sce)[["doubletClassify_between_resultsMat"]]

        doublets <- hto_cellHash_pass[, which_idx[1]] &
            hto_cellHash_pass[, which_idx[2]]

        hto_threshold <- metadata(sce)[["doubletClassify_between_threshold"]]

    } else {
        doublets <- rep(FALSE, ncol(sce))
        hto_threshold <- rep(NULL, 2)
        hto_cellHash_pass <- NULL
    }


    pmain <- ggplot(df, aes(x = df[, 1], y = df[, 2], color = doublets)) +
        geom_point(alpha = 0.5) +
        geom_hline(yintercept = hto_threshold[which_idx[2]], col = "red",
                   linetype = 2, size = 1) +
        geom_vline(xintercept = hto_threshold[which_idx[1]], col = "red",
                   linetype = 2, size = 1) +
        scale_color_manual(values = c("#377EB8", "#E41A1C")) +
        theme_bw() +
        theme(aspect.ratio = 1) +
        xlab(colnames(df)[1]) +
        ylab(colnames(df)[2])




    xdens <- cowplot::axis_canvas(pmain, axis = "x") +
        geom_density(data = df, aes(x = df[, 1],
                                    fill = hto_cellHash_pass[, which_idx[1]],
                                    alpha = 0.5)) +
        scale_fill_manual(values = c("#377EB8", "#E41A1C")) +
        NULL

    ydens <- cowplot::axis_canvas(pmain, axis = "y", coord_flip = TRUE) +
        geom_density(data = df, aes(x = df[, 2],
                                    fill = hto_cellHash_pass[, which_idx[2]],
                                    alpha = 0.5)) +
        coord_flip() +
        scale_fill_manual(values = c("#377EB8", "#E41A1C")) +
        NULL

    p1 <- cowplot::insert_xaxis_grob(pmain, xdens,
                                     grid::unit(.2, "null"), position = "top")

    p2 <- cowplot::insert_yaxis_grob(p1, ydens,
                                     grid::unit(.2, "null"), position = "right")

    # ggdraw(p2)
    return(p2)
}




#' withinSampleDoublets
#'
#' doublet identification within batch
#'
#' @param sce a SingleCellExperiment
#' @param altExp_name expression name of HTO matrix
#' @param eps eps of DBSCAN
#' @param minPts minPts of DBSCAN
#'
#'
#' @return A SingleCellExperiment object
#'
#' @examples
#'
#' data(CITEseq_example, package = "CiteFuse")
#' sce_citeseq <- preprocessing(CITEseq_example)
#' sce_citeseq <- normaliseExprs(sce = sce_citeseq,
#' altExp_name = "HTO",
#' transform = "log")
#' sce_citeseq <- crossSampleDoublets(sce_citeseq)
#' sce_citeseq <- withinSampleDoublets(sce_citeseq,
#' minPts = 10)
#'
#' @importFrom SummarizedExperiment assay assayNames
#' @importFrom SingleCellExperiment altExpNames altExp counts
#' @importFrom dbscan dbscan
#' @export


withinSampleDoublets <- function(sce,
                                 altExp_name = NULL,
                                 eps = 200,
                                 minPts = 50) {
    sce$nUMI <- Matrix::colSums(SingleCellExperiment::counts(sce))

    if (is.null(altExp_name)) {
        if (!"HTO" %in% SingleCellExperiment::altExpNames(sce)) {
            stop("There is no HTO data in the object")
        } else {
            altExp_name <- "HTO"
        }
    }


    if (!"logcounts" %in% assayNames(altExp(sce, altExp_name))) {
        warning("HTO does not contain logcounts...
            we will perform normaliseExprs() to get logcounts")
        sce <- normaliseExprs(sce, altExp_name, "log")
    }

    if (!"doubletClassify_between_label" %in% colnames(colData(sce))) {
        stop("Haven't performed doubletClassify_between() yet!")
    }

    hto_threshold <- metadata(sce)[["doubletClassify_between_threshold"]]


    hto_cellHash_log <- assay(altExp(sce, altExp_name), "logcounts")

    hto_cellHash_mix_label <- sce$doubletClassify_between_label

    nUMI <- sce$nUMI

    batch_doublets_list <- lapply(seq_len(nrow(hto_cellHash_log)), function(i) {

        db_cluster <- dbscan::dbscan(cbind(nUMI, hto_cellHash_log[i,]),
                                     eps = eps, minPts = minPts)
        batch_doublets <- (db_cluster$cluster == 0 &
                               hto_cellHash_log[i,] > hto_threshold[i] &
                               hto_cellHash_mix_label != "doublet/multiplet")

    })

    batch_doublets_mat <- t(do.call(rbind, batch_doublets_list))


    doubletClassify_within_label <- apply(batch_doublets_mat, 1, function(res) {
        if (sum(res, na.rm = TRUE) == 0) {
            "NotDoublets(Within)"
        } else {
            paste("Doublets(Within)", which(res), sep = "_")
        }
    })

    doubletClassify_within_class <- ifelse(doubletClassify_within_label ==
                                               "NotDoublets(Within)",
                                           "Singlet", "Doublet")

    sce$doubletClassify_within_label <- doubletClassify_within_label
    sce$doubletClassify_within_class <- doubletClassify_within_class

    S4Vectors::metadata(sce) <- c(S4Vectors::metadata(sce),
                                  list(doubletClassify_within_resultsMat =
                                           batch_doublets_mat))

    return(sce)

}


#' @importFrom stats uniroot
#' @importFrom methods is


getThreshold <- function(mixmdl, verbose = FALSE){

    # if (verbose) {
    #   plot(mixmdl, which = 2)
    # }

    membership <- apply(mixmdl$posterior, 1, which.max)
    m_list <- sort(unique(membership))

    mu_list <- mixmdl$mu
    names(mu_list) <- seq_len(length(mu_list))
    mu_list <- mu_list[m_list]

    if (length(mu_list) > 1) {
        idx1 <- as.numeric(names(mu_list)[order(mu_list)][1])
        idx2 <- as.numeric(names(mu_list)[order(mu_list)][2])

        root <- try(stats::uniroot(funMixModel,
                                   interval = c(mixmdl$mu[idx1] -
                                                    mixmdl$sigma[idx1],
                                                mixmdl$mu[idx2] +
                                                    mixmdl$sigma[idx2]),
                                   mu1 = mixmdl$mu[idx1],
                                   mu2 = mixmdl$mu[idx2],
                                   sd1 = mixmdl$sigma[idx1],
                                   sd2 = mixmdl$sigma[idx2],
                                   rho1 = mixmdl$lambda[idx1],
                                   rho2 = mixmdl$lambda[idx2]),
                    silent = TRUE)

        if (!"try-error" %in% methods::is(root)) {
            t <- root$root
        }else{
            t <- 0
        }

    }else{
        t <- 0
    }

    return(t)
}


#' @importFrom stats dnorm

funMixModel <- function(x, mu1, mu2, sd1, sd2, rho1, rho2) {

    stats::dnorm(x, mean = mu1, sd = sd1) * rho1 -
        stats::dnorm(x, mean = mu2, sd = sd2) * rho2


}
SydneyBioX/CiteFuse documentation built on Feb. 13, 2023, 6:04 a.m.