R/CoreMethods.R
In scfind: A search tool for single cell RNA-seq data by gene lists

Documented in buildCellIndex.SCESet buildCellTypeIndex.SCESet findCell.SCESet findCellType.data.frame

#' Build a cell type Index
#' 
#' Calculates a fraction of expressed cells per gene per cell type
#'
#' @param object object of SingleCellExperiment class
#' @param cell_type_column column name in the colData slot of the object SingleCellExperiment 
#' containing the cell classification information
#' 
#' @name buildCellTypeIndex
#'
#' @return a `data.frame` containing calculated gene index
#'
#' @importFrom SummarizedExperiment rowData colData colData<-
#' @importFrom SingleCellExperiment logcounts
#' @importFrom dplyr group_by summarise %>%
#' @importFrom reshape2 melt dcast
buildCellTypeIndex.SCESet <- function(object, cell_type_column) {
    if (is.null(object)) {
        stop("Please define a object using the `object` parameter!")
    }
    if (is.null(colData(object)[[cell_type_column]])) {
        stop("Please define a correct `cell_type_column` in the `colData` slot!")
    }
    gene <- cell_class <- exprs <- NULL
    object_local <- logcounts(object) > 0
    rownames(object_local) <- rowData(object)$feature_symbol
    colnames(object_local) <- colData(object)[[cell_type_column]]
    
    # calculate median feature expression in every cell class of object
    object_local <- reshape2::melt(object_local)
    colnames(object_local) <- c("gene", "cell_class", "exprs")
    object_local <- object_local %>% group_by(gene, cell_class) %>% summarise(gene_exprs_prob = sum(exprs)/length(exprs))
    object_local <- reshape2::dcast(object_local, gene ~ cell_class, value.var = "gene_exprs_prob")
    rownames(object_local) <- object_local$gene
    object_local <- object_local[, 2:ncol(object_local), drop = FALSE]
    return(object_local)
}

#' @rdname buildCellTypeIndex
#' @aliases buildCellTypeIndex
#' @importFrom SingleCellExperiment SingleCellExperiment
setMethod("buildCellTypeIndex", "SingleCellExperiment", buildCellTypeIndex.SCESet)

#' Find cell types associated with a given gene list
#' 
#' Calculates p-values of a log-likelihood of a list of genes to be associated
#' with each cell type. Log-likelihood is based on gene expression values.
#'
#' @param gene_index a data.frame with cell types in columns and genes in rows
#' @param gene_list genes that need to be searched in the gene_index
#' 
#' @name findCellType
#'
#' @return a named numeric vector containing p-values
#'
#' @importFrom stats pchisq
#' @importFrom methods is
findCellType.data.frame <- function(gene_index, gene_list) {
    if (is.null(gene_index)) {
        stop("Please define a gene_index using the `gene_index` parameter!")
    }
    if (is.null(gene_list)) {
        stop("Please define a list of genes using the `gene_list` parameter!")
    }
    if (!"data.frame" %in% is(gene_index)) {
        stop("The gene_index must be a data.frame!")
    }
    if (!"character" %in% is(gene_list)) {
        stop("The gene_list must be a character vector!")
    }
    
    p0 <- colSums(gene_index)/nrow(gene_index)
    
    if (length(gene_list[!gene_list %in% rownames(gene_index)]) != 0) {
        warning(paste0("Genes: ", paste(gene_list[!gene_list %in% rownames(gene_index)], collapse = ", "), 
            " were exluded from search since they are not present in the Gene Index!"))
        gene_list <- gene_list[gene_list %in% rownames(gene_index)]
    }
    
    if (length(gene_list) == 0) {
        stop("None of the genes in the gene_list are present in the gene_index!")
    }
    
    gene_index <- gene_index[gene_list, ]
    lambda <- 2 * log(apply(gene_index, 2, prod)/p0^(nrow(gene_index)))
    lambda[is.na(lambda)] <- NA
    lambda[is.infinite(lambda)] <- NA
    p_values <- pchisq(lambda, length(gene_list), lower.tail = FALSE)
    return(p_values)
}

#' @rdname findCellType
#' @aliases findCellType
setMethod("findCellType", "data.frame", findCellType.data.frame)

#' Build a cell Index
#' 
#' Creates a compressed cell Index
#'
#' @param object object of SingleCellExperiment class
#' containing the cell classification information
#' @param cell_type_column column name in the colData slot of the object SingleCellExperiment 
#' containing the cell classification information
#' 
#' @name buildCellIndex
#'
#' @return a `data.frame` containing calculated gene index
#' @useDynLib scfind
#' @importFrom hash hash
#' @importFrom bit as.bit
#' @importFrom SingleCellExperiment logcounts
#' @importFrom SummarizedExperiment colData
#' @import Rcpp
buildCellIndex.SCESet <- function(object, cell_type_column) {
    if (is.null(object)) {
        stop("Please define a object using the `object` parameter!")
    }
    if (is.null(colData(object)[[cell_type_column]])) {
        stop("Please define a correct `cell_type_column` in the `colData` slot!")
    }
    gene_exprs <- logcounts(object) > 0
    l <- as.numeric(floor(log2(ncol(object)/rowSums(gene_exprs))))
    filter <- !is.infinite(l)
    l <- l[filter]
    gene_exprs <- gene_exprs[filter, ]
    inds <- lapply(apply(gene_exprs, 1, which), as.numeric)
    p0 <- vapply(unique(colData(object)[[cell_type_column]]), function(ct) {
        sum(gene_exprs[, colData(object)[[cell_type_column]] == ct])/
        (nrow(gene_exprs) * length(which(colData(object)[[cell_type_column]] == ct)))
    }, numeric(1))
    names(p0) <- unique(colData(object)[[cell_type_column]])
    f_symbs <- rowData(object)$feature_symbol[filter]
    rownames(gene_exprs) <- f_symbs
    codes <- eliasFanoCoding(inds, l)
    res <- Map(list, H = lapply(codes$H, as.bit), L = lapply(codes$L, as.bit), l = l)
    index <- hash(f_symbs, res)
    return(list(index = index, cell_types = colData(object)[[cell_type_column]], p0 = p0))
}

#' @rdname buildCellIndex
#' @aliases buildCellIndex
#' @importFrom SingleCellExperiment SingleCellExperiment
setMethod("buildCellIndex", "SingleCellExperiment", buildCellIndex.SCESet)

#' Find cells associated with a given gene list
#' 
#' Calculates p-values of a log-likelihood of a list of genes to be associated
#' with each cell type. Log-likelihood is based on gene expression values.
#'
#' @param input object of SingleCellExperiment class
#' @param genelist column name in the colData slot of the object SingleCellExperiment 
#' containing the cell classification information
#' 
#' @name findCell
#'
#' @return a `list` containing calculated gene index
#' @useDynLib scfind
#' @import Rcpp
findCell.SCESet <- function(input, genelist) {
    if (is.null(input)) {
        stop("Please define an input parameter!")
    }
    if (is.null(genelist)) {
        stop("Please define a list of genes using the `genelist` parameter!")
    }
    if (!"list" %in% is(input)) {
        stop("The gene_index must be a list!")
    }
    if (!"character" %in% is(genelist)) {
        stop("The genelist must be a character vector!")
    }
    inds <- list()
    for (i in genelist) {
        tmp <- eliasFanoDecoding(as.numeric(input$index[[i]]$H), as.numeric(input$index[[i]]$L), 
            input$index[[i]]$l)
        inds[[i]] <- tmp
    }
    common_exprs_cells <- data.frame(cell_id = Reduce(intersect, inds), cell_type = input$cell_types[Reduce(intersect, 
        inds)])
    cell_types_p <- sapply(sapply(inds, function(x) {
        factor(input$cell_types[x], levels = unique(input$cell_types))
    }, simplify = FALSE), table)/as.vector(table(factor(input$cell_types, levels = unique(input$cell_types))))
    
    # log-likelihood with chi-squared distribution
    lambda <- 2 * log(apply(cell_types_p, 1, prod)/(input$p0)^(length(genelist)))
    lambda[is.na(lambda)] <- NA
    lambda[is.infinite(lambda)] <- NA
    p_values <- pchisq(lambda, length(genelist), lower.tail = FALSE)
    return(list(p_values = p_values, common_exprs_cells = common_exprs_cells))
}

#' @rdname findCell
#' @aliases findCell
setMethod("findCell", "list", findCell.SCESet)

Any scripts or data that you put into this service are public.

scfind documentation built on April 28, 2020, 7:01 p.m.

rdrr.io home R language documentation Run R code online

CRAN packages Bioconductor packages R-Forge packages GitHub packages

Note that we can't provide technical support on individual packages. You should contact the package authors for that.

scfind
A search tool for single cell RNA-seq data by gene lists

R/CoreMethods.R
In scfind: A search tool for single cell RNA-seq data by gene lists

Defines functions findCell.SCESet buildCellIndex.SCESet findCellType.data.frame buildCellTypeIndex.SCESet

Documented in buildCellIndex.SCESet buildCellTypeIndex.SCESet findCell.SCESet findCellType.data.frame

Try the scfind package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

scfind A search tool for single cell RNA-seq data by gene lists

R/CoreMethods.R In scfind: A search tool for single cell RNA-seq data by gene lists

Defines functions findCell.SCESet buildCellIndex.SCESet findCellType.data.frame buildCellTypeIndex.SCESet

Documented in buildCellIndex.SCESet buildCellTypeIndex.SCESet findCell.SCESet findCellType.data.frame

Try the scfind package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

scfind
A search tool for single cell RNA-seq data by gene lists

R/CoreMethods.R
In scfind: A search tool for single cell RNA-seq data by gene lists