R/lbic_model.R

Defines functions lbic_model

Documented in lbic_model

#' lbic_model
#'
#' This function is used to select the best fit model for each gene based on the
#' least BIC value
#'
#' @param bic.value A dataframe of BIC values from fitting GLM using
#' error distributions P, NB, ZIP, ZINB; the output from \code{model_bic}.
#'
#' @param counts A non-negative integer matrix of scRNA-seq filtered read counts
#' containing genes belonging to the family of ZINB distributions selected from
#' \code{ks_test}.
#' The rows of the matrix are genes and columns are samples/cells.
#'
#' @export
#'
#' @import magrittr
#'
#' @return A list of genes chosen to be following one of the 4 distributions
#' P, NB, ZIP, ZINB based on the least BIC value and the corresponding subset
#' of counts from \code{filter_counts}
#'
#' @examples
#'
#' data(scData)
#'
#' # apply the lbic_model function to select the model with the least
#' # BIC value on the matrix of BIC values obtained after running
#' # model_bic function.
#'
#' library(BiocParallel)
#' scData_models <- fit_models(counts=scData$counts, cexpr=scData$covariates, lib.size=scData$lib_size,
#' BPPARAM=bpparam())
#'
#' scData_bicvals <- model_bic(scData_models)
#'
#' scData_least.bic <- lbic_model(scData_bicvals, scData$counts)


lbic_model <- function(bic.value, counts){

  #subset counts that passed the KS test and was passed on to calculate the BIC
  counts_bic <- counts[rownames(counts) %in% rownames(bic.value),]
  counts_list <- lapply(as.list(seq_len(1):dim(counts_bic)[1]), function(x) counts_bic[x[1],])
  names(counts_list) <- rownames(counts_bic)

  #return column name of min value for each row
  BIC_colmax <- colnames(bic.value)[apply(bic.value,1,function(x) which.min(x[x>0]))]

  #create data frame
  BIC_dataframe <- as.data.frame(BIC_colmax)

  #Create data frame of minimum BIC values with gene names
  BIC_genename <- as.data.frame(BIC_dataframe)
  row.names(BIC_genename) <- rownames(bic.value)

  #grep the names of genes following each distribution
  poi_genes <- rownames(BIC_genename)[grep(BIC_genename[,1], pattern = "^P_bic")]
  nb_genes <- rownames(BIC_genename)[grep(BIC_genename[,1], pattern = "^NB_bic")]
  zip_genes <- rownames(BIC_genename)[grep(BIC_genename[,1], pattern = "^ZIP_bic")]
  zinb_genes <- rownames(BIC_genename)[grep(BIC_genename[,1], pattern = "^ZINB_bic")]

  least_bic <- list()

  least_bic[["P"]] <- counts_list[names(counts_list) %in% poi_genes]
  least_bic[["NB"]] <- counts_list[names(counts_list) %in% nb_genes]
  least_bic[["ZIP"]] <- counts_list[names(counts_list) %in% zip_genes]
  least_bic[["ZINB"]] <- counts_list[names(counts_list) %in% zinb_genes]

  return(least_bic)
}
Malindrie/scShapes documentation built on Nov. 21, 2022, 8:58 a.m.