R/bisque.R

Defines functions .bisque_patched_deconvolution .bisque_patched_model deconvolute_bisque build_model_bisque

Documented in .bisque_patched_deconvolution .bisque_patched_model build_model_bisque deconvolute_bisque

#' Placeholder model builder for Bisque
#'
#' No model is built, as Bisque does both steps in one. Please use the deconvolute method with
#' your single cell and bulk RNA-seq data to use Bisque.
#'
#' @return NULL. A message pointing to the deconvolute method is emitted as a side effect.
#'
#' @export
build_model_bisque <- function() {
  # Assemble the hint first; message() would concatenate the pieces without a separator anyway.
  hint <- paste0(
    "The deconvolution with Bisque is done in only one step. Please just use the ",
    "deconvolute method."
  )
  message(hint)

  NULL
}

#' Calculates the decomposition using the bisque algorithm
#'
#' IMPORTANT: No model is needed. Everything is done inside this method.
#'
#' Generates a reference profile based on single-cell data. Learns a
#' transformation of bulk expression based on observed single-cell
#' proportions and performs NNLS regression on these transformed
#' values to estimate cell proportions.
#'
#' @param bulk_gene_expression A matrix of bulk data. Rows are genes, columns are samples.
#'   Row and column names need to be set. At least two samples (columns) are required.
#' @param single_cell_object A matrix with the single-cell data. Rows are genes, columns are
#'   samples. Row and column names need to be set.
#' @param cell_type_annotations A vector of the cell type annotations. Has to be in the same order
#'   as the samples in single_cell_object.
#' @param batch_ids A vector of the ids of the samples or individuals.
#' @param markers Structure, such as character vector, containing marker genes
#'   to be used in decomposition. `unique(unlist(markers))` should
#'   return a simple vector containing each gene name. If no argument or NULL
#'   provided, the method will use all available genes for decomposition.
#' @param cell_types Character string. Name of the phenoData attribute of the single-cell
#'   ExpressionSet (built internally from `single_cell_object`) indicating the cell type label
#'   for each cell. Forwarded to BisqueRNA as `cell.types`.
#' @param subject_names Character string. Name of the phenoData attribute of the single-cell
#'   ExpressionSet (built internally from `single_cell_object`) indicating the individual label
#'   for each cell. Forwarded to BisqueRNA as `subject.names`.
#' @param use_overlap Boolean. Whether to use and expect overlapping samples
#'   in decomposition.
#' @param old_cpm Prior to version 1.0.4 (updated in July 2020), the package
#'   converted counts to CPM after subsetting the marker genes. Github user
#'   randel pointed out that the order of these operations should be switched.
#'   Thanks randel! This option is provided for replication of older BisqueRNA
#'   but should be enabled, especially for small marker gene sets.
#'   We briefly tested this change on the cortex and adipose datasets.
#'   The original and new order of operations produce estimates that have an
#'   average correlation of 0.87 for the cortex and 0.84 for the adipose within
#'   each cell type. Forwarded to BisqueRNA as `old.cpm`.
#' @param verbose Whether to produce an output on the console.
#'
#' @return The list produced by `BisqueRNA::ReferenceBasedDecomposition`, returned unchanged.
#'   According to the BisqueRNA documentation it includes:
#' \item{bulk.props}{A matrix of cell type proportion estimates with cell types as rows and
#'   individuals as columns.}
#' \item{sc.props}{A matrix of cell type proportions estimated directly from counting single-cell
#'   data.}
#' \item{rnorm}{Euclidean norm of the residuals for each individual's proportion estimates.}
#' \item{genes.used}{A vector of genes used in decomposition.}
#' \item{transformed.bulk}{The transformed bulk expression used for decomposition. These values
#'   are generated by applying a linear transformation to the CPM expression.}
#'
#' @export
#'
deconvolute_bisque <- function(bulk_gene_expression, single_cell_object, cell_type_annotations,
                               batch_ids, markers = NULL, cell_types = "cellType",
                               subject_names = "batchId", use_overlap = FALSE, verbose = FALSE,
                               old_cpm = TRUE) {
  # `missing()` is tested first: evaluating an omitted argument inside `is.null()` would raise
  # R's generic "argument is missing" error and the descriptive message would never be shown.
  if (missing(bulk_gene_expression) || is.null(bulk_gene_expression)) {
    stop("Parameter 'bulk_gene_expression' is missing or null, but it is required.")
  }
  if (missing(single_cell_object) || is.null(single_cell_object)) {
    stop("Parameter 'single_cell_object' is missing or null, but it is required.")
  }
  if (missing(cell_type_annotations) || is.null(cell_type_annotations)) {
    stop("Parameter 'cell_type_annotations' is missing or null, but it is required.")
  }
  if (missing(batch_ids) || is.null(batch_ids)) {
    stop("Parameter 'batch_ids' is missing or null, but it is required.")
  }

  # NCOL() counts a dimensionless vector as one column, whereas ncol() would return NULL and
  # make the comparison fail with "argument is of length zero".
  if (NCOL(bulk_gene_expression) < 2) {
    stop("Bisque requires at least two bulk samples.")
  }

  # Wrap both inputs in ExpressionSets, the container BisqueRNA operates on.
  sc_eset <- get_single_cell_expression_set(
    single_cell_object, batch_ids,
    rownames(single_cell_object), cell_type_annotations
  )
  bulk_eset <- Biobase::ExpressionSet(assayData = bulk_gene_expression)

  BisqueRNA::ReferenceBasedDecomposition(bulk_eset, sc_eset,
    markers = markers,
    cell.types = cell_types, subject.names = subject_names,
    use.overlap = use_overlap, verbose = verbose,
    old.cpm = old_cpm
  )
}


#' Calculates the signature model with bisque
#'
#' Builds a single-cell ExpressionSet, converts its counts to CPM, removes zero variance genes
#' and derives the reference profile with `BisqueRNA::GenerateSCReference`.
#'
#' @param single_cell_object A matrix with the single-cell data. Rows are genes, columns are
#'   samples. Row and column names need to be set.
#' @param cell_type_annotations A vector of the cell type annotations. Has to be in the same order
#'   as the samples in single_cell_object.
#' @param batch_ids A vector of the ids of the samples or individuals.
#' @param old_cpm Prior to version 1.0.4 (updated in July 2020), the package
#'   converted counts to CPM after subsetting the marker genes. Github user
#'   randel pointed out that the order of these operations should be switched.
#'   Thanks randel! This option is provided for replication of older BisqueRNA
#'   but should be enabled, especially for small marker gene sets.
#'   We briefly tested this change on the cortex and adipose datasets.
#'   The original and new order of operations produce estimates that have an
#'   average correlation of 0.87 for the cortex and 0.84 for the adipose within
#'   each cell type. In this function no marker subsetting takes place; the flag only decides
#'   whether the ExpressionSet is rebuilt before or after the CPM conversion.
#' @param verbose Whether to produce an output on the console.
#'
#' @return The signature matrix. Rows are genes, columns are cell types.
#' @keywords internal
#'
.bisque_patched_model <- function(single_cell_object, cell_type_annotations, batch_ids, old_cpm = TRUE,
                                  verbose = FALSE) {
  # `missing()` is tested first: evaluating an omitted argument inside `is.null()` would raise
  # R's generic "argument is missing" error and the descriptive message would never be shown.
  if (missing(single_cell_object) || is.null(single_cell_object)) {
    stop("Parameter 'single_cell_object' is missing or null, but it is required.")
  }
  if (missing(cell_type_annotations) || is.null(cell_type_annotations)) {
    stop("Parameter 'cell_type_annotations' is missing or null, but it is required.")
  }
  if (missing(batch_ids) || is.null(batch_ids)) {
    stop("Parameter 'batch_ids' is missing or null, but it is required.")
  }
  sc_eset <- get_single_cell_expression_set(
    single_cell_object, batch_ids,
    rownames(single_cell_object), cell_type_annotations
  )

  # NOTE(review): both `old_cpm` branches rebuild the ExpressionSet from the same expression
  # matrix and phenoData, without selecting genes. The flag therefore only moves the rebuild
  # before or after the CPM conversion - confirm that this is intended.
  if (old_cpm) {
    sc_eset <- Biobase::ExpressionSet(
      assayData = Biobase::exprs(sc_eset),
      phenoData = sc_eset@phenoData
    )
  }
  if (verbose) {
    message(
      "Converting single-cell counts to CPM and ",
      "filtering zero variance genes."
    )
  }
  # CountsToCPM and FilterZeroVarianceGenes are not exported by BisqueRNA, hence `:::`.
  sc_eset <- BisqueRNA:::CountsToCPM(sc_eset)
  if (!old_cpm) {
    sc_eset <- Biobase::ExpressionSet(
      assayData = Biobase::exprs(sc_eset),
      phenoData = sc_eset@phenoData
    )
  }
  sc_eset <- BisqueRNA:::FilterZeroVarianceGenes(sc_eset, verbose)

  # The cell type column name is fixed to "cellType" here; presumably this is the column
  # created by get_single_cell_expression_set() - TODO confirm.
  BisqueRNA::GenerateSCReference(sc_eset, "cellType")
}



#' Calculates the decomposition using the bisque algorithm
#'
#' Generates a reference profile based on single-cell data. Learns a
#' transformation of bulk expression based on observed single-cell
#' proportions and performs NNLS regression on these transformed
#' values to estimate cell proportions.
#'
#' @param bulk_gene_expression A matrix of bulk data. Rows are genes, columns are samples.
#'   Row and column names need to be set. At least two samples (columns) are required.
#' @param signature The signature matrix. Rows are genes, columns are cell types. Must not be
#'   NULL. Note that the current implementation only validates this argument; it is not
#'   forwarded to BisqueRNA.
#' @param single_cell_object A matrix with the single-cell data. Rows are genes, columns are
#'   samples. Row and column names need to be set.
#' @param cell_type_annotations A vector of the cell type annotations. Has to be in the same order
#'   as the samples in single_cell_object.
#' @param batch_ids A vector of the ids of the samples or individuals.
#' @param markers Structure, such as character vector, containing marker genes
#'   to be used in decomposition. `unique(unlist(markers))` should
#'   return a simple vector containing each gene name. If no argument or NULL
#'   provided, the method will use all available genes for decomposition.
#' @param cell_types Character string. Name of the phenoData attribute of the single-cell
#'   ExpressionSet (built internally from `single_cell_object`) indicating the cell type label
#'   for each cell. Forwarded to BisqueRNA as `cell.types`.
#' @param subject_names Character string. Name of the phenoData attribute of the single-cell
#'   ExpressionSet (built internally from `single_cell_object`) indicating the individual label
#'   for each cell. Forwarded to BisqueRNA as `subject.names`.
#' @param use_overlap Boolean. Whether to use and expect overlapping samples
#'   in decomposition.
#' @param old_cpm Prior to version 1.0.4 (updated in July 2020), the package
#'   converted counts to CPM after subsetting the marker genes. Github user
#'   randel pointed out that the order of these operations should be switched.
#'   Thanks randel! This option is provided for replication of older BisqueRNA
#'   but should be enabled, especially for small marker gene sets.
#'   We briefly tested this change on the cortex and adipose datasets.
#'   The original and new order of operations produce estimates that have an
#'   average correlation of 0.87 for the cortex and 0.84 for the adipose within
#'   each cell type. Forwarded to BisqueRNA as `old.cpm`.
#' @param verbose Whether to produce an output on the console.
#'
#' @return The list produced by `BisqueRNA::ReferenceBasedDecomposition`, returned unchanged.
#'   According to the BisqueRNA documentation it includes:
#' \item{bulk.props}{A matrix of cell type proportion estimates with cell types as rows and
#'   individuals as columns.}
#' \item{sc.props}{A matrix of cell type proportions estimated directly from counting single-cell
#'   data.}
#' \item{rnorm}{Euclidean norm of the residuals for each individual's proportion estimates.}
#' \item{genes.used}{A vector of genes used in decomposition.}
#' \item{transformed.bulk}{The transformed bulk expression used for decomposition. These values
#'   are generated by applying a linear transformation to the CPM expression.}
#'
#' @keywords internal
#'
.bisque_patched_deconvolution <- function(bulk_gene_expression, signature, single_cell_object,
                                          cell_type_annotations, batch_ids, markers = NULL,
                                          cell_types = "cellType", subject_names = "batchId",
                                          use_overlap = FALSE, verbose = FALSE, old_cpm = TRUE) {
  # `missing()` is tested first: evaluating an omitted argument inside `is.null()` would raise
  # R's generic "argument is missing" error and the descriptive message would never be shown.
  if (missing(bulk_gene_expression) || is.null(bulk_gene_expression)) {
    stop("Parameter 'bulk_gene_expression' is missing or null, but it is required.")
  }
  if (missing(signature) || is.null(signature)) {
    stop("Parameter 'signature' is missing or null, but it is required.")
  }
  if (missing(single_cell_object) || is.null(single_cell_object)) {
    stop("Parameter 'single_cell_object' is missing or null, but it is required.")
  }
  if (missing(cell_type_annotations) || is.null(cell_type_annotations)) {
    stop("Parameter 'cell_type_annotations' is missing or null, but it is required.")
  }
  if (missing(batch_ids) || is.null(batch_ids)) {
    stop("Parameter 'batch_ids' is missing or null, but it is required.")
  }
  # NOTE(review): this function is meant to be BisqueRNA::ReferenceBasedDecomposition with a
  # user-supplied signature matrix (so ones from other methods can be used). As written,
  # `signature` is only checked above and never reaches the decomposition, which derives its
  # own reference from the single-cell data - confirm whether the patched logic is missing.

  # NCOL() counts a dimensionless vector as one column, whereas ncol() would return NULL and
  # make the comparison fail with "argument is of length zero".
  if (NCOL(bulk_gene_expression) < 2) {
    stop("Bisque requires at least two bulk samples.")
  }

  # Wrap both inputs in ExpressionSets, the container BisqueRNA operates on.
  sc_eset <- get_single_cell_expression_set(
    single_cell_object, batch_ids,
    rownames(single_cell_object), cell_type_annotations
  )
  bulk_eset <- Biobase::ExpressionSet(assayData = bulk_gene_expression)

  BisqueRNA::ReferenceBasedDecomposition(bulk_eset, sc_eset,
    markers = markers,
    cell.types = cell_types, subject.names = subject_names,
    use.overlap = use_overlap, verbose = verbose,
    old.cpm = old_cpm
  )
}
PelzKo/immunedeconv2 documentation built on Feb. 12, 2025, 4:16 p.m.