R/runspatialdecon.R

#' Run spatialdecon
#' 
#' Runs spatialdecon from an S4 object
#' @param object An S4 object such as a Seurat object that includse a "Spatial" element in the "assays" slot or a GeoMxSet object
#' @param ... Arguments passed to spatialdecon
#' @return decon results in list or in GeoMxSet object
#' 
setGeneric("runspatialdecon", signature = "object",
           function(object, ...) standardGeneric("runspatialdecon"))

#' Run spatialdecon on a NanostringGeomxSet object
#'
#' A wrapper for applying spatialdecon to a NanostringGeomxSet object.
#'
#' @param norm_elt normalized data element in assayData in NanostringGeomxSet object
#' @param raw_elt raw data element in assayData in NanostringGeomxSet object
#' @param X Cell profile matrix. If NULL, the safeTME matrix is used.
#' @param wts Optional, a matrix of weights.
#' @param resid_thresh A scalar, sets a threshold on how extreme individual data
#'  points' values
#'  can be (in log2 units) before getting flagged as outliers and set to NA.
#' @param lower_thresh A scalar. Before log2-scale residuals are calculated,
#'  both observed and fitted
#'  values get thresholded up to this value. Prevents log2-scale residuals from
#'  becoming extreme in
#'  points near zero.
#' @param align_genes Logical. If TRUE, then Y, X, bg, and wts are row-aligned
#'  by shared genes.
#' @param is_pure_tumor A logical vector denoting whether each AOI consists of
#'  pure tumor. If specified,
#'  then the algorithm will derive a tumor expression profile and merge it with
#'  the immune profiles matrix.
#' @param cell_counts Number of cells estimated to be within each sample. If
#' provided alongside norm_factors,
#'  then the algorithm will additionally output cell abundance esimtates on the
#'  scale of cell counts.
#' @param cellmerges A list object holding the mapping from beta's cell names to
#'  combined cell names. If left
#'  NULL, then defaults to a mapping of granular immune cell definitions to
#'   broader categories.
#' @param n_tumor_clusters Number of tumor-specific columns to merge into the
#' cell profile matrix.
#'  Has an impact only when is_pure_tumor argument is used to indicate pure
#'   tumor AOIs.
#'  Takes this many clusters from the pure-tumor AOI data and gets the average
#'  expression profile in each cluster.  Default 10.
#' @param maxit Maximum number of iterations. Default 1000.

#' @importFrom Biobase assayData
#' @importClassesFrom GeomxTools NanoStringGeoMxSet
#'
#' @return For GeoMxSet object, if not given cellmerges and cell_counts, a valid GeoMx S4 object
#' including the following items
#'  \itemize{
#'    \item In pData
#'    \itemize{
#'       \item beta: matrix of cell abundance estimates, cells in rows and
#'                observations in columns
#'       \item p: matrix of p-values for H0: beta == 0
#'       \item t: matrix of t-statistics for H0: beta == 0
#'       \item se: matrix of standard errors of beta values
#'       \item prop_of_all: rescaling of beta to sum to 1 in each observation
#'       \item prop_of_nontumor: rescaling of beta to sum to 1 in each observation,
#'                            excluding tumor abundance estimates
#'       \item sigmas: covariance matrices of each observation's beta estimates
#'      }
#'    \item In assayData
#'    \itemize{
#'       \item yhat: a matrix of fitted values
#'       \item resids: a matrix of residuals from the model fit.
#'                  (log2(pmax(y, lower_thresh)) - log2(pmax(xb, lower_thresh))).
#'    }
#'    \item In experimentData
#'    \itemize{
#'       \item SpatialDeconMatrix: the cell profile matrix used in the decon fit.
#'    }
#'}
#'
#'
#' if given cellmerges, the valid GeoMx S4 object will additionally include
#' the following items
#'  \itemize{
#'    \item In pData
#'    \itemize{
#'       \item beta.granular: cell abundances prior to combining closely-related
#'                         cell types
#'       \item sigma.granular: sigmas prior to combining closely-related cell types
#'    }
#'}
#'
#' if given cell_counts, the valid GeoMx S4 object will additionally include
#'  the following items
#'  \itemize{
#'  \item In pData
#'    \itemize{
#'       \item cell.counts: beta rescaled to estimate cell numbers, based on
#'                       prop_of_all and nuclei count
#'    }
#'}
#'
#' if given both cellmerges and cell_counts, the valid GeoMx S4 object will
#'  additionally include the following items
#'  \itemize{
#'  \item In pData
#'    \itemize{
#'       \item cell.counts.granular: cell.counts prior to combining closely-related
#'                                cell types
#'    }
#'}
#'
#' @examples
#'
#' ## GeoMxSet Object ##
#' library(GeomxTools)
#' datadir <- system.file("extdata", "DSP_NGS_Example_Data", package = "GeomxTools")
#' demoData <- readRDS(file.path(datadir, "/demoData.rds"))
#' 
#' demoData <- shiftCountsOne(demoData)
#' target_demoData <- aggregateCounts(demoData)
#' 
#' target_demoData <- normalize(target_demoData, "quant")
#' 
#' demoData <- runspatialdecon(object = target_demoData, 
#'                             norm_elt = "exprs_norm",
#'                             raw_elt = "exprs")
#'
#' @export
#' @rdname runspatialdecon

setMethod("runspatialdecon", "NanoStringGeoMxSet",
          function(object, X = NULL, norm_elt = NULL, raw_elt = NULL,
                   wts = NULL,
                   resid_thresh = 3, lower_thresh = 0.5,
                   align_genes = TRUE,
                   is_pure_tumor = NULL, n_tumor_clusters = 10,
                   cell_counts = NULL,
                   cellmerges = NULL,
                   maxit = 1000){
            
            # check that norm and raw data exists:
            if(is.null(norm_elt)){
              stop("norm_elt must be set")
            }
            if(is.null(raw_elt)){
              stop("raw_elt must be set")
            }
            if (!is.element(norm_elt, names(object@assayData))) {
              stop(paste(norm_elt, "is not an element in assaysData slot"))
            }
            if (!is.element(raw_elt, names(object@assayData))) {
              stop(paste(raw_elt, "is not an element in assaysData slot"))
            }
            
            # prep components:
            norm <- as.matrix(Biobase::assayDataElement(object , elt = norm_elt))
            raw <- as.matrix(Biobase::assayDataElement(object , elt = raw_elt))
            
            # estimate background
            bg <- derive_GeoMx_background(norm = norm,
                                          # access the probe pool information from the feature metadata
                                          probepool = Biobase::fData(object)$Module,
                                          # access the names of the negative control probes
                                          negnames = Biobase::fData(object)$TargetName[Biobase::fData(object)$Negative])
            
            # run spatialdecon:
            result <- spatialdecon(norm = norm,
                                   bg = bg,
                                   X = X,
                                   raw = raw,
                                   wts = wts,
                                   resid_thresh = resid_thresh, 
                                   lower_thresh = lower_thresh,
                                   align_genes = align_genes,
                                   is_pure_tumor = is_pure_tumor, 
                                   n_tumor_clusters = n_tumor_clusters,
                                   cell_counts = cell_counts,
                                   cellmerges = cellmerges,
                                   maxit = maxit)
            
            
            # append results to the object
            # beta
            Biobase::pData(object)[["beta"]] <- matrix(NA, nrow = ncol(object), ncol = nrow(result$beta),
                                                       dimnames = list(Biobase::sampleNames(object), rownames(result$beta)))
            Biobase::pData(object)[["beta"]][colnames(result$beta), ] <- t(result$beta)
            
            
            # p
            Biobase::pData(object)[["p"]] <- matrix(NA, nrow = ncol(object), ncol = nrow(result$p),
                                                    dimnames = list(Biobase::sampleNames(object), rownames(result$p)))
            Biobase::pData(object)[["p"]][colnames(result$p), ] <- t(result$p)
            
            
            # t
            Biobase::pData(object)[["t"]] <- matrix(NA, nrow = ncol(object), ncol = nrow(result$t),
                                                    dimnames = list(Biobase::sampleNames(object), rownames(result$t)))
            Biobase::pData(object)[["t"]][colnames(result$t), ] <- t(result$t)
            
            
            # se
            Biobase::pData(object)[["se"]] <- matrix(NA, nrow = ncol(object), ncol = nrow(result$se),
                                                     dimnames = list(Biobase::sampleNames(object), rownames(result$se)))
            Biobase::pData(object)[["se"]][colnames(result$se), ] <- t(result$se)
            
            
            # prop_of_all
            Biobase::pData(object)[["prop_of_all"]] <- matrix(NA, nrow = ncol(object), ncol = nrow(result$prop_of_all),
                                                              dimnames = list(Biobase::sampleNames(object), rownames(result$prop_of_all)))
            Biobase::pData(object)[["prop_of_all"]][colnames(result$prop_of_all), ] <- t(result$prop_of_all)
            
            
            # prop_of_nontumor
            Biobase::pData(object)[["prop_of_nontumor"]] <- matrix(NA, nrow = ncol(object), ncol = nrow(result$prop_of_nontumor),
                                                                   dimnames = list(Biobase::sampleNames(object), rownames(result$prop_of_nontumor)))
            Biobase::pData(object)[["prop_of_nontumor"]][colnames(result$prop_of_nontumor), ] <- t(result$prop_of_nontumor)
            
            
            # add yhat matrix
            yhat_mat <- matrix(NA, nrow = nrow(object), ncol = ncol(object),
                               dimnames = dimnames(object))
            yhat_mat[rownames(result$yhat), colnames(result$yhat)] <- result$yhat
            Biobase::assayDataElement(object, "yhat") <- yhat_mat
            
            
            # add resids matrix
            resids_mat <- matrix(NA, nrow = nrow(object), ncol = ncol(object),
                                 dimnames = dimnames(object))
            resids_mat[rownames(result$resids), colnames(result$resids)] <- result$resids
            Biobase::assayDataElement(object, "resids") <- resids_mat
            
            
            # add X matrix
            Biobase::fData(object)[["X"]] <- matrix(NA, nrow = nrow(object), ncol = ncol(result$X),
                                                    dimnames = list(Biobase::featureNames(object), colnames(result$X)))
            Biobase::fData(object)[["X"]][rownames(result$X), ] <- result$X
            
            
            # transpose 3-d array
            trans <- function(array){
              NewArray <- array(NA, dim = c(dim(array)[3], dim(array)[2], dim(array)[1]))
              
              for(or in(seq_len(dim(array)[1]))){
                for(oc in(seq_len(dim(array)[2]))){
                  NewArray[,oc,or] <- array[or,oc,]
                }
              }
              array <- NewArray
            }
            
            if (is.null(cellmerges)) {
              # add sigmas matrix
              sigmas_mat <- trans(result$sigmas)
              sigmas_dimname <- list(Biobase::sampleNames(object),
                                     paste0("var", seq_len((dim(result$sigmas)[1]))),
                                     paste0("var", seq_len((dim(result$sigmas)[2]))))
              dimnames(sigmas_mat) <- sigmas_dimname
              
              Biobase::pData(object)[["sigmas"]] <- array(NA, dim = c(ncol(object),
                                                                      dim(result$sigmas)[1],
                                                                      dim(result$sigmas)[2]),
                                                          dimnames = sigmas_dimname)
              Biobase::pData(object)[["sigmas"]][unlist(dimnames(sigmas_mat)[1]),, ] <- sigmas_mat
            } else {
              # add sigma matrix
              sigma_mat <- trans(result$sigma)
              sigma_dimname <- list(Biobase::sampleNames(object),
                                    paste0("var", seq_len((dim(result$sigma)[1]))),
                                    paste0("var", seq_len((dim(result$sigma)[2]))))
              dimnames(sigma_mat) <- sigma_dimname
              
              Biobase::pData(object)[["sigma"]] <- array(NA, dim = c(ncol(object),
                                                                     dim(result$sigma)[1],
                                                                     dim(result$sigma)[2]),
                                                         dimnames = sigma_dimname)
              Biobase::pData(object)[["sigma"]][unlist(dimnames(sigma_mat)[1]),, ] <- sigma_mat
              
              
              # add beta.granular matrix
              Biobase::pData(object)[["beta.granular"]] <- matrix(NA, nrow = ncol(object),
                                                                  ncol = nrow(result$beta.granular),
                                                                  dimnames = list(Biobase::sampleNames(object),
                                                                                  rownames(result$beta.granular)))
              Biobase::pData(object)[["beta.granular"]][colnames(result$beta.granular), ] <- t(result$beta.granular)
              
              
              # add sigma.granular matrix
              sigma.granular_mat <- trans(result$sigma.granular)
              sigma.granular_dimname <- list(Biobase::sampleNames(object),
                                             paste0("var", seq_len((dim(result$sigma.granular)[1]))),
                                             paste0("var", seq_len((dim(result$sigma.granular)[2]))))
              dimnames(sigma.granular_mat) <- sigma.granular_dimname
              
              Biobase::pData(object)[["sigma.granular"]] <- array(NA, dim = c(ncol(object),
                                                                              dim(result$sigma.granular)[1],
                                                                              dim(result$sigma.granular)[2]),
                                                                  dimnames = sigma.granular_dimname)
              Biobase::pData(object)[["sigma.granular"]][unlist(dimnames(sigma.granular_mat)[1]),, ] <- sigma.granular_mat
            }
            
            
            if (length(cell_counts) > 0) {
              if (length(result$cell.counts) == 2){
                # create cell.counts list
                Biobase::pData(object)[["cell.counts"]] <- data.frame(matrix(NA,
                                                                             nrow = ncol(object),
                                                                             ncol = 2,
                                                                             dimnames = list(Biobase::sampleNames(object),
                                                                                             c("cells.per.100", "cell.counts"))))                                                                                           
                # add cells.per.100 matrix
                Biobase::pData(object)[["cell.counts"]][["cells.per.100"]] <- matrix(NA, nrow = ncol(object),
                                                                                     ncol = nrow(result$cell.counts$cells.per.100),
                                                                                     dimnames = list(Biobase::sampleNames(object),
                                                                                                     rownames(result$cell.counts$cells.per.100)))
                Biobase::pData(object)[["cell.counts"]][["cells.per.100"]][colnames(result$cell.counts$cells.per.100), ] <- t(result$cell.counts$cells.per.100)
                
                
                # add cell.counts matrix
                Biobase::pData(object)[["cell.counts"]][["cell.counts"]] <- matrix(NA, nrow = ncol(object),
                                                                                   ncol = nrow(result$cell.counts$cell.counts),
                                                                                   dimnames = list(Biobase::sampleNames(object),
                                                                                                   rownames(result$cell.counts$cell.counts)))
                Biobase::pData(object)[["cell.counts"]][["cell.counts"]][colnames(result$cell.counts$cell.counts), ] <- t(result$cell.counts$cell.counts)
              } else {
                # create cell.counts list
                Biobase::pData(object)[["cell.counts"]] <- data.frame(matrix(NA,
                                                                             nrow = ncol(object),
                                                                             ncol = 1,
                                                                             dimnames = list(Biobase::sampleNames(object),
                                                                                             "cells.per.100")))
                
                # add cells.per.100 matrix
                Biobase::pData(object)[["cell.counts"]][["cells.per.100"]] <- matrix(NA, nrow = ncol(object),
                                                                                     ncol = nrow(result$cell.counts$cells.per.100),
                                                                                     dimnames = list(Biobase::sampleNames(object),
                                                                                                     rownames(result$cell.counts$cells.per.100)))
                Biobase::pData(object)[["cell.counts"]][["cells.per.100"]][colnames(result$cell.counts$cells.per.100), ] <- t(result$cell.counts$cells.per.100)
              }
            }
            
            
            if (length(result$cell.counts.granular) > 0) {
              
              if(length(result$cell.counts.granular) == 2){
                # create cell.counts.granular list
                Biobase::pData(object)[["cell.counts.granular"]] <- data.frame(matrix(NA,
                                                                                      nrow = ncol(object),
                                                                                      ncol = 2,
                                                                                      dimnames = list(Biobase::sampleNames(object),
                                                                                                      c("cells.per.100",
                                                                                                        "cell.counts"))))
                # add cells.per.100 matrix
                Biobase::pData(object)[["cell.counts.granular"]][["cells.per.100"]] <- matrix(NA, nrow = ncol(object),
                                                                                              ncol = nrow(result$cell.counts.granular$cells.per.100),
                                                                                              dimnames = list(Biobase::sampleNames(object),
                                                                                                              rownames(result$cell.counts.granular$cells.per.100)))
                Biobase::pData(object)[["cell.counts.granular"]][["cells.per.100"]][colnames(result$cell.counts.granular$cells.per.100), ] <- t(result$cell.counts.granular$cells.per.100)
                
                
                # add cells.count matrix
                Biobase::pData(object)[["cell.counts.granular"]][["cell.counts"]] <- matrix(NA, nrow = ncol(object),
                                                                                            ncol = nrow(result$cell.counts.granular$cell.counts),
                                                                                            dimnames = list(Biobase::sampleNames(object),
                                                                                                            rownames(result$cell.counts.granular$cell.counts)))
                Biobase::pData(object)[["cell.counts.granular"]][["cell.counts"]][colnames(result$cell.counts.granular$cells.per.100), ] <- t(result$cell.counts.granular$cell.counts)
              } else {
                # create cell.counts.granular list
                Biobase::pData(object)[["cell.counts.granular"]] <- data.frame(matrix(NA,
                                                                                      nrow = ncol(object),
                                                                                      ncol = 1,
                                                                                      dimnames = list(Biobase::sampleNames(object),
                                                                                                      "cells.per.100")))
                # add cells.per.100 matrix
                Biobase::pData(object)[["cell.counts.granular"]][["cells.per.100"]] <- matrix(NA, nrow = ncol(object),
                                                                                              ncol = nrow(result$cell.counts.granular$cells.per.100),
                                                                                              dimnames = list(Biobase::sampleNames(object),
                                                                                                              rownames(result$cell.counts.granular$cells.per.100)))
                Biobase::pData(object)[["cell.counts.granular"]][["cells.per.100"]][colnames(result$cell.counts.granular$cells.per.100), ] <- t(result$cell.counts.granular$cells.per.100)
              }
            }
            
            object@experimentData@other$SpatialDeconMatrix <- result$X
            
            return(object)
            
          })




#' Run spatialdecon on a Seurat object
#' 
#' A wrapper for applying spatialdecon to the Spatial data element in a Seurat object. 
#' Unlike spatialdecon, which expects a normalized data matrix, this function operates 
#' on raw counts. Scaling for total cells 
#' @param X Cell profile matrix. If NULL, the safeTME matrix is used.
#' @param bg Expected background counts. Either a scalar applied equally to 
#'  all points in the count matrix, or a matrix with the same dimensions 
#'  as the count matrix in GetAssayData(object, assay = "Spatial").
#'  Recommended to use a small non-zero value, default of 0.1.
#' @param wts Optional, a matrix of weights.
#' @param resid_thresh A scalar, sets a threshold on how extreme individual data
#'  points' values
#'  can be (in log2 units) before getting flagged as outliers and set to NA.
#' @param lower_thresh A scalar. Before log2-scale residuals are calculated,
#'  both observed and fitted
#'  values get thresholded up to this value. Prevents log2-scale residuals from
#'  becoming extreme in
#'  points near zero.
#' @param align_genes Logical. If TRUE, then Y, X, bg, and wts are row-aligned
#'  by shared genes.
#' @param is_pure_tumor A logical vector denoting whether each AOI consists of
#'  pure tumor. If specified,
#'  then the algorithm will derive a tumor expression profile and merge it with
#'  the immune profiles matrix.
#' @param cell_counts Number of cells estimated to be within each sample. If
#' provided alongside norm_factors,
#'  then the algorithm will additionally output cell abundance esimtates on the
#'  scale of cell counts.
#' @param cellmerges A list object holding the mapping from beta's cell names to
#'  combined cell names. If left
#'  NULL, then defaults to a mapping of granular immune cell definitions to
#'   broader categories.
#' @param n_tumor_clusters Number of tumor-specific columns to merge into the
#' cell profile matrix.
#'  Has an impact only when is_pure_tumor argument is used to indicate pure
#'   tumor AOIs.
#'  Takes this many clusters from the pure-tumor AOI data and gets the average
#'  expression profile in each cluster.  Default 10.
#' @param maxit Maximum number of iterations. Default 1000.
#' @return For Seurat Object, if not given cellmerges and cell_counts, a list
#' including the following items:
#'  \itemize{
#'    \item beta: matrix of cell abundance estimates, cells in rows and
#'                observations in columns
#'    \item p: matrix of p-values for H0: beta == 0
#'    \item t: matrix of t-statistics for H0: beta == 0
#'    \item se: matrix of standard errors of beta values
#'    \item prop_of_all: rescaling of beta to sum to 1 in each observation
#'    \item prop_of_nontumor: rescaling of beta to sum to 1 in each observation,
#'                            excluding tumor abundance estimates
#'    \item yhat: a matrix of fitted values
#'    \item resids: a matrix of residuals from the model fit.
#'                  (log2(pmax(y, lower_thresh)) - log2(pmax(xb, lower_thresh))).
#'    \item X: the cell profile matrix used in the decon fit.
#'    \item sigmas: covariance matrices of each observation's beta estimates
#'}
#'
#'
#' if given cellmerges, the list will additionally include
#' the following items
#'  \itemize{
#'    \item beta.granular: cell abundances prior to combining closely-related
#'                         cell types
#'    \item sigma.granular: sigmas prior to combining closely-related cell types
#'}
#'
#' if given cell_counts, the list will additionally include
#'  the following items
#'  \itemize{
#'    \item cell.counts: beta rescaled to estimate cell numbers, based on
#'                       prop_of_all and nuclei count
#'}
#'
#' if given both cellmerges and cell_counts, the list will
#'  additionally include the following items
#'  \itemize{
#'    \item cell.counts.granular: cell.counts prior to combining closely-related
#'                                cell types
#'}
#' @importFrom SeuratObject GetAssayData
#' @importClassesFrom GeomxTools NanoStringGeoMxSet
#' @export
#' @examples
#' ## Seurat Object ##
#' # get dataset
#' con <- gzcon(url("https://github.com/almaan/her2st/raw/master/data/ST-cnts/G1.tsv.gz"))
#' txt <- readLines(con)
#' temp <- read.table(textConnection(txt), sep = "\t", header = TRUE, row.names = 1)
#' # parse data
#' raw = t(as.matrix(temp))
#' norm = sweep(raw, 2, colSums(raw), "/") * mean(colSums(raw))
#' x = as.numeric(substr(rownames(temp), 1, unlist(gregexpr("x", rownames(temp))) - 1))
#' y = -as.numeric(substr(rownames(temp), 
#'                  unlist(gregexpr("x", rownames(temp))) + 1, nchar(rownames(temp))))
#' # put into a seurat object:
#' andersson_g1 = SeuratObject::CreateSeuratObject(counts = raw, assay="Spatial")
#' andersson_g1@meta.data$x = x
#' andersson_g1@meta.data$y = y
#' 
#' res <- runspatialdecon(andersson_g1)
#' str(res)
#' @rdname runspatialdecon
setMethod("runspatialdecon", "Seurat", function(
  object,
  X = NULL,
  bg = 0.1,  
  wts = NULL,
  resid_thresh = 3, lower_thresh = 0.5,
  align_genes = TRUE,
  is_pure_tumor = NULL, 
  n_tumor_clusters = 10,
  cell_counts = NULL,
  cellmerges = NULL,
  maxit = 1000) {
  
  # check that it's a spatial assay:
  if (!is.element("Spatial", names(object@assays))) {
    stop("Expecting \'Spatial\' element in assays slot")
  }
  
  # prep components:
  raw <- as.matrix(SeuratObject::GetAssayData(object, assay = "Spatial"))
  # make bg a matrix if only a scalar was specified:
  if (length(bg) == 1) {
    bg <- 0 * raw + bg
  }
  
  # run spatialdecon:
  res <- spatialdecon(norm = raw, 
                      bg = bg, 
                      X = X,
                      raw = raw, 
                      wts = wts,
                      resid_thresh = resid_thresh, 
                      lower_thresh = lower_thresh,
                      align_genes = align_genes,
                      is_pure_tumor = is_pure_tumor, 
                      n_tumor_clusters = n_tumor_clusters,
                      cell_counts = cell_counts,
                      cellmerges = cellmerges,
                      maxit = maxit) 
  
  return(res)
})
Nanostring-Biostats/SpatialDecon documentation built on Jan. 26, 2024, 8:20 p.m.