R/heuristicAnalysis.R

Defines functions getSequencingBias plotIntensityDistribution getIntensityThreshold

Documented in getIntensityThreshold getSequencingBias plotIntensityDistribution

#' @title \code{getIntensityThreshold}
#'
#' @description \code{getIntensityThreshold} takes the output of peak calling with
#'   callOpenTiles and creates sample-tile matrices containing the signal
#'   intensity at each tile.
#'
#' @param TSAM a SummarizedExperiment object generated by MOCHA
#' @param cellPopulations vector of strings. Cell subsets found in the TSAM, or the word 'All' if all should be used.
#' @param type string. Describes the type of metric to be used. Options include median or mean.
#' @param returnPlots Boolean. Default is TRUE and returns a plot of 
#' @param verbose Set TRUE to display additional messages. Default is FALSE.
#'
#' @return plot object
#'
#' 
getIntensityThreshold <- function(TSAM, cellPopulations = 'all', type = 'mean', returnPlots = TRUE, verbose = FALSE){
    if(!requireNamespace("mixtools", quietly = TRUE)){
      stop("Package `mixtools` not found. Please install `mixtools` to use MOCHA::getIntensityThreshold")
    }

    allMat <- SummarizedExperiment::assays(TSAM)

    if(tolower(cellPopulations) == 'all'){

        combMat <- do.call('cbind', as.list(allMat))

    }else if(all(cellPopulations %in% names(allMat))){

        combMat <- do.call('cbind', as.list(allMat[names(allMat) %in% cellPopulations]))
    }else{

        stop('Cell populations were not all found within the MOCHA object.')
    }

    combMat[is.na(combMat)] <- 0

    ZeroSummary <- rowSums(is.na(combMat))
    

    if(tolower(type) == 'mean'){

        mean_intensities = rowMeans(combMat)

        unmixedMat = mixtools::normalmixEM(mean_intensities,
            lambda=0.5, sigma=1)

       


    }else if(tolower(type) == 'median'){

            median_intensities = matrixStats::rowMedians(combMat)

            unmixedMat = mixtools::normalmixEM(median_intensities,
                            lambda=0.5, sigma=1)

    }else{

        stop('type is not mean or median.')
    }

    if(returnPlots){
        plot(unmixedMat, density=T, breaks=200)
    }else{

        return(unmixedMat)
    }

}




#' @title \code{plotIntensityDistribution}
#' @description \code{plotIntensityDistribution}  Plots the distribution of sample-tile intensities for a give cell type
#'
#' @param TSAM_object  SummarizedExperiment from getSampleTileMatrix
#' @param cellPopulation Cell type names (assay name) within the TSAM_object
#' @param returnDF Boolean. Determines whether or not to return a DataFrame instead of a plot. 
#' @return data.frame or ggplot histogram. 
#'
#' @export

plotIntensityDistribution <- function(TSAM_object, cellPopulation, returnDF = FALSE){

     values <- NULL
  mat <- unlist(as.data.frame(log2(getCellPopMatrix(TSAM_object, cellPopulation)+1)))
  plotMat <- data.frame(Values = mat)

  if(returnDF){
    return(plotMat)
  }

  p1 <- ggplot2::ggplot(plotMat, ggplot2::aes(x = Values)) + ggplot2::geom_histogram() + ggplot2::theme_bw() 
  return(p1)

}



#' @title \code{getSequencingBias}
#'
#' @description \code{getSequencingBias} takes the output of peak calling with
#'   callOpenTiles and creates sample-tile matrices containing the signal
#'   intensity at each tile.
#'
#' @param SampleTileObj a SummarizedExperiment object generated by MOCHA
#' @param cellPopulations vector of strings. Cell subsets found in the TSAM, or the word 'All' if all should be used.
#' @param cellPopulation A string denoting the cell population of interest
#' @param groupColumn The column containing sample group labels
#' @param foreground The foreground group of samples for differential comparison
#' @param background The background group of samples for differential comparison
#' @param verbose Set TRUE to display additional messages. Default is FALSE.
#'
#' @return plot object
#'
#' 
getSequencingBias <- function(SampleTileObj, cellPopulations = 'all',
                                           cellPopulation,
                                           groupColumn,
                                           foreground,
                                           background,
                                           verbose = TRUE){

    if (!any(names(SummarizedExperiment::assays(SampleTileObj)) %in% cellPopulation)) {
        stop("cellPopulation was not found within SampleTileObj. Check available cell populations with `colData(SampleTileObj)`.")
    }

    metaFile <- SummarizedExperiment::colData(SampleTileObj)

    if (!(groupColumn %in% colnames(metaFile))) {
        stop(stringr::str_interp("Provided groupCol '{groupColumn}' not found in the provided SampleTileObj"))
    }
    if (!(foreground %in% metaFile[[groupColumn]])) {
        stop(stringr::str_interp("Provided foreground value is not present in the column {groupColumn} in the provided SampleTileObj"))
    }
    if (!(background %in% metaFile[[groupColumn]])) {
        stop(stringr::str_interp("Provided background value is not present in the column {groupColumn} in the provided SampleTileObj"))
    }

    # Get group labels
    foreground_samples <- metaFile[metaFile[, groupColumn] == foreground, "Sample"]
    background_samples <- metaFile[metaFile[, groupColumn] == background, "Sample"]

    # nFrags <- SampleTileObj@metadata$summarizedData$FragmentCounts
    nFrags <- SummarizedExperiment::assays(SampleTileObj@metadata$summarizedData)$FragmentCounts

    medianList <- c(stats::median(nFrags[foreground_samples,cellPopulations]), stats::median(nFrags[background_samples,cellPopulations]))

    return(min(medianList)/max(medianList))

}

Try the MOCHA package in your browser

Any scripts or data that you put into this service are public.

MOCHA documentation built on May 29, 2024, 2:25 a.m.