R/HiCDCPlus.R

Defines functions HiCDCPlus

Documented in HiCDCPlus

#' HiCDCPlus
#'
#' This function finds significant interactions in a HiC-DC readable matrix
#' and expresses statistical significance of counts through the following:
#' 'pvalue': significance \emph{P}-value, 'qvalue': FDR corrected 
#' \emph{P}-value, mu': expected counts, 'sdev': modeled standard deviation
#' of expected counts.
#'@import BSgenome splines
#'@importFrom dplyr %>%
#'@importFrom rlang .data
#'@importFrom S4Vectors mcols<- mcols queryHits subjectHits
#'@param gi_list List of \code{GenomicInteractions} objects where each object
#'named with chromosomes contains intrachromosomal interaction information
#'(minimally containing counts and genomic distance in 
#'\code{mcols(gi_list[[1]])}---see \code{?gi_list_validate} for a detailed 
#'explanation of valid \code{gi_list} instances). 
#'@param covariates covariates to be considered in addition to genomic
#'distance D. Defaults to all covariates besides 
#''D','counts','mu','sdev',pvalue','qvalue' in \code{mcols(gi_list[[1]])}
#'@param chrs select a subset of chromosomes' e.g.,
#'c('chr21','chr22'). Defaults to all chromosomes
#'in the \code{gi_list}.
#'@param distance_type distance covariate form: 'spline' or 'log'.
#'Defaults to 'spline'.
#'@param model_distribution 'nb' uses a Negative Binomial model, 'nb_vardisp' uses a 
#' Negative Binomial model with a distance specific dispersion parameter inferred 
#' from the data, 'nb_hurdle' uses the legacy HiCDC model.
#'@param binned TRUE if uniformly binned or FALSE if binned by
#'restriction enzyme fragment cutsites
#'@param df degrees of freedom for the genomic distance spline
#'function if \code{distance_type='spline'}. Defaults to 6, which corresponds to
#'a cubic spline as explained in Carty et al. (2017)
#'@param Dmin minimum distance (included) to check for significant interactions,
#' defaults to 0
#'@param Dmax maximum distance (included) to check for significant interactions,
#' defaults to 2e6 or maximum in the data; whichever is minimum.
#'@param ssize Distance stratified sampling size. Can decrease for
#' large chromosomes. Increase recommended if
#' model fails to converge. Defaults to 0.01.
#'@param splineknotting Spline knotting strategy. Either "uniform", uniformly
#'spaced in distance, or placed based on distance distribution of counts 
#'"count-based" (i.e., more closely spaced where counts are more dense).
#'@param model_filepath Outputs fitted HiC-DC model object as an .rds
#'file per chromosome. Defaults to NULL (no output).
#'@return A valid \code{gi_list} instance with additional \code{mcols(.)} for
#'each chromosome: pvalue': significance \emph{P}-value, 'qvalue': FDR 
#'corrected \emph{P}-value, mu': expected counts, 'sdev': modeled standard
#'deviation of expected counts.
#'@examples gi_list<-generate_binned_gi_list(50e3,chrs='chr22')
#'gi_list<-add_hic_counts(gi_list,
#'hic_path<-system.file("extdata", "GSE63525_HMEC_combined_example.hic",
#' package = "HiCDCPlus"))
#'gi_list<-HiCDCPlus(gi_list)
#'@export

HiCDCPlus <- function(gi_list, covariates = NULL, chrs = NULL, distance_type = "spline", model_distribution = "nb", binned = TRUE, 
    df = 6, Dmin = 0, Dmax = 2e+06, ssize = 0.01, splineknotting = "uniform", model_filepath = NULL) {
    options(scipen = 9999, digits = 4)
    gi_list_validate(gi_list)
    if (is.null(chrs)) chrs <- names(gi_list)
    # check if D and counts exist on each chromosome
    if (!(all(vapply(gi_list, function(x) sum(colnames(mcols(x)) %in% c("counts", "D")) == 2, TRUE)))) {
        stop("Some gi_list elements do not contain pairwise distances D
    and counts, the minimum set of features needed for HiC-DC+ modeling.")
    }
    for (chrom in chrs) {
        gi_list[[chrom]] <- HiCDCPlus_chr(gi_list[[chrom]], covariates = covariates, distance_type = distance_type, model_distribution = model_distribution, binned = binned, 
                                  df = df, Dmin = Dmin, Dmax = Dmax, ssize = ssize, splineknotting = splineknotting, model_filepath=model_filepath)
    }
    return(gi_list)
}
mervesa/HiCDCPlus documentation built on June 8, 2022, 3:43 a.m.