R/disco.R

Defines functions discoPeriodDetection PeriodDetection_range discoODAs discoApp discoBatch

Documented in discoApp discoBatch discoODAs discoPeriodDetection PeriodDetection_range

#' Core DiscoRhythm Workflow
#'
#' Execute the DiscoRhythm workflow with one command to obtain the results
#' of oscillation detection (\code{discoODAs}) and optionally generate an html
#' report with data visualizations from an Rmarkdown template. See the
#' DiscoRhythm vignette for more details on the analysis procedures.
#'
#' @param indata SummarizedExperiment or data.frame, see the vignette for
#' the specific formats expected for each of these input types.
#' \code{discoParseMeta}.
#' @param report character, if \code{!is.null(report)} an html report with
#` outputs of the DiscoRhythm workflow will be rendered in the current working
#` directory using the value as the file name.
#' @param outdata logical, whether to return the final discoODAs (note if run
#' with \code{is.null(report)} discoBatch will return nothing).
#' @param ncores numeric, number of cores to use for parallelized tasks.
#' Currently, only used in oscillation detection function \code{discoODAs}.
#' @param timeType character, nature of the sample times provided
#' (one of \code{"circular"} or \code{"linear"}).
#' @param main_per numeric, the length of the main hypothesized period
#' (e.g. 24hr for circadian experiments). Used in \code{discoPeriodDetection}.
#' @param cor_threshold numeric, threshold used in inter-sample correlation
#' analysis for outlier detection. Either in units of correlation coefficient
#' or standard deviations from the mean (see \code{cor_threshType}).
#' @param cor_method character, which correlation method to use for outlier
#' removal (see \link[stats]{cor} for more details).
#' @param cor_threshType character, one of "sd" or "value" indicating whether
#' cor_threshold should be set by absolute correlation coefficient or by
#' standard deviations from the mean of all samples.
#' @param pca_threshold numeric, the number of standard deviations to set as
#' the threshold for outlier detection in PCA outlier removal.
#' @param pca_scale logical, whether to scale the data prior to PCA.
#' @param pca_pcToCut character, names of which PCs to use for outlier
#' detection (e.g. "PC1","PC2" etc.).
#' @param aov_method character, method to use for ANOVA. One of:
#' "Equal Variance", "Welch", or "None".
#' @param aov_pcut numeric, p-value cutoff used to select rows with
#' statistically significant signal-to-noise.
#' @param aov_Fcut numeric, F-statistic cutoff used to select rows
#' with high signal-to-noise based on magnitude.
#' @param avg_method character, method for averaging technical replicates. One
#' of: "Median","Mean","Random", or "None".
#' @param osc_method character, vector of oscillation detection algorithms
#' to apply to the data. Methods that are detmined to be innappropraite for the
#' experimental design (using the \code{discoODAexclusionMatrix}) will be
#' ignored. If \code{is.null(osc_method)} all
#' suitable methods will be executed.
#' @param osc_period numeric, a fixed period to use for oscillation detection
#' using all methods.
#'
#' @export
#'
#' @return returns the results of \code{discoODAs}
#'
#' @examples
#' indata <- discoGetSimu()
#'
#' # Batch execute (on demo data) to generate a DiscoRhythm_report.html report.
#' # Returns the results of discoODAs
#' discoODAres <- discoBatch(indata,
#' report="DiscoRhythm_report.html",
#' osc_method="CS")
#'
#' @seealso discoODAs, discoRepAnalysis, discoPeriodDetection,
#' discoPCAoutliers, discoInterCorOurliers
#'
#'
discoBatch <- function(indata,
    report = NULL,
    outdata = TRUE,
    ncores = 1,
    # analysis parameters
    timeType = "circular",
    main_per = 24,
    cor_threshold = 3,
    cor_method = "pearson",
    cor_threshType = "sd",
    pca_threshold = 3,
    pca_scale = TRUE,
    pca_pcToCut = paste0("PC",seq_len(4)),
    aov_method = "None",
    aov_pcut = 0.05,
    aov_Fcut = 0,
    avg_method = "Median",
    osc_method = NULL,
    osc_period = 24) {

    if (is.null(report)) {
        batchscript <- system.file("", "DiscoRhythm_batch.R",
                                    package = "DiscoRhythm", mustWork = TRUE)
        source(batchscript, local = TRUE)
    } else {
        visualizationReport <- system.file("", "DiscoRhythm_report.Rmd",
                                            package = "DiscoRhythm",
                                            mustWork = TRUE)
        rmarkdown::render(visualizationReport, output_dir = dirname(report),
                            # avoid writing intermediate files to non-writable
                            # locations (i.e. if DiscoRhythm installed by root)
                            intermediates_dir = dirname(report),
                            output_file = report,clean = TRUE)
    }

    if(outdata){
        # discoODAres generated by either the render() or source() call above
        return(discoODAres)
    } else{
        return()
    }
}

#' Launch the DiscoRhythm Shiny Application
#'
#' This launches the web interface to DiscoRhythm containing all analysis
#' tools. The vignette contains details on usage.
#'
#' @inheritParams discoBatch
#' @param port numeric, port to run the shiny application on. Sets shiny.port
#' option.
#' @param local logical, set to FALSE for public server mode to reduce file
#' size limits.
#'
#' @return Nothing is returned by this function.
#'
#' @examples
#' \dontrun{
#'  discoApp()
#' }
#' @export
#'
discoApp <- function(ncores=1, port=3838, local=TRUE){
    appDir <- system.file("app", "", package = "DiscoRhythm")
    if (appDir == "") {
        stop("Could not find application directory. ",
            "Try re-installing DiscoRhythm.",
            call. = FALSE)
    }
    
    options(shiny.port=port)
    
    # making .discorhythm_ncores and .discorhythm_local available to the 
    # shiny app environment
    .GlobalEnv$.discorhythm_ncores <- ncores
    on.exit(rm(.discorhythm_ncores, envir=.GlobalEnv))
    .GlobalEnv$.discorhythm_local <- local
    on.exit(rm(.discorhythm_local, envir=.GlobalEnv))
    
    shiny::runApp(appDir, display.mode = "normal")
}


#' @rdname discoODAs
#' 
#' @inheritParams discoInterCorOutliers
#' @param period numeric, the hypothesized period to test for.
#' @param method character, short names of ODAs to use. If length>1 
#' all input method names will be evaluated.
#' @param circular_t logical, is time circular on some base-cycle
#' (ex. time of day). See the DiscoRhythm vignette for details.
#' @param ncores numeric, number of cores to parallelize with 
#' (applicable to JTK, ARSER and LS only). If 1, will execute in serial.
#'
#'
#' @return A named list of results where each element is a data.frame for the
#' corresponding method with rownames corresponding to the feature identifiers
#' and columns containing estimates for:
#' \itemize{
#'   \item acrophase
#'   \item amplitude
#'   \item p-value
#'   \item q-value
#' }
#' 
#' Additional columns relevant to each method will be present. 
#' 
#' @details 
#' There are currently 4 available algorithms for rhythm detection:
#' \itemize{
#'   \item CS = Cosinor (Cornelissen,G. 2014):  a.k.a “Harmonic Regression” 
#'   fits a sinusoid with a free phase parameter.
#'   \item LS = Lomb-Scargle (Glynn, 2006): an approach using spectral power 
#'   density.
#'   \item ARS = ARSER (Yang, 2010): removes linear trends and performs the 
#'   Cosinor test.
#'   \item JTK = JTK Cycle (Hughes, 2010): non-parametric test of rhythmicity 
#'   robust to outliers.
#' }
#' 
#' LS, ARS, and JTK results come directly from MetaCycle meta2d() output using
#' the specified fixed period. ARSmle is set to "nomle" and no method 
#' integration is used (see meta2d documentation for details). 
#' 
#' CS is implemented directly in DiscoRhythm's lmCSmat()
#' as the single-component cosinor described in Cornelissen,G. (2014). 
#' 
#' All q-values are calculated by performing p.adjust() on the resulting 
#' p-values with method="fdr".
#' 
#' Technical replicates are expected to be merged (likely by discoRepAnalysis) 
#' prior to usage of discoODAs. 
#' 
#' The discoGetODAs function is called by discoODAs to determine if the selected
#' methods may be used. If any methods are not valid, a warning will be 
#' thrown and only valid methods will be computed.
#' discoGetODAs is not typically used directly, 
#' however, it may be called by the user to determine
#' if the provided SummarizedExperiment is suitable for use with the specified
#' methods. 
#' 
#' @references 
#' Yang R. and  Su Z. (2010). Analyzing circadian expression data by
#'   harmonic regression based on autoregressive spectral estimation.
#'   \emph{Bioinformatics}, \bold{26(12)}, i168--i174.
#'
#' Hughes M. E., Hogenesch J. B. and Kornacker K. (2010). JTK_CYCLE: an
#'   efficient nonparametric algorithm for detecting rhythmic components in
#'   genome-scale data sets. \emph{Journal of Biological Rhythms},
#'   \bold{25(5)}, 372--380.
#'
#' Glynn E. F., Chen J. and Mushegian A. R. (2006). Detecting periodic
#'   patterns in unevenly spaced gene expression time series using
#'   Lomb-Scargle periodograms. \emph{Bioinformatics}, \bold{22(3)},
#'   310--316.
#' 
#' Cornelissen,G. (2014) Cosinor-based rhythmometry. 
#' \emph{Theor. Biol. Med. Model.}, \bold{11}, 16.
#'  
#' @examples
#' # Import the simulated example dataset
#' se <- discoCheckInput(discoGetSimu(TRUE))
#' 
#' # Use discoRepAnalysis to average technical replicates
#' se_merged <- discoRepAnalysis(se,aov_pcut=1)$se
#' 
#' # Execute the Cosinor and JTK methods with a 24hr period
#' discoODAres <- discoODAs(se_merged,method=c("CS","JTK"))
#' 
#' # Get the index of rhythmic features detected by both methods at qvalue<0.05
#' idx <- which(discoODAres$CS$qvalue<0.05 & discoODAres$JTK$qvalue<0.05)
#' 
#' # Get the identifiers for common rhythmic features
#' rownames(se_merged)[idx]
#'
#'
#' @export
#' @seealso \code{\link{lmCSmat}} \code{\link[MetaCycle]{meta2d}}
discoODAs <- function(se, period = 24,
    method = c("CS","JTK","LS","ARS"),
    circular_t=FALSE, ncores = 1) {
    
    method = match.arg(method,several.ok = TRUE)
    
    # Unit checks
    if (!methods::is(se, "SummarizedExperiment")) {
        stop("Input must be a SummarizedExperiment.")
    }
    if (any(c(!is.numeric(period), length(period) != 1, period <= 0))) {
        stop("Period should be single positive numeric value.")
    }
    if (any(c(!is.numeric(ncores), length(ncores) != 1, ncores <= 0))) {
        stop("Number of cores should be single positive numeric value.")
    }
    if (any(c(!is.logical(circular_t), length(circular_t) != 1))) {
        stop("circular_t cores should be single logical value.")
    }
    
    method <- discoGetODAs(se,method,period,circular_t)

    unif <- list()
    # Run CS
    if ("CS" %in% method) {
        unif$CS <- lmCSmat(assay(se), se$Time, period)
    # Fix NAs
        nanId <- is.nan(unif$CS$pvalue)
        unif$CS[nanId, "acrophase"] <- NaN
        unif$CS[nanId, "pvalue"] <- 1
    # Multiple Test Correction
        unif$CS$qvalue <- stats::p.adjust(unif$CS$pvalue, method = "BH")
        rownames(unif$CS) <- rownames(se)
    # Reorder columns such that first 4 are the same across all methods  
        unif$CS <- unif$CS[,c("acrophase","amplitude","pvalue",
                        "qvalue","Rsq","mesor","sincoef","coscoef")]
    }
    # Run MetaCycle Methods
    rest <- Filter(function(x) x != "CS", method)
    if (length(rest) > 0) {

        Maindata <- discoSEtoDF(se)
        
        # All arguments of meta2d are explicitly called
        cyc <- MetaCycle::meta2d(
            infile = "dummy", outdir = "dummy", filestyle = "csv",
            timepoints = se$Time,
            minper = period, maxper = period, cycMethod = rest,
            analysisStrategy = "auto",
            outputFile = FALSE,
            outIntegration = "both",
            adjustPhase = "predictedPer", combinePvalue = "bonferroni",
            weightedPerPha = FALSE,
            ARSmle = "nomle",
            ARSdefaultPer = period, outRawData = FALSE,
            releaseNote = FALSE,
            outSymbol = "dummy", parallelize = .Platform$OS.type != "windows",
            nCores = ncores,
            inDF = Maindata
            )
        
        for (method in rest) {
            tmpdf <- cyc$meta[, paste0(method, c("_adjphase", "_amplitude",
                "_pvalue", "_BH.Q", "_period"))]
            colnames(tmpdf) <- c("acrophase", "amplitude",
                "pvalue", "qvalue", "period")
            rownames(tmpdf) <- cyc[[method]]$CycID
            unif[[method]] <- tmpdf
        }
    }

    return(unif)
}

#' Helper for discoPeriodDetection
#'
#' @keywords internal
#'
#' @return a set of periods to use for \code{discoPeriodDetection}
PeriodDetection_range <- function(times,circular_t,main_per,test_periods){
    trainper <- as.numeric(main_per)
    rng <- diff(range(times))
    sampint <- min(unique(diff(sort(unique(times)))))

    # Circular time data can only detect harmonics of the base-cycle period
    # Studies with linear time can detect a continuous range of periods
    # Set default period ranges
    if (circular_t) {
        validPeriods <- (trainper / seq_len(8))[which(
            as.numeric(trainper / seq_len(8)) > sampint * 2)]
        if (is.null(test_periods)) {
            periods <- validPeriods
        } else {
            if(any(!(test_periods %in% validPeriods))){
                warning(paste0("Some test_periods passed to discoPeriodDetection
                    are not meaningful for
                    circular time and main_per==",
                    main_per))
            }
            periods <- test_periods
        }
    } else if (!circular_t) {
        if (is.null(test_periods)) {
            periods <- round(1 / seq(1 / (sampint*3), 1/(rng + sampint),
                length.out = 12), 2)
        } else if (length(test_periods)>2) {
            periods <- test_periods
        } else {
        # space evenly across frequency domain
            periods <- round(1 / seq(1 / test_periods[1], 1 / test_periods[2],
                length.out = 12), 2)
        }
    }

    return(periods)
}

#' Detect dataset-wide fits to multiple periodicities
#'
#' @param timeType character, time is either reported as "linear" or
#' "circular" on some base-cycle (ex. time of day). This determines the periods
#' that will be tested for.
#' @param main_per numeric, if \code{timeType=="circular"} main_per
#' indicates the period of the base-cycle where sampling times are derived.
#' @param test_periods numeric, a vector of the periods to test.
#' if \code{timeType=="linear"} and length(test_periods)==2 it will be assumed
#' to be a range of periods to test over.
#'
#' @inheritParams discoODAs
#'
#' @return A data.frame of Rsquared values for each period, for each row of
#' Maindata.
#'
#' @examples
#' se <- discoGetSimu(TRUE)
#'
#' # Detect periods
#' rsqs <- discoPeriodDetection(se)
#'
#' @export
#' 
#' @importFrom SummarizedExperiment colData
discoPeriodDetection <- function(se,
    timeType = c("linear","circular"), main_per = 24,
    test_periods = NULL) {
    
    timeType = match.arg(timeType)
    
    if(!methods::is(se,"SummarizedExperiment")){
        stop("Input must be a SummarizedExperiment.")
    }
    
    Metadata <- colData(se)
    
    times <- Metadata$Time
    circular_t <- (timeType == "circular")

    # Validates chosen periods or generates an appropraite range
    periods <- PeriodDetection_range(times,circular_t,main_per,test_periods)

    # Run Cosinor to get r-squared on all rows at candidate periods
    cosinor_res <- vapply(as.numeric(periods), function(per) {
        lmCSmat(assay(se), times, per = per)$Rsq
    }, numeric(nrow(se)))
    colnames(cosinor_res) <- periods

    allres <- reshape2::melt(cosinor_res)
    return(allres)
}

Try the DiscoRhythm package in your browser

Any scripts or data that you put into this service are public.

DiscoRhythm documentation built on Nov. 8, 2020, 7:32 p.m.