getspres: SPRE Statistics for Exploring Heterogeneity in Meta-Analysis

Documented in getspres getspres.default

# Goal: Generate and export S3 class/methods for getspres and document using roxygen tags
#
#' Exploring Heterogeneity in Meta-Analysis with \emph{SPRE} Statistics.
#' 
#' \code{getspres} computes \emph{SPRE} (standardised predicted random-effects)
#' statistics to identify outlier studies in genetic association meta-analyses
#' which might have undue influence on the average genetic effect leading to 
#' inflated genetic signals.
#'
#' \emph{SPRE} statistics are precision-weighted residuals that summarise the 
#' direction and extent with which observed study effects in a meta-analysis
#' differ from the summary (or average genetic) effect. See the getspres 
#' website for more information, documentation and examples.
#'
#' \code{getspres} takes as input study effect-size estimates and their 
#' corresponding standard errors (i.e. summary data). Study effect estimates 
#' could be in the form of linear regression coefficients or log-transformed
#' regression coefficients (per-allele log odds ratios) from logistic 
#' regression. 
#'
#' \code{getspres} uses inverse-variance weighted meta-analysis models in the
#' \code{metafor} R package to calculate \emph{SPRE} statistics.
#'
#' @seealso \url{https://magosil86.github.io/getspres/} to the visit \code{getspres} website.
#'
#' @aliases getspres getspre spre spres
#'
#' @param beta_in          A numeric vector of study effect-sizes e.g. log odds-ratios. 
#' @param se_in            A numeric vector of standard errors, genomically corrected at study-level.
#' @param study_names_in   A character vector of study names. 
#' @param variant_names_in A character vector of variant names e.g. rsIDs.
#' @param tau2_method      A character scalar, specifying the method that should be used to estimate heterogeneity either through DerSimonian and Laird's moment-based estimate "DL" or restricted maximum likelihood "REML". Note: The REML method uses the iterative Fisher scoring algorithm (step length = 0.5, maximum iterations = 10000) to estimate tau2. Default is "DL".
#' @param verbose_output   An optional boolean to display intermediate output. (Default is \code{FALSE}).
#' @param \dots            other arguments.
#'
#' @return Returns a list containing:
#' \itemize{
#'   \item number_variants A numeric scalar for the number of variants 
#'   \item number_studies  A numeric scalar for the number of studies
#'   \item spre_dataset    A dataframe that is a dataset of computed SPRE statistics and contains the following fields:
#'   \itemize{
#'         \item beta          , study effect-size estimates
#'         \item se            , corresponding standard errors of the study effect-size estimates
#'         \item variant_names , variant names
#'         \item study_names   , study names
#'         \item study         , study numbers
#'         \item snp           , snp numbers
#'         \item tau2          , tau_squared, estimate of amount of between-study variance
#'         \item I2            , I_squared, heterogeneity index (Higgins inconsistency metric)
#'                                 representing proportion of total observed variation due to 
#'                                 between-study variance
#'         \item Q             , Q-statistic (Cochran's Q)
#'         \item xb            , prediction excluding random effects
#'         \item xbse          , standard error of prediction excluding random effects
#'         \item xbu           , predictions including random effects
#'         \item stdxbu        , standard error of prediction (fitted values) 
#'                                 including random effects
#'         \item hat           , leverage a.k.a diagonal elements of the projection hat matrix
#'         \item rawresid      , raw residuals
#'         \item uncondse      , unconditional standard errors
#'         \item spre          , \emph{SPRE} statistics (standardised predicted random effects) i.e. 
#'                                 raw residuals divided by the unconditional standard errors
#'         }
#' }
#'
#'
#' @examples
#' library(getspres)
#'
#' 
#' # Calculate SPRE statistics for a subset of variants in the heartgenes214 dataset.
#' # heartgenes214 is a case-control GWAS meta-analysis of coronary artery disease.
#' # To learn more about the heartgenes214 dataset ?heartgenes214
#'
#' # Calculating SPRE statistics for 3 variants in heartgenes214
#'
#' heartgenes3 <- subset(heartgenes214, 
#'     variants %in% c("rs10139550", "rs10168194", "rs11191416")) 
#'
#' getspres_results <- getspres(beta_in = heartgenes3$beta_flipped, 
#'                                se_in = heartgenes3$gcse, 
#'                       study_names_in = heartgenes3$studies, 
#'                     variant_names_in = heartgenes3$variants)
#'
#'
#' # Explore results generated by the getspres function
#' str(getspres_results)
#'
#' # Retrieve number of studies and variants
#' getspres_results$number_variants
#' getspres_results$number_studies
#'
#' # Retrieve SPRE dataset
#' df_spres <- getspres_results$spre_dataset
#' head(df_spres)
#'
#' # Extract SPREs from SPRE dataset
#' head(spres <- df_spres[, "spre"])
#'
#' \donttest{
#'
#' # Exploring available options in the getspres function:
#' #     1. Estimate heterogeneity using "REML", default is "DL"
#' #     2. Calculate SPRE statistics verbosely
#' getspres_results <- getspres(beta_in = heartgenes3$beta_flipped, 
#'                                se_in = heartgenes3$gcse, 
#'                       study_names_in = heartgenes3$studies, 
#'                     variant_names_in = heartgenes3$variants,
#'                          tau2_method = "REML",
#'                       verbose_output = TRUE)
#'
#' str(getspres_results)
#'
#' }
#'
#' @export
getspres <- function(beta_in, se_in, study_names_in, variant_names_in, ...) UseMethod("getspres")

#' @describeIn getspres Computes \emph{SPRE} statistics in genetic association meta-analyses
#' @export
getspres.default <- function(beta_in, se_in, study_names_in, 
                                  variant_names_in, tau2_method = "DL",
                                  verbose_output = FALSE, ...) {

    # Check whether all required variables are present
    if (missing(beta_in))
        stop("Beta values missing, need to specify a numeric vector of observed study effects.")
 
    if (missing(se_in))
        stop("Standard errors for study effect-size estimates missing, need to specify a numeric vector of standard errors.")

    if (missing(study_names_in))
        stop("Study names missing, need to specify a character vector of study names.")

    if (missing(variant_names_in))
        stop("Variant names missing, need to specify a character vector of variant names.")

    
    # Verify datatypes of required variables
    if (!is.numeric(c(beta_in, se_in))) {

        stop("beta_in and se_in should be of type, numeric.")

    }


    if (!is.character(c(variant_names_in, study_names_in))) {

        stop("variant_names_in and study_names_in should be of type, character.")

    }
    
    compute_spre_results <- compute_spre_statistics(beta_in, se_in, study_names_in, variant_names_in, 
                                              tau2_method, verbose_output)
    
    compute_spre_results$call <- match.call()
    
    class(compute_spre_results) <- "getspres"
    
    compute_spre_results
    
}

#' @export
print.getspres <- function(x, ..., verbose_output = FALSE) {

    cat("Call:\n")
    print(x$call)

    cat("\nNumber of studies:\n")
    print(x$number_studies)

    cat("\nNumber of variants:\n")
    print(x$number_variants)

    cat("\nDataset structure showing SPRE statistics:\n")
    print(str(x$spre_dataset))


}