R/read_emmax_ps.R

Defines functions read_emmax_ps

Documented in read_emmax_ps

# unusual in that MAF is needed to correct signs

#' Read EMMAX statistics table
#'
#' Parses the output generated by EMMAX  (genetic assocation test).
#' Returned table has standard column names.
#'
#' @param file The file path to read, possibly missing the extension.
#' @param maf Optional vector of allele frequencies (misnamed, these cannot be *minor* allele frequencies or nothing gets corrected), used to correct signs of regression coefficients (EMMAX reports them in terms of the major allele, not the original allele).  If missing, no corrections are applied.
#' @param ext The expected file extension.
#' Set to `NA` to prevent adding any extension to `file`.
#' The default "ps" extension matches what EMMAX generates.
#' @param verbose If `TRUE` (default) function reports the path of the file being loaded (after autocompleting the extensions).
#'
#' @return The table as a `tibble`.
#' The original does not have a header line.
#' Output has 3 columns named: "id", "beta", "p".
#'
#' @examples 
#' \dontrun{
#' data <- read_emmax_ps( file, maf )
#' }
#'
#' @seealso
#' [emmax_lmm()] wrapper for executing EMMAX and reading the results.
#' 
#' @export
read_emmax_ps <- function(
                          file,
                          maf = NULL,
                          ext = 'ps',
                          verbose = TRUE
                          ) {
    if ( missing( file ) )
        stop( '`file` is required!' )

    # add .ext and/or .gz if missing and needed
    file <- genio:::add_ext_read(file, ext)
    
    # announce it if needed
    if (verbose)
        message('Reading: ', file)

    # read table!
    data <- readr::read_tsv(
                       file,
                       col_types = 'cdd',
                       col_names = c('id', 'beta', 'p')
                   )
    
    # emmax-specific: correct beta signs if MAF is available
    if ( !is.null( maf ) ) {
        # make sure lengths match
        if ( length( maf ) != nrow( data ) )
            stop( 'Length of `maf` (', length( maf ), ') must equal number of rows of association table (', nrow( data ), ')!' )
        indexes <- maf < 0.5
        data$beta[ indexes ] <- - data$beta[ indexes ]
    }
    
    # return tibble
    return( data )
}
OchoaLab/genbin documentation built on Nov. 14, 2024, 7:33 p.m.