R/read_gcta_mlma.R

Defines functions read_gcta_mlma

Documented in read_gcta_mlma

#' Read GCTA MLMA statistics table
#'
#' Parses the output generated by GCTA with option `--mlma` (genetic assocation test).
#' Returned table has column names standardized for convenience.
#' Validated with GCTA version 1.93.2 beta.
#'
#' @param file The file path to read, possibly missing the extension.
#' @param ext The expected file extension.
#' Set to `NA` to prevent adding any extension to `file`.
#' The default extension "mlma" agrees with the output of [gcta_mlma()].
#' @param verbose If `TRUE` (default) function reports the path of the file being loaded (after autocompleting the extensions).
#'
#' @return The table as a `tibble`.
#' Original names (as they appear in the header line of the file) are modified by lowercasing all, followed by these specific mappings to ensure that columns shared by BIM table have the same names as those returned by [genio::read_bim()], the association p-value is "p", and the regression coefficient is "beta" ("orig" -> "new"):
#' - "snp" -> "id"
#' - "bp" -> "pos"
#' - "a1" -> "alt"
#' - "a2" -> "ref"
#' - "b" -> "beta"
#'
#' @examples 
#' \dontrun{
#' data <- read_gcta_mlma( file )
#' }
#'
#' @seealso
#' [gcta_mlma()] wrapper for executing GCTA MLMA and reading the results.
#' 
#' @export
read_gcta_mlma <- function( file, ext = 'mlma', verbose = TRUE ) {
    if ( missing( file ) )
        stop( '`file` is required!' )

    # add .ext and/or .gz if missing and needed
    file <- genio:::add_ext_read(file, ext)
    
    # announce it if needed
    if (verbose)
        message('Reading: ', file)

    # read table!
    data <- readr::read_tsv(
                       file,
                       col_types = 'cciccdddd'
                   )
    
    # normalize column names
    # just lowercasing gets us far!
    names( data ) <- tolower( names( data ) )
    # other things to rename
    names( data )[ names( data ) == 'snp' ] <- 'id'
    names( data )[ names( data ) == 'bp' ] <- 'pos'
    names( data )[ names( data ) == 'a1' ] <- 'alt'
    names( data )[ names( data ) == 'a2' ] <- 'ref'
    names( data )[ names( data ) == 'b' ] <- 'beta'
    
    # return tibble
    return( data )
}
OchoaLab/genbin documentation built on Nov. 14, 2024, 7:33 p.m.