#' Read BOLT-LMM statistics table
#'
#' Parses the output generated by BOLT-LMM with option `--lmm` (genetic assocation test) and specified with the option `--statsFile <file>`.
#' Returned table has column names standardized for convenience.
#' Validated with BOLT-LMM version 2.3.5.
#'
#' @param file The file path to read, possibly missing the extension.
#' @param ext The expected file extension.
#' Set to `NA` to prevent adding any extension to `file`.
#' BOLT-LMM does not create an output with a standard extension, so I chose the default extension to match what is generated by [bolt_lmm()].
#' @param verbose If `TRUE` (default) function reports the path of the file being loaded (after autocompleting the extensions).
#'
#' @return The table as a `tibble`.
#' Original names (as they appear in the header line of the file) are modified by lowercasing all, followed by these specific mappings to ensure that columns shared by BIM table have the same names as those returned by [genio::read_bim()], the association p-value is "p", and the regression coefficient is "beta" ("orig" -> "new"):
#' - "snp" -> "id"
#' - "bp" -> "pos"
#' - "genpos" -> "posg"
#' - "allele1" -> "alt"
#' - "allele0" -> "ref"
#' - "p_bolt_lmm_inf" -> "p_inf"
#' - "p_bolt_lmm" -> "p"
#'
#' @examples
#' \dontrun{
#' data <- read_bolt_lmm( file )
#' }
#'
#' @seealso
#' [bolt_lmm()] wrapper for executing BOLT-LMM and reading the results.
#'
#' @export
read_bolt_lmm <- function( file, ext = 'bolt.txt', verbose = TRUE ) {
if ( missing( file ) )
stop( '`file` is required!' )
# add .ext and/or .gz if missing and needed
file <- genio:::add_ext_read(file, ext)
# announce it if needed
if (verbose)
message('Reading: ', file)
# read table!
data <- readr::read_tsv(
file,
col_types = 'ccidccdddddd'
)
# normalize column names
# just lowercasing gets us far!
names( data ) <- tolower( names( data ) )
# other things to rename
names( data )[ names( data ) == 'snp' ] <- 'id'
names( data )[ names( data ) == 'bp' ] <- 'pos'
names( data )[ names( data ) == 'genpos' ] <- 'posg'
names( data )[ names( data ) == 'allele1' ] <- 'alt'
names( data )[ names( data ) == 'allele0' ] <- 'ref'
names( data )[ names( data ) == 'p_bolt_lmm_inf' ] <- 'p_inf'
names( data )[ names( data ) == 'p_bolt_lmm' ] <- 'p' # this is the best/main p-value
# return tibble
return( data )
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.