EXPECTED_REGENIE_COLNAMES <- c(
'CHROM',
'GENPOS',
'ID',
'ALLELE0',
'ALLELE1',
'A1FREQ',
'INFO',
'N',
'TEST',
'BETA',
'SE',
'CHISQ',
'LOG10P',
'EXTRA'
)
# Manhattan plots ---------------------------------------------------------
#' Read REGENIE results files into R
#'
#' Reads \code{.regenie} GWAS results files into a data table in R, filtering on
#' minor allele frequency (appended) and info score. Also appends a p value
#' column (\code{P}).
#'
#' @param path File path to directory containing \code{.regenie} files. There
#' should be one file for each of 22 chromosomes (i.e. excluding sex
#' chromosomes), otherwise and error is raised. An error is also raised if the
#' column names do not match those expected for REGENIE: 'CHROM', 'GENPOS',
#' 'ID', 'ALLELE0', 'ALLELE1', 'A1FREQ', 'INFO', 'N', 'TEST', 'BETA', 'SE',
#' 'CHISQ', 'LOG10P', 'EXTRA'.
#' @param maf_threshold Minor allele frequencies below this threshold will be
#' removed. Default is 0.01.
#' @param info_threshold Info scores below this threshold will be removed.
#' Default is 0.9.
#'
#' @return A data table.
#' @export
read_regenie <- function(path,
maf_threshold = 0.01,
info_threshold = 0.9) {
start_time <- proc.time()
# get regenie results file paths
regenie_filepaths <- fs::dir_ls(path = path,
glob = "*.regenie")
# check that .regenie files for all 22 chromosomes are present (excludes sex chromosomes)
assertthat::assert_that(
length(regenie_filepaths) == 22,
msg = paste0(
"Error! 22 files expected, but only ",
length(regenie_filepaths),
" .regenie files detected at `path`"
)
)
# read into R
message("Reading regenie files into R")
regenie_results <- regenie_filepaths %>%
purrr::map(data.table::fread) %>%
dplyr::bind_rows()
# check that expected column names are present
assertthat::assert_that(
all(names(regenie_results) == EXPECTED_REGENIE_COLNAMES),
msg = paste0(
"Error! Unexpected column names. Expected: ",
stringr::str_c(
EXPECTED_REGENIE_COLNAMES,
sep = "",
collapse = ", "
),
". Got: ",
stringr::str_c(names(regenie_results),
sep = "",
collapse = ", ")
)
)
message("Appending P and MAF cols")
# append p and maf columns
regenie_results <- regenie_results %>%
dplyr::mutate("P" = 1 / (10 ^ .data[["LOG10P"]]),
"MAF" = ifelse(.data[["A1FREQ"]] > 0.5,
yes = 1 - .data[["A1FREQ"]],
no = .data[["A1FREQ"]]))
# filter on maf and info
message("Filtering on MAF and INFO")
regenie_results <- regenie_results %>%
dplyr::filter(.data[["MAF"]] > !!maf_threshold &
.data[["INFO"]] > !!info_threshold) %>%
# select/rename required cols (match defaults for `qqman::manhattan()`)
dplyr::select(CHR = CHROM,
BP = GENPOS,
P,
SNP = ID,
MAF,
INFO)
# return result
message("Success!")
time_taken_message(start_time)
return(regenie_results)
}
# Plots -------------------------------------------------------------------
#' Generate a Manhattan plot
#'
#' A thin wrapper around \code{\link[qqman]{manhattan}} with preferred default
#' settings.
#'
#' @inheritParams qqman::manhattan
#'
#' @return A Manhattan plot
#' @export
plot_manhattan_regenie <- function(x,
chr = "CHR",
bp = "BP",
p = "P",
snp = "SNP",
col = c("blue4", "orange3"),
chrlabs = NULL,
suggestiveline = FALSE,
genomewideline = -log10(5e-08),
highlight = NULL,
logp = TRUE,
annotatePval = NULL,
annotateTop = TRUE,
cex = 0.4,
cex.axis = 0.6,
...) {
qqman::manhattan(x,
chr = chr,
bp = bp,
p = p,
snp = snp,
col = col,
chrlabs = chrlabs,
suggestiveline = suggestiveline,
genomewideline = genomewideline,
highlight = highlight,
logp = logp,
annotatePval = annotatePval,
annotateTop = annotateTop,
cex = cex,
cex.axis = cex.axis,
...)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.