#' Iteratively run coloc on merged GWAS-QTL datatables
#'
#' Runs colocalization tests (\link[coloc]{coloc.abf})
#' on merged GWAS-QTL \emph{data.tables}
#' generated by \link[catalogueR]{eQTLcatalogue_query}.
#' Iteratively runs coloc across each:
#' \itemize{
#' \item{QTL dataset}
#' \item{GWAS locus}
#' \item{QTL gene}
#' }
#' \emph{NOTE:} Assumes that each file is within a subfolder named after
#' the QTL dataset it came from.
#'
#' @param save_path Where to save results to.
#' @param top_snp_only Only include the SNP (with the highest SNP-wise PP.H4,
#' which is usually the one with the smallest p-value) instead of all SNPs.
#' Can be useful for reducing data size.
#' @param split_by_group Split files by QTL group when saving.
#' @inheritParams COLOC_merge_res
#' @inheritParams eQTLcatalogue_query
#' @inheritParams echodata::get_sample_size
#'
#' @return
#' If \code{top_snp_only=TRUE}, returns SNP-level stats for only the SNP
#' with the highest colocalization probability (\emph{SNP.PP.H4})
#' If \code{top_snp_only=FALSE}, returns SNP-level stats for every SNP.
#' In either case, summary-level coloc stats are added in the columns
#' \emph{PP.H0}, \emph{PP.H1}, \emph{PP.H2}, \emph{PP.H3}, \emph{PP.H4}.
#' @family coloc
#' @export
#' @importFrom echodata get_sample_size
#' @importFrom data.table data.table rbindlist fwrite
#' @importFrom dplyr top_n
#' @importFrom parallel mclapply
#' @examples
#' gwas.qtl_paths <- catalogueR::eQTLcatalogue_example_queries()
#' coloc_QTLs <- catalogueR::COLOC_run(gwas.qtl_paths = gwas.qtl_paths)
COLOC_run <- function(gwas.qtl_paths,
save_path = tempfile(pattern = "coloc_results",
fileext = ".tsv.gz"),
top_snp_only = TRUE,
split_by_group = FALSE,
method = "abf",
coloc_thresh = .8,
compute_n = NULL,
nThread = 1,
verbose = TRUE) {
qtl.groups <- unique(unlist(lapply(unname(gwas.qtl_paths),
parse_gwas.qtl_path)))
#### Iterate over QTL groups ####
coloc_QTLs <- timeit(lapply(qtl.groups, function(group) {
messager("+ QTL Group = ", group, v=verbose)
gwas.qtl_paths_select <- gwas.qtl_paths[
basename(dirname(gwas.qtl_paths)) == group
]
# ---- Iterate over QTL datasets
coloc_qtls <- parallel::mclapply(
gwas.qtl_paths_select,
function(qtl.path) {
qtl_ID <- parse_gwas.qtl_path(
gwas.qtl_path = qtl.path,
get_locus = FALSE,
get_qtl_id = TRUE
)
gwas.locus <- parse_gwas.qtl_path(
gwas.qtl_path = qtl.path,
get_locus = TRUE,
get_qtl_id = FALSE
)
coloc_eGenes <- data.table::data.table()
try({
qtl.dat <- data.table::fread(qtl.path)
messager(
"++ GWAS =", gwas.locus, "x",
formatC(length(unique(qtl.dat$gene.QTL)),
big.mark = ","),
"eGenes",
v = verbose
)
#### Check column names ####
#### Check sample size ####
# infer N from N_cases and N_controls
qtl.dat <- echodata::get_sample_size(
dat = qtl.dat,
compute_n = compute_n,
verbose = verbose
)
qtl.dat <- check_sample_size(
qtl.dat = qtl.dat,
sample_size = compute_n
)
if (!"qtl_id" %in% colnames(qtl.dat)) {
qtl.dat <- cbind(qtl.dat, qtl_id = qtl_ID)
}
qtl.dataset <- subset(qtl.dat, qtl_id == qtl_ID &
!is.na(gene.QTL) & gene.QTL != "")
remove(qtl.dat)
#### Iterate over QTL eGenes ####
coloc_eGenes <- lapply(unique(qtl.dataset$gene.QTL),
function(eGene) {
messager("+++ QTL eGene =", eGene, v = verbose)
#### Extract QTL cols ####
qtl.egene <- subset(qtl.dataset, gene.QTL == eGene)
# Extract the GWAS cols
if ("Effect" %in% colnames(qtl.egene)) {
gwas_cols <- c(
"Locus", "Locus.GWAS", "SNP", "CHR", "POS",
"P", "Effect", "StdErr", "Freq",
"MAF", "N", "N_cases", "N_controls",
"proportion_cases", "A1", "A2"
)
gwas_cols <- gwas_cols[
gwas_cols %in% colnames(qtl.egene)
]
gwas.region <- subset(qtl.egene,
select = gwas_cols)
}
#### Run coloc ####
coloc_res <- COLOC_get_res(
qtl.egene = qtl.egene,
gwas.region = gwas.region,
merge_by_rsid = TRUE,
coloc_thresh = coloc_thresh,
method = method,
verbose = FALSE
)
coloc_summary <- as.list(coloc_res$summary)
coloc_results <- coloc_res$results
if (top_snp_only) {
coloc_results <- (
dplyr::top_n(coloc_results,
n = 1, wt = SNP.PP.H4
))[1, ]
}
#### Rename columns coloc_results ####
colnames(coloc_results) <- gsub(
"\\.df1", ".QTL",
colnames(coloc_results)
)
colnames(coloc_results) <- gsub(
"\\.df2", ".GWAS",
colnames(coloc_results)
)
#### Create results data.table ####
coloc_DT <- data.table::data.table(
Locus.GWAS = coloc_res$Locus,
qtl_id = qtl_ID,
gene.QTL = eGene,
P = gwas.region$P,
CHR = gwas.region$CHR,
POS = gwas.region$POS,
pvalue.QTL = qtl.egene$pvalue.QTL,
coloc_results,
PP.H0 = coloc_summary$PP.H0.abf,
PP.H1 = coloc_summary$PP.H1.abf,
PP.H2 = coloc_summary$PP.H2.abf,
PP.H3 = coloc_summary$PP.H3.abf,
PP.H4 = coloc_summary$PP.H4.abf
)
return(coloc_DT)
}
) |> data.table::rbindlist(fill = TRUE)
# END ITERATE OVER eGenes
}) # end try()
return(coloc_eGenes)
},
mc.cores = nThread
) |> data.table::rbindlist(fill = TRUE)
# END ITERATE OVER gwas.qtl_paths_select
if (split_by_group) {
split_path <- file.path(dirname(save_path), group)
messager("Saving split file ==>", split_path)
dir.create(dirname(split_path),
showWarnings = FALSE, recursive = TRUE
)
data.table::fwrite(
x = coloc_qtls,
file = split_path,
nThread = 1
)
return(split_path)
} else {
return(coloc_qtls)
}
})) # end timeit()
if (split_by_group) {
messager("+ Returning split file paths.")
} else {
coloc_QTLs <- data.table::rbindlist(coloc_QTLs, fill = TRUE)
# Save all coloc results in one data.table
if (save_path != FALSE) {
dir.create(dirname(save_path),
showWarnings = FALSE, recursive = TRUE
)
data.table::fwrite(coloc_QTLs,
file = save_path,
sep = "\t",
nThread = 1
)
}
}
return(coloc_QTLs)
}
#### Deprecation function #####
run_coloc <- function(...){
.Deprecated("COLOC_run")
COLOC_run(...)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.