#' Merge and filter \pkg{motifbreakR} + \pkg{echolocatoR} results
#'
#' For each SNP we have at least one allele achieving a p-value<1e-4 threshold
#' that we required.
#' The \emph{seqMatch} column shows what the reference genome sequence
#' is at that location,
#' with the variant position appearing in an uppercase letter.
#' \emph{pctRef} and \emph{pctAlt} display the the score for the motif
#' in the sequence as a percentage of the best score that motif
#' could achieve on an ideal sequence.
#' In other words, \emph{(scoreVariant−minscorePWM)/(maxscorePWM−minscorePWM)}.
#' We can also see the absolute scores for our method in
#' \emph{scoreRef} and \emph{scoreAlt} and their respective p-values.
#'
#' @param mb_res Results generated by \link[echoannot]{MOTIFBREAKR}, in
#' \link[GenomicRanges]{GRanges} format.
#' @param pct_threshold Remove rows below the percentage of
#' the optimal binding score (PCT) threshold.
#' @param pvalue_threshold Remove rows below the raw
#' significance value (p-value) threshold.
#' @param qvalue_threshold Remove rows below the multiple testing-corrected
#' significance value (q-value) threshold.
#' @param remove_NA_geneSymbol Remove results where \code{geneSymbol==NA}.
#' @param top_geneSymbol_hits Only include N top results per gene symbol
#' based on absolute \code{risk_score}, where N=\code{top_geneSymbol_hits}.
#' If \code{top_geneSymbol_hits=NULL}, no such filtering is performed.
#' @param effect_strengths Only include results with certain effect strengths.
#' @param filter_by_locus Filter \code{mb_res}
#' to only include SNPs present in a given \emph{Locus} (e.g. \code{"BST1"}).
#' Set to \code{NULL} (default) for not perform any filtering.
#' Requires \code{merged_DT} argument to be supplied.
#' @param merged_DT Table with columns \emph{Locus} and
#' \emph{SNP} to filter \code{mb_res} by.
#' @param snp_filter Condition to filter SNPs by,
#' after \code{mb_res} and \code{merged_DT} have been merged together
#' into one table.
#' @param no_no_loci Filter out SNPs contained within specific loci
#' in the \code{merged_DT} table.
#' @param verbose Print messages.
#' @returns \link[data.table]{data.table}
#' with the filtered motif disruption results
#' after merging with \code{merged_DT}.
#'
#' @export
#' @importFrom BiocGenerics %in%
#' @importFrom stats p.adjust
#' @importFrom data.table as.data.table merge.data.table
#' @importFrom dplyr mutate group_by arrange slice desc
#' @examples
#' merged_DT <- echodata::get_Nalls2019_merged()
#' mb_res <- MOTIFBREAKR(rsid_list = c("rs11175620"),
#' # limit the number of datasets tests
#' # for demonstration purposes only
#' pwmList_max = 4,
#' calculate_pvals = FALSE)
#' mb_res_filt <- MOTIFBREAKR_filter(mb_res = mb_res,
#' merged_DT = merged_DT)
MOTIFBREAKR_filter <- function(mb_res,
merged_DT,
filter_by_locus=NULL,
remove_NA_geneSymbol=TRUE,
pct_threshold=NULL,
pvalue_threshold=1e-4,
qvalue_threshold=.05,
effect_strengths=NULL,
snp_filter="Support>0",
top_geneSymbol_hits=NULL,
no_no_loci=NULL,
verbose=TRUE){
# Quickstart
# pct_threshold=.7; verbose=T;
# pvalue_threshold=1e-4;
# mb_res <- MOTIFBREAKR_filter_by_metadata(mb_res = mb_res,
# Organism = "Hsapiens")
geneSymbol <- effect <- SNP_id <- Locus <- risk_allele <- A1 <- REF <-
ALT <- pctRef <- pctAlt <- risk_pvalue <- risk_score <- risk_qvalue <-
Refpvalue <- Altpvalue <- scoreRef <- scoreAlt <- SNP <- risk_pct <-
NULL;
#### Filter ####
if(isTRUE(remove_NA_geneSymbol)){
mb_res <- subset(mb_res, !is.na(geneSymbol))
}
if(!is.null(effect_strengths)){
mb_res <- subset(mb_res, effect %in% effect_strengths)
messager("+ MOTIFBREAKR",nrow(mb_res),"SNPs in results @",
paste0("`effect_strengths=",
paste(effect_strengths, collapse=", "),"`"),
v=verbose)
}
if(!is.null(filter_by_locus) && !is.null(merged_DT)){
mb_res <- subset(mb_res, SNP_id %in% subset(merged_DT,
Locus==filter_by_locus)$SNP)
}
#### Remove loci ####
if(!is.null(no_no_loci)) {
merged_DT <- subset(merged_DT, !(Locus %in% no_no_loci))
}
#### Merge with fine-mapping results ####
mb_res$SNP <- names(mb_res)
if(!is.null(merged_DT)){
mb_merge <- data.table::merge.data.table(
x = merged_DT,
y = data.table::as.data.table(mb_res),
by="SNP", all = TRUE) |>
dplyr::mutate(risk_allele=ifelse(A1==REF,"REF",
ifelse(A1==ALT,"ALT",NA))) |>
dplyr::mutate(
risk_pct=ifelse(risk_allele=="REF", pctRef, pctAlt),
# nonrisk_pct=ifelse(risk_allele=="ALT", pctRef, pctAlt),
risk_score=ifelse(risk_allele=="REF", scoreRef, scoreAlt)
# nonrisk_score=ifelse(risk_allele=="ALT", scoreRef, scoreAlt),
) |>
subset(!is.na(risk_allele))
#### Create "risk_" columns ####
if(MOTIFBREAKR_has_pvals(mb_merge)){
messager("Creating 'risk_' columns",v=verbose)
mb_merge <- mb_merge |>
dplyr::mutate(
risk_pvalue=ifelse(risk_allele=="REF", Refpvalue, Altpvalue)
) |>
dplyr::mutate(
risk_qvalue = stats::p.adjust(p = risk_pvalue,
method = "bonferroni")
)
} else {
messager("pvalues have not yet been calculated.",
"Run echoannot::MOTIFBREAKR_calc_pvals()",
"first to filter by qvalues.",v=verbose)
}
} else {
messager("merged_DT not provided,",
"simply converting mb_res to data.table.",v=verbose)
mb_merge <- data.table::as.data.table(mb_res)
}
messager("+ MOTIFBREAKR",nrow(mb_merge),"SNPs in results.", v=verbose)
if(!is.null(snp_filter)){
mb_merge <- subset(mb_merge, eval(parse(text=snp_filter)))
messager("+ MOTIFBREAKR",nrow(mb_merge),"SNPs in results @",
paste0("`snp_filter='",snp_filter,"'`"),v=verbose)
}
if(!is.null(effect_strengths)){
mb_merge <- subset(mb_merge, effect %in% effect_strengths)
messager("+ MOTIFBREAKR",nrow(mb_merge),"SNPs in results @",
paste0("`effect_strengths=",
paste(effect_strengths, collapse=", "),"`"),v=verbose)
}
if(!is.null(pvalue_threshold)){
mb_merge <- subset(mb_merge, risk_pvalue < pvalue_threshold)
messager("+ MOTIFBREAKR",nrow(mb_merge),"SNPs in results @",
paste0("`pvalue_threshold=",pvalue_threshold,"`"),v=verbose)
}
if(!is.null(qvalue_threshold) && ("risk_qvalue" %in% names(mb_merge))){
mb_merge <- subset(mb_merge, risk_qvalue < qvalue_threshold)
messager("+ MOTIFBREAKR",nrow(mb_merge),"SNPs in results @",
paste0("`qvalue_threshold=",qvalue_threshold,"`"),v=verbose)
}
if(!is.null(pct_threshold)){
# Filter where pval is sig AND it's the effect allele in the GWAS
mb_merge <- subset(mb_merge,
# (pctRef<pct_threshold & A1==REF) |
# (pctAlt<pct_threshold & A1==ALT)
risk_pct < pct_threshold
)
messager("+ MOTIFBREAKR",nrow(mb_merge),"SNPs in results @",
paste0("`pct_threshold=",pct_threshold,"`"),v=verbose)
}
if(!is.null(top_geneSymbol_hits)){
if(!is.numeric(top_geneSymbol_hits)) {
stop("top_geneSymbol_hits must be numeric or NULL.")
}
mb_merge <- mb_merge |>
dplyr::group_by(Locus, SNP, geneSymbol) |>
dplyr::arrange(risk_pvalue, dplyr::desc(abs(risk_score))) |>
dplyr::slice(top_geneSymbol_hits) |>
data.table::data.table()
}
return(mb_merge)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.