#' Get overlap between datatable of SNPs and scATAC peaks
#'
#' Can optionally add \code{Cicero} coaccessibility scores,
#' which are also derived from scATAC-seq data.
#' @param query_dat Genomic summary statistics data to query with.
#' @param FDR_filter Correct p-value threshold.
#' @param add_cicero Whether to include
#' \href{https://www.bioconductor.org/packages/release/bioc/html/cicero.html}{
#' cicero} results as well.
#' @param cell_type_specific Whether to use bulk or cell-type-specific data.
#' @param verbose Print messages.
#'
#' @export
#' @importFrom GenomicRanges mcols
#' @family CORCES2020
#' @source \url{https://doi.org/10.1038/s41588-020-00721-x}
#' @examples
#' query_dat <- echodata::BST1[1:100,]
#' gr.hits <- echoannot::CORCES2020_get_ATAC_peak_overlap(query_dat = query_dat)
CORCES2020_get_ATAC_peak_overlap <- function(query_dat,
FDR_filter = NULL,
add_cicero = TRUE,
cell_type_specific = TRUE,
verbose = TRUE) {
FDR <- Cicero <- NULL;
if (cell_type_specific) {
messager("CORCES2020:: Extracting overlapping",
"cell-type-specific scATAC-seq peaks",
v = verbose
)
dat <- get_CORCES2020_scATACseq_celltype_peaks()
Assay <- "scATAC"
} else {
messager("CORCES2020:: Extracting overlapping",
"bulkATAC-seq peaks from brain tissue",
v = verbose
)
dat <- get_CORCES2020_bulkATACseq_peaks()
Assay <- "bulkATAC"
}
gr.peaks_lifted <- echotabix::liftover(
dat = dat,
query_genome = "hg38",
target_genome = "hg19",
query_chrom_col = "hg38_Chromosome",
query_start_col = "hg38_Start",
query_end_col = "hg38_Stop",
as_granges = TRUE,
style = "NCBI",
verbose = FALSE
)
#### Get overlap with PEAKS ####
gr.hits <- granges_overlap(
dat1 = query_dat,
chrom_col.1 = "CHR",
start_col.1 = "POS",
dat2 = gr.peaks_lifted
)
GenomicRanges::mcols(gr.hits)["Assay"] <- Assay
if (!is.null(FDR_filter)) {
gr.hits <- subset(gr.hits, FDR < FDR_filter)
}
if (add_cicero & cell_type_specific) {
try({
# Pretty sure the Peak_IDs are shared between the
# sc-ATACseq data and cicero,
# because Cicero derives coaccess from sc-ATAC-seq data:
# http://www.cell.com/molecular-cell/retrieve/pii/S1097276518305471?_returnURL=https%3A%2F%2Flinkinghub.elsevier.com%2Fretrieve%2Fpii%2FS1097276518305471%3Fshowall%3Dtrue
## Also pretty sure that checking for cicero overlap
# only in the scATACseq gr.hits object is ok
# bc you can only test for coaccessibility if there's a
# peak to begin with.
cicero <- get_CORCES2020_cicero_coaccessibility()
cicero_dict <- c(
stats::setNames(
cicero$Coaccessibility,
cicero$Peak_ID_Peak1
),
stats::setNames(
cicero$Coaccessibility,
cicero$Peak_ID_Peak2
)
)
if (any(!is.na(cicero_dict[gr.hits$Peak_ID]))) {
gr.hits$Cicero <- cicero_dict[gr.hits$Peak_ID]
gr.cicero <- subset(gr.hits, !is.na(Cicero))
gr.cicero$Assay <- "Cicero"
messager(
"+ CORCES2020:: Cicero coaccessibility scores",
"identified for",
length(gr.cicero), "/", length(gr.hits), "peak hits.",
v = verbose
)
gr.hits <- rbind_granges(
gr1 = gr.hits,
gr2 = gr.cicero
)
} else {
messager("+ CORCES2020:: No Cicero hits found.", v = verbose)
}
})
}
if (cell_type_specific == FALSE) {
GenomicRanges::mcols(gr.hits)["brain"] <- 1
}
return(gr.hits)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.