R/link_recomb.R

Defines functions get_recomb query_recomb link_recomb

Documented in link_recomb

#' Query UCSC for Recombination data 
#' 
#' Adds recombination data to a 'locus' object by querying UCSC genome browser.
#' 
#' @param loc Object of class 'locus' generated by [locus()]
#' @param genome Either `"hg38"` or `"hg19"`
#' @param table Optional character value specifying which recombination table to
#'   use.
#' @param recomb Optional `GRanges` class object of recombination data.
#' @details
#' Uses the `rtracklayer` package to query UCSC genome browser for recombination
#' rate data.
#' 
#' Possible options for `table` for hg19 are `"hapMapRelease24YRIRecombMap"`,
#' `"hapMapRelease24CEURecombMap"`, `"hapMapRelease24CombinedRecombMap"` (the
#' default). The only option for `table` for hg38 is `"recomb1000GAvg"` (the
#' default).
#' 
#' If you are doing many queries, it may be much faster to download the entire 
#' recombination track data (around 30 MB for hg38) from the Recombination Rate 
#' Tracks page at
#' [UCSC genome browser](https://genome.ucsc.edu/cgi-bin/hgTrackUi?g=recombRate2).
#' The link to the hg38 download folder is 
#' <http://hgdownload.soe.ucsc.edu/gbdb/hg38/recombRate/> and for hg19 is 
#' <http://hgdownload.soe.ucsc.edu/gbdb/hg19/decode/>. These .bw files can be
#' converted to useable `GRanges` objects using `rtracklayer::import.bw()` (see
#' the vignette).
#' 
#' Sometimes `rtracklayer` generates intermittent API errors or warnings: try
#' calling `link_recomb()` again. If warnings persist restart your R session.
#' Errors are handled gracefully using `try()` to allow users to wrap
#' `link_recomb()` in a loop without quitting halfway. Error messages are still
#' shown. Successful API calls are cached using `memoise` to reduce API
#' requests.
#' 
#' @returns A list object of class 'locus'. Recombination data is added as list
#'   element `recomb`.
#' @importFrom GenomeInfoDb genome<- seqnames
#' @importFrom GenomicRanges GRanges
#' @importFrom IRanges IRanges
#' @importFrom rtracklayer browserSession ucscTableQuery getTable
#' @importFrom memoise drop_cache
#' @export
#'
link_recomb <- function(loc, genome = "hg38", table = NULL, 
                        recomb = NULL) {
  if (!inherits(loc, "locus")) stop("Not a locus object")
  if (!is.null(recomb)) {
    seqname <- loc$seqname
    if (!grepl("chr", seqname)) seqname <- paste0("chr", seqname)
    rec <- recomb[seqnames(recomb) == seqname, ]
    rec <- rec[end(rec) > loc$xrange[1] & start(rec) < loc$xrange[2], ]
    rec <- as.data.frame(rec)[, c("start", "end", "score")]
    colnames(rec)[3] <- "value"
    loc$recomb <- rec
    return(loc)
  }
  loc$recomb <- get_recomb(genome, loc$xrange, loc$seqname, table)
  loc
}


query_recomb <- function(gen, xrange, seqname, table = NULL) {
  if (is.null(table)) {
    table <- if (gen == "hg38") {"recomb1000GAvg"
    } else if (gen == "hg19") "hapMapRelease24CombinedRecombMap"
  }
  if (!grepl("chr", seqname)) seqname <- paste0("chr", seqname)
  gr <- GRanges(ranges = IRanges(start = xrange[1], end = xrange[2]),
                seqnames = seqname)
  message("Retrieving recombination data from UCSC")
  session <- browserSession("UCSC")
  genome(session) <- gen
  query <- ucscTableQuery(session, table = table, range = gr)
  gtab <- try(getTable(query))
  if (inherits(gtab, "try-error")) return(NULL)
  gtab
}

# use memoise to reduce calls to UCSC API
mem_query_recomb <- memoise(query_recomb)

# drop memoise cache if error occurs
get_recomb <- function(gen, xrange, seqname, table = NULL) {
  ret <- mem_query_recomb(gen, xrange, seqname, table)
  if (is.null(ret)) drop_cache(mem_query_recomb)(gen, xrange, seqname, table)
  ret
}
myles-lewis/locuszoomr documentation built on April 16, 2024, 11:13 p.m.