R/link_LD.R

Defines functions link_LD

Documented in link_LD

#' Obtain LD at a locus from LDlink
#' 
#' Adds LD information to a 'locus' class object. It queries LDlink
#' (https://ldlink.nci.nih.gov/) via the `LDlinkR` package to retrieve linkage
#' disequilibrium (LD) information on a reference SNP.
#' 
#' @param loc Object of class 'locus' generated by [locus()] 
#' @param pop A 1000 Genomes Project population, (e.g. YRI or CEU), multiple 
#' allowed, default = "CEU". Passed to `LDlinkR::LDmatrix()`.
#' @param r2d Either "r2" for LD r^2 or "d" for LD D', default = "r2". Passed 
#' to `LDlinkR::LDmatrix()` or `LDproxy()`.
#' @param token Personal access token for accessing 1000 Genomes LD data via 
#' LDlink API. See `LDlinkR` package documentation.
#' @param method Either `"proxy"` or `"matrix"`. Controls whether to use
#'    `LDproxy()` or `LDmatrix()` to obtain LD data.
#' @param ... Optional arguments such as `genome_build` which are passed on to
#'   `LDlinkR::LDmatrix()` or `LDlinkR::LDproxy()`
#' @return Returns a list object of class 'locus'. LD information is added as a
#'   column `ld` in list element `data`.
#' @seealso [locus()]
#' @details
#' The argument `method` controls which LDlinkR function is used to retrieve LD
#' data. `LDmatrix()` is slower but usually more complete for small queries
#' (<1000 SNPs). However, it has a limit of 1000 SNPs which can be queried.
#' `LDproxy()` is faster but data on some SNPs may be absent.
#' 
#' Note SNPs have to be correctly formatted as required by LDlinkR, either as
#' rsID or chromosome coordinate e.g. "chr7:24966446". Default genome build is
#' `grch37`, see `LDproxy()` or `LDmatrix()`.
#' 
#' @importFrom LDlinkR LDmatrix LDexpress LDproxy
#' @export

link_LD <- function(loc,
                    pop = "CEU",
                    r2d = "r2",
                    token = "",
                    method = c("proxy", "matrix"), ...) {
  if (!inherits(loc, "locus")) stop("Not a locus object")
  if (!requireNamespace("LDlinkR", quietly = TRUE)) {
    stop("Package 'LDlinkR' must be installed to use this feature",
         call. = FALSE)
  }
  if (token == "") stop("token is missing")
  
  start <- Sys.time()
  labs <- loc$labs
  index_snp <- loc$index_snp
  rslist <- loc$data[, labs]
  if (length(rslist) > 1000) {
    rslist <- rslist[order(loc$data$logP, decreasing = TRUE)]
    rslist <- unique(c(index_snp, rslist))[seq_len(1000)]
  }
  method <- match.arg(method)
  if (method == "proxy") {
    ldp <- try(mem_LDproxy(index_snp, pop = pop, r2d = r2d, token = token, ...))
    if (!inherits(ldp, "try-error")) {
      loc$data$ld <- ldp[match(loc$data[, labs], ldp$RS_Number), "R2"]
    }
  } else {
    message("Obtaining LD on ", length(rslist), " SNPs. ", appendLF = FALSE)
    ldm <- mem_LDmatrix(rslist, pop = pop, r2d = r2d, token = token, ...)
    if (index_snp %in% colnames(ldm)) {
      ld <- ldm[, index_snp]
      loc$data$ld <- ld[match(loc$data[, labs], ldm$RS_number)]
    } else message("Index SNP not found in LDlink data")
  }
  end <- Sys.time()
  m <- sum(!is.na(loc$data$ld))
  message("Matched ", m, " SNPs (", format(end - start, digits = 3),")")
  
  loc
}


# use memoise to reduce calls to LDlink API
mem_LDmatrix <- memoise(LDlinkR::LDmatrix)

mem_LDproxy <- memoise(LDlinkR::LDproxy)
myles-lewis/locuszoomr documentation built on April 16, 2024, 11:13 p.m.