R/link_LD.R

Defines functions link_LD

Documented in link_LD

#' Obtain LD at a locus from LDlink
#' 
#' Adds LD information to a 'locus' class object. It queries LDlink
#' (https://ldlink.nci.nih.gov/) via the `LDlinkR` package to retrieve linkage
#' disequilibrium (LD) information on a reference SNP.
#' 
#' @param loc Object of class 'locus' generated by [locus()] 
#' @param pop A 1000 Genomes Project population, (e.g. YRI or CEU), multiple 
#' allowed, default = "CEU". Passed to `LDlinkR::LDmatrix()`.
#' @param r2d Either "r2" for LD r^2 or "d" for LD D', default = "r2". Passed 
#' to `LDlinkR::LDmatrix()` or `LDproxy()`.
#' @param token Personal access token for accessing 1000 Genomes LD data via 
#' LDlink API. See `LDlinkR` package documentation.
#' @param method Either `"proxy"` or `"matrix"`. Controls whether to use
#'    `LDproxy()` or `LDmatrix()` to obtain LD data.
#' @param genome_build Choose between one of the three options: 'grch37' for
#'   genome build GRCh37 (hg19), 'grch38' for GRCh38 (hg38), or
#'   'grch38_high_coverage' for GRCh38 High Coverage (hg38) 1000 Genome Project
#'   data sets. Default is GRCh37 (hg19).
#' @param ... Optional arguments which are passed on to `LDlinkR::LDmatrix()` or
#'   `LDlinkR::LDproxy()`
#' @return Returns a list object of class 'locus'. LD information is added as a
#'   column `ld` in list element `data`.
#' @seealso [locus()]
#' @details
#' The argument `method` controls which LDlinkR function is used to retrieve LD
#' data. `LDmatrix()` is slower but usually more complete for small queries
#' (<1000 SNPs). However, it has a limit of 1000 SNPs which can be queried.
#' `LDproxy()` is faster but data on some SNPs may be absent.
#' 
#' Note, SNPs have to be correctly formatted as required by LDlinkR, either as
#' rsID (works with either method) or chromosome coordinate e.g. "chr7:24966446"
#' (works with LDproxy only). Default genome build is `grch37`, see `LDproxy()`
#' or `LDmatrix()`.
#' 
#' @importFrom LDlinkR LDmatrix LDexpress LDproxy
#' @export

link_LD <- function(loc,
                    pop = "CEU",
                    r2d = "r2",
                    token = "",
                    method = c("proxy", "matrix"),
                    genome_build = loc$genome, ...) {
  if (!inherits(loc, "locus")) stop("Not a locus object")
  if (!requireNamespace("LDlinkR", quietly = TRUE)) {
    stop("Package 'LDlinkR' must be installed to use this feature",
         call. = FALSE)
  }
  if (token == "") stop("token is missing")
  
  start <- Sys.time()
  labs <- loc$labs
  index_snp <- loc$index_snp
  snp_col <- if (grepl("rs", index_snp)) "RS_Number" else "Coord"
  rslist <- loc$data[, labs]
  if (length(rslist) > 1000) {
    rslist <- rslist[order(loc$data$logP, decreasing = TRUE)]
    rslist <- unique(c(index_snp, rslist))[seq_len(1000)]
  }
  method <- match.arg(method)
  genome_build <- tolower(genome_build)
  if (!grepl(loc$genome, genome_build, ignore.case = TRUE)) {
    warning("mismatched genome build")
  }
  if (method == "proxy") {
    ldp <- try(mem_LDproxy(index_snp, pop = pop, r2d = r2d, token = token,
                           genome_build = genome_build, ...))
    if (!inherits(ldp, "try-error")) {
      loc$data$ld <- ldp[match(loc$data[, labs], ldp[, snp_col]), "R2"]
    }
  } else {
    message("Obtaining LD on ", length(rslist), " SNPs. ", appendLF = FALSE)
    ldm <- mem_LDmatrix(rslist, pop = pop, r2d = r2d, token = token,
                        genome_build = genome_build, ...)
    if (index_snp %in% colnames(ldm)) {
      ld <- ldm[, index_snp]
      loc$data$ld <- ld[match(loc$data[, labs], ldm$RS_number)]
    } else message("Index SNP not found in LDlink data")
  }
  end <- Sys.time()
  m <- sum(!is.na(loc$data$ld))
  message("Matched ", m, " SNPs (", format(end - start, digits = 3),")")
  
  loc
}


# use memoise to reduce calls to LDlink API
mem_LDmatrix <- memoise(LDlinkR::LDmatrix)

mem_LDproxy <- memoise(LDlinkR::LDproxy)

Try the locuszoomr package in your browser

Any scripts or data that you put into this service are public.

locuszoomr documentation built on April 12, 2025, 2:28 a.m.