#' Obtain LD at a locus from LDlink
#'
#' Adds LD information to a 'locus' class object. It queries LDlink
#' (https://ldlink.nci.nih.gov/) via the `LDlinkR` package to retrieve linkage
#' disequilibrium (LD) information on a reference SNP.
#'
#' @param loc Object of class 'locus' generated by [locus()]
#' @param pop A 1000 Genomes Project population, (e.g. YRI or CEU), multiple
#' allowed, default = "CEU". Passed to `LDlinkR::LDmatrix()`.
#' @param r2d Either "r2" for LD r^2 or "d" for LD D', default = "r2". Passed
#' to `LDlinkR::LDmatrix()` or `LDproxy()`.
#' @param token Personal access token for accessing 1000 Genomes LD data via
#' LDlink API. See `LDlinkR` package documentation.
#' @param method Either `"proxy"` or `"matrix"`. Controls whether to use
#' `LDproxy()` or `LDmatrix()` to obtain LD data.
#' @param ... Optional arguments such as `genome_build` which are passed on to
#' `LDlinkR::LDmatrix()` or `LDlinkR::LDproxy()`
#' @return Returns a list object of class 'locus'. LD information is added as a
#' column `ld` in list element `data`.
#' @seealso [locus()]
#' @details
#' The argument `method` controls which LDlinkR function is used to retrieve LD
#' data. `LDmatrix()` is slower but usually more complete for small queries
#' (<1000 SNPs). However, it has a limit of 1000 SNPs which can be queried.
#' `LDproxy()` is faster but data on some SNPs may be absent.
#'
#' Note SNPs have to be correctly formatted as required by LDlinkR, either as
#' rsID or chromosome coordinate e.g. "chr7:24966446". Default genome build is
#' `grch37`, see `LDproxy()` or `LDmatrix()`.
#'
#' @importFrom LDlinkR LDmatrix LDexpress LDproxy
#' @export
link_LD <- function(loc,
pop = "CEU",
r2d = "r2",
token = "",
method = c("proxy", "matrix"), ...) {
if (!inherits(loc, "locus")) stop("Not a locus object")
if (!requireNamespace("LDlinkR", quietly = TRUE)) {
stop("Package 'LDlinkR' must be installed to use this feature",
call. = FALSE)
}
if (token == "") stop("token is missing")
start <- Sys.time()
labs <- loc$labs
index_snp <- loc$index_snp
rslist <- loc$data[, labs]
if (length(rslist) > 1000) {
rslist <- rslist[order(loc$data$logP, decreasing = TRUE)]
rslist <- unique(c(index_snp, rslist))[seq_len(1000)]
}
method <- match.arg(method)
if (method == "proxy") {
ldp <- try(mem_LDproxy(index_snp, pop = pop, r2d = r2d, token = token, ...))
if (!inherits(ldp, "try-error")) {
loc$data$ld <- ldp[match(loc$data[, labs], ldp$RS_Number), "R2"]
}
} else {
message("Obtaining LD on ", length(rslist), " SNPs. ", appendLF = FALSE)
ldm <- mem_LDmatrix(rslist, pop = pop, r2d = r2d, token = token, ...)
if (index_snp %in% colnames(ldm)) {
ld <- ldm[, index_snp]
loc$data$ld <- ld[match(loc$data[, labs], ldm$RS_number)]
} else message("Index SNP not found in LDlink data")
}
end <- Sys.time()
m <- sum(!is.na(loc$data$ld))
message("Matched ", m, " SNPs (", format(end - start, digits = 3),")")
loc
}
# use memoise to reduce calls to LDlink API
mem_LDmatrix <- memoise(LDlinkR::LDmatrix)
mem_LDproxy <- memoise(LDlinkR::LDproxy)
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.