Nothing
#' Obtain LD at a locus from LDlink
#'
#' Adds LD information to a 'locus' class object. It queries LDlink
#' (https://ldlink.nci.nih.gov/) via the `LDlinkR` package to retrieve linkage
#' disequilibrium (LD) information on a reference SNP.
#'
#' @param loc Object of class 'locus' generated by [locus()]
#' @param pop A 1000 Genomes Project population, (e.g. YRI or CEU), multiple
#' allowed, default = "CEU". Passed to `LDlinkR::LDmatrix()`.
#' @param r2d Either "r2" for LD r^2 or "d" for LD D', default = "r2". Passed
#' to `LDlinkR::LDmatrix()` or `LDproxy()`.
#' @param token Personal access token for accessing 1000 Genomes LD data via
#' LDlink API. See `LDlinkR` package documentation.
#' @param method Either `"proxy"` or `"matrix"`. Controls whether to use
#' `LDproxy()` or `LDmatrix()` to obtain LD data.
#' @param genome_build Choose between one of the three options: 'grch37' for
#' genome build GRCh37 (hg19), 'grch38' for GRCh38 (hg38), or
#' 'grch38_high_coverage' for GRCh38 High Coverage (hg38) 1000 Genome Project
#' data sets. Default is GRCh37 (hg19).
#' @param ... Optional arguments which are passed on to `LDlinkR::LDmatrix()` or
#' `LDlinkR::LDproxy()`
#' @return Returns a list object of class 'locus'. LD information is added as a
#' column `ld` in list element `data`.
#' @seealso [locus()]
#' @details
#' The argument `method` controls which LDlinkR function is used to retrieve LD
#' data. `LDmatrix()` is slower but usually more complete for small queries
#' (<1000 SNPs). However, it has a limit of 1000 SNPs which can be queried.
#' `LDproxy()` is faster but data on some SNPs may be absent.
#'
#' Note, SNPs have to be correctly formatted as required by LDlinkR, either as
#' rsID (works with either method) or chromosome coordinate e.g. "chr7:24966446"
#' (works with LDproxy only). Default genome build is `grch37`, see `LDproxy()`
#' or `LDmatrix()`.
#'
#' @importFrom LDlinkR LDmatrix LDexpress LDproxy
#' @export
link_LD <- function(loc,
pop = "CEU",
r2d = "r2",
token = "",
method = c("proxy", "matrix"),
genome_build = loc$genome, ...) {
if (!inherits(loc, "locus")) stop("Not a locus object")
if (!requireNamespace("LDlinkR", quietly = TRUE)) {
stop("Package 'LDlinkR' must be installed to use this feature",
call. = FALSE)
}
if (token == "") stop("token is missing")
start <- Sys.time()
labs <- loc$labs
index_snp <- loc$index_snp
snp_col <- if (grepl("rs", index_snp)) "RS_Number" else "Coord"
rslist <- loc$data[, labs]
if (length(rslist) > 1000) {
rslist <- rslist[order(loc$data$logP, decreasing = TRUE)]
rslist <- unique(c(index_snp, rslist))[seq_len(1000)]
}
method <- match.arg(method)
genome_build <- tolower(genome_build)
if (!grepl(loc$genome, genome_build, ignore.case = TRUE)) {
warning("mismatched genome build")
}
if (method == "proxy") {
ldp <- try(mem_LDproxy(index_snp, pop = pop, r2d = r2d, token = token,
genome_build = genome_build, ...))
if (!inherits(ldp, "try-error")) {
loc$data$ld <- ldp[match(loc$data[, labs], ldp[, snp_col]), "R2"]
}
} else {
message("Obtaining LD on ", length(rslist), " SNPs. ", appendLF = FALSE)
ldm <- mem_LDmatrix(rslist, pop = pop, r2d = r2d, token = token,
genome_build = genome_build, ...)
if (index_snp %in% colnames(ldm)) {
ld <- ldm[, index_snp]
loc$data$ld <- ld[match(loc$data[, labs], ldm$RS_number)]
} else message("Index SNP not found in LDlink data")
}
end <- Sys.time()
m <- sum(!is.na(loc$data$ld))
message("Matched ", m, " SNPs (", format(end - start, digits = 3),")")
loc
}
# use memoise to reduce calls to LDlink API
mem_LDmatrix <- memoise(LDlinkR::LDmatrix)
mem_LDproxy <- memoise(LDlinkR::LDproxy)
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.