Nothing
#' Function to query Ensembl LD data with a single rsID
#'
#' @param rsid String. Variant ID.
#' @param r2 Float. Measure of LD. If r-squared is provided only return pairs of variants whose r-squared value is equal to or greater than the value provided.
#' @param d.prime Float. Measure of LD. If D' is provided only return pairs of variants whose D' value is equal to or greater than the value provided.
#' @param window.size Integer. Window size in kb. The maximum allowed value for the window size is 500 kb. LD is computed for the given variant and all variants that are located within the specified window.
#' @param pop String. Population for which to compute LD. Use `ensemblQueryGetPops()` to retrieve a list of all populations with LD data. Default is 1000GENOMES:phase_3:EUR.
#'
#'
#' @return A dataframe.
#'
#' @import httr
#' @import xml2
#' @importFrom jsonlite fromJSON toJSON
#' @import dplyr
#' @import tidyr
#' @import vroom
#' @importFrom magrittr %>%
#'
#' @export
#'
#' @examples
#' ensemblQueryLDwithSNPwindow(rsid="rs3851179", r2=0.8, d.prime=0.8,
#' window.size=500, pop="1000GENOMES:phase_3:EUR")
#'
ensemblQueryLDwithSNPwindow = function(rsid, pop="1000GENOMES:phase_3:EUR", r2=NA, d.prime=NA, window.size=NA){
# TEST
# load libs
# require(httr)
# require(xml2)
# if( !("tidyverse" %in% (.packages())) ){
# require(jsonlite)
# }
# require(dplyr)
# require(tidyr)
# require(purrr)
# require(vroom)
# require(magrittr)
#
# rsid="rs1042779"
# r2=NA
# d.prime=NA
# window.size=NA
# pop="1000GENOMES:phase_3:EUR"
#------------------------------ check inputs -------------------------------
stopifnot(is.character(rsid))
stopifnot(is.numeric(r2) | is.character(r2) | is.na(r2))
stopifnot(is.numeric(d.prime) | is.character(d.prime) | is.na(d.prime))
stopifnot(is.character(window.size) | is.numeric(window.size) | is.na(window.size))
stopifnot(is.character(pop))
#--------------------------------- run query -------------------------------
if(is.na(r2)){r2=""}else{r2=paste0(";r2=",r2)}
if(is.na(d.prime)){d.prime=""}else{d.prime=paste0(";r2=",d.prime)}
if(is.na(window.size)){window.size=""}else{window.size=paste0("r2=",window.size)}
server <- "https://rest.ensembl.org"
ext <- paste0("/ld/human/",rsid,"/",pop,"?",window.size,r2,d.prime)
r <- httr::GET(url=paste(server, ext, sep = ""), httr::content_type("application/json"))
#-------------------- check output and write out ---------------------------
# stop_for_status(r)
# error handling, if 400 error, set res.temp as NA
if(r$status_code == 400){
message("Error 400 thrown by httr::GET. This may not be a valid SNP rsID, check using dbSNP: https://www.ncbi.nlm.nih.gov/snp/.")
res.temp = NA
} else{
# if no error, use this if you get a simple nested list back, otherwise inspect its structure
res.temp = jsonlite::fromJSON(jsonlite::toJSON(content(r))) %>%
data.frame()
}
# deal with null search result by testing for df length
# then returning an empty df if no result
if(is.data.frame(res.temp)){
if(nrow(res.temp)==0){
data.frame(rep(NA, 5), row.names = c("variation2", "population_name", "variation1", "d_prime", "r2")) %>%
t() %>%
`rownames<-`(NULL) %>%
as.data.frame() %>%
dplyr::rename(query=variation1, snp_in_ld=variation2) %>%
tidyr::unnest(cols = c(query, snp_in_ld, r2, d_prime, population_name)) %>%
dplyr::relocate(query, snp_in_ld, r2, d_prime, population_name) %>%
tibble::tibble() %>%
return()
} else{
# if not 0-row (empty) df, then deal with it normally, format and prepare for return
res.temp %>%
data.frame() %>%
dplyr::arrange(r2) %>%
dplyr::rename(query=variation1, snp_in_ld=variation2) %>%
tidyr::unnest(cols = c(query, snp_in_ld, r2, d_prime, population_name)) %>%
dplyr::relocate(query, snp_in_ld, r2, d_prime, population_name) %>%
tibble::tibble() %>%
return()
}
# deal with NA search result (result of 400 error) by testing if res.temp is NA
# then returning an empty df of the same structure
} else{
if(is.na(res.temp)){
data.frame(rep(NA, 5), row.names = c("variation2", "population_name", "variation1", "d_prime", "r2")) %>%
t() %>%
`rownames<-`(NULL) %>%
as.data.frame() %>%
dplyr::rename(query=variation1, snp_in_ld=variation2) %>%
tidyr::unnest(cols = c(query, snp_in_ld, r2, d_prime, population_name)) %>%
dplyr::relocate(query, snp_in_ld, r2, d_prime, population_name) %>%
tibble::tibble() %>%
return()
}
}
}
#' `ensemblQueryLDwithSNPwindowDataframe` applies `ensemblQueryLDwithSNPwindow` to a data.frame of rsIDs.
#'
#' @param in.table data.frame containing SNP pairs. Columns must include `rsid1` for the first member of the pair and `rsid2` for the second member of the pair.
#' @param r2 Float. Measure of LD. If r-squared is provided only return pairs of variants whose r-squared value is equal to or greater than the value provided.
#' @param d.prime Float. Measure of LD. If D' is provided only return pairs of variants whose D' value is equal to or greater than the value provided.
#' @param window.size Integer. Window size in kb. The maximum allowed value for the window size is 500 kb. LD is computed for the given variant and all variants that are located within the specified window.
#' @param pop String. Population for which to compute LD. Use `ensemblQueryGetPops()` to retrieve a list of all populations with LD data. Default is 1000GENOMES:phase_3:EUR.
#' @param cores Integer. A value between 1 and 10 is accepted, as this prevents the server returning overload-related errors.
#'
#' @return A dataframe.
#'
#' @import purrr
#' @import parallel
#' @importFrom magrittr %>%
#'
#' @export
#'
#' @examples
#' \dontrun{
#' in.table = data.frame(rsid = rep(c("rs7153434","rs1963154","rs12672022",
#' "rs3852802","rs12324408","rs56346870"), 5))
#'
#' ensemblQueryLDwithSNPwindowDataframe(in.table=in.table,
#' pop="1000GENOMES:phase_3:EUR",
#' r2=0.8,
#' d.prime=0.8,
#' window.size=500,
#' cores=1)
#'}
ensemblQueryLDwithSNPwindowDataframe = function(in.table, pop="1000GENOMES:phase_3:EUR", r2=NA, d.prime=NA, window.size=NA, cores=1){
#------------------------------ test -------------------------------
# library(purrr)
# library(parallel)
# library(magrittr)
#
# in.table = data.frame(rsid = rep(c("rs7153434","rs1963154","rs12672022","rs3852802","rs12324408","rs56346870"), 10))
# r2=0.8
# d.prime=0.8
# window.size=500
# pop="1000GENOMES:phase_3:EUR"
# cores=2
#------------------------------ check inputs -------------------------------
stopifnot(is.data.frame(in.table))
stopifnot(is.numeric(r2) | is.character(r2) | is.na(r2))
stopifnot(is.numeric(d.prime) | is.character(d.prime) | is.na(d.prime))
stopifnot(is.numeric(window.size) | is.character(window.size))
stopifnot(is.character(pop))
stopifnot(is.numeric(cores))
#--------------------------------- main ------------------------------------
if( is.data.frame(in.table)==TRUE ){
if( ("rsid" %in% colnames(in.table)) ){
message(paste("Running ensemblQueryLDwithSNPwindowDataframe to retrieve LD metrics for" , nrow(in.table), "central variants, where: \n",
paste0("The window around each central variant is ", window.size, ","),
paste0("r-squared = ",r2, ","),
paste0("D' = ", d.prime, ".")
))
# check system
sys = Sys.info()['sysname'] %>% grepl("Windows",.)
if(sys==FALSE){
# else if cores set to more than 1, parallelise
if(cores>1){
message(paste(
"Parallelising query using", cores, "cores."
))
}
parallel::mclapply(X=c(1:nrow(in.table)), mc.cores=cores, FUN=function(x){
ensemblQueryLDwithSNPwindow(rsid=in.table$rsid[x],
r2=r2,
d.prime=d.prime,
window.size=window.size,
pop=pop) %>%
tidyr::unnest(cols = c(query, snp_in_ld, r2, d_prime, population_name)) %>%
as.data.frame()
}) %>%
do.call("rbind", .) %>%
tibble::tibble() %>%
tidyr::unnest(cols = c(query, snp_in_ld, r2, d_prime, population_name)) %>%
return(.)
}else{
message("Windows OS detected. Cannot run parallel queries using parallel::mclapply. Using lapply instead.")
lapply(X=c(1:nrow(in.table)), FUN=function(x){
ensemblQueryLDwithSNPwindow(rsid=in.table$rsid[x],
r2=r2,
d.prime=d.prime,
window.size=window.size,
pop=pop) %>%
tidyr::unnest(cols = c(query, snp_in_ld, r2, d_prime, population_name)) %>%
as.data.frame()
}) %>%
do.call("rbind", .) %>%
tidyr::unnest(cols = c(query, snp_in_ld, r2, d_prime, population_name)) %>%
tibble::tibble() %>%
return(.)
}
} else{
message("Error: column rsid does not exist in in.table.")
stop()
}
} else{
message("Error: in.table is not a data.frame.")
stop()
}
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.