#' Query an junction using snaptron
#'
#' \code{query_snaptron_junc} takes as input the junction co-ordinates and
#' returns the details of matching junctions found in the snapton database
#'
#' @param database Character scalar. Database of junctions - "srav1", "srav2",
#' "gtex"
#' @param regions Character scalar. Genomic co-ordinates of the junctions you
#' want to query in format - "chr1:1-100"
#' @param rfilter Character vector. Filters to use for returned junctions -
#' "length", "strand", "annotated" etc
#' @param rfilter_values Character vector. Corresponding to rfilter parameters,
#' need to be prefaced by the type of test or ":" - e.g. "<:5000" or ":1"
#' @param method Character scalar. Do you want all junctions within the region
#' or only those precisely matching the start and stop?
#'
#' @return df with details of junctions that match criteria from snaptron
#' @export
#'
#' @examples
#' query_snaptron_junc(database = "gtex", regions = "chr4:89729278-89771396",
#' rfilter = c("length", "annotated"), rfilter_values = c("<:10000", ":0"))
#' query_snaptron_junc(database = "srav2",
#' regions = c("chr4:89729278-89771396", "chr4:89729278-89729407"),
#' method = "exact", tmp_dir = "/home/dzhang/")
#'
#' @seealso For more details on input and output parameters see:
#' \url{http://snaptron.cs.jhu.edu/reftables.html}
query_snaptron_junc <- function(database, regions, rfilter = NULL, rfilter_values = NULL, method = "contains", tmp_dir = "."){
# build base of snaptron query
snaptron_curl_query <- stringr::str_c("curl 'http://snaptron.cs.jhu.edu/", database, "/snaptron?")
# add regions to look up
regions_query <-
stringr::str_c("regions=",
regions,
collapse = "&")
snaptron_curl_query <- stringr::str_c(snaptron_curl_query, regions_query)
# add method to extract matching junctions
if(!(method %in% c("contains", "exact"))) stop("Method must be one of: contains, exact")
snaptron_curl_query <- stringr::str_c(snaptron_curl_query, "&", method, "=1")
# filtering for junction details
if(!is.null(rfilter)){
if(!all(rfilter %in% c("length", "annotated", "left_annotated", "right_annotated", "strand", "samples_count", "coverage_sum", "coverage_avg", "coverage_median"))){
stop("Not all filters match those of snaptron - check http://snaptron.cs.jhu.edu/reftables.html")
}
if(!all(stringr::str_detect(rfilter_values, ":"))) stop("No ':' found in at least 1 rfilter_values - check http://snaptron.cs.jhu.edu/reftables.html")
rfilter_query <-
stringr::str_c("rfilter=",
rfilter,
rfilter_values,
collapse = "&")
snaptron_curl_query <-
snaptron_curl_query %>%
stringr::str_c("&", rfilter_query)
}
# add output path to tmp save results
snaptron_curl_query_w_output_path <-
snaptron_curl_query %>%
stringr::str_c("' > ", tmp_dir, "/tmp_snaptron.txt")
print(stringr::str_c("Searching junction in ", database , " using snaptron"))
system(command = snaptron_curl_query_w_output_path)
suppressMessages(
snaptron_result <-
readr::read_delim(stringr::str_c(tmp_dir, "/tmp_snaptron.txt"), delim = "\t")
)
# delete the tmp
if(file.exists(stringr::str_c(tmp_dir, "/tmp_snaptron.txt"))){
file.remove(stringr::str_c(tmp_dir, "/tmp_snaptron.txt"))
}
return(snaptron_result)
}
#' Query samples using snaptron
#'
#' \code{query_snaptron_samples} find the metadata for the samples of interest
#'
#' @param sample_info either df outputed from \code{\link{query_snaptron_junc}}
#' or integer vector of rail ids or list of integer vectors of rail ids
#' @inheritParams query_snaptron_junc
#'
#' @return list containing df, with each element correponding to sample metadata
#' for 1 junction
#' @export
#'
#' @examples
#' query_snaptron_samples(database = "srav2", sample_info = c(1,2))
#' query_snaptron_samples(database = "srav2", sample_info = list(c(1, 2), c(3, 4)))
#' query_snaptron_samples(database = "gtex", sample_info = query_snaptron_junc_output)
#'
#' @seealso For more details on input and output parameters see:
#' \url{http://snaptron.cs.jhu.edu/reftables.html}
#' @seealso Sample metadata downloaded from
#' \url{http://snaptron.cs.jhu.edu/data/}
query_snaptron_samples <- function(database, sample_info){
if(database == "srav2"){
suppressMessages(
suppressWarnings(
sample_metadata <-
readr::read_delim("/data/snaptron/sample_meta/samples.tsv", delim = "\t")
))
}else{
suppressMessages(
suppressWarnings(
sample_metadata <-
readr::read_delim("/data/recount/GTEx_SRP012682/gtex_recount_meta_data_tidy.txt", delim = "\t") %>%
dplyr::rename(rail_id = recount_id)
))
}
# convert any input format into list of rail ids as int vectors
if(is.data.frame(sample_info)){
if(!("samples" %in% colnames(sample_info))) stop("sample column not found in input dataframe")
sample_info_ids_list <-
sample_info$samples %>%
stringr::str_replace("^,", "") %>% # remove the "," that's always at the start
stringr::str_split(",") %>%
lapply(FUN = stringr::str_replace, pattern = ":.*", replacement = "") %>%
lapply(FUN = as.integer)
}else if(is.list(sample_info)){
sample_info_ids_list <-
lapply(sample_info, FUN = as.integer)
}else if(is.vector(sample_info)){
sample_info_ids_list <-
sample_info %>%
as.integer() %>%
list()
}
filter_sample_metadata <- function(sample_info_ids, sample_metadata){
sample_metadata_filtered <-
sample_metadata %>%
dplyr::filter(rail_id %in% sample_info_ids)
print(stringr::str_c("Found metadata for ", nrow(sample_metadata_filtered), "/", length(sample_info_ids), " samples"))
return(sample_metadata_filtered)
}
sample_metadata_list <-
sample_info_ids_list %>%
lapply(X = ., FUN = filter_sample_metadata, sample_metadata = sample_metadata)
return(sample_metadata_list)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.