R/qtlCredibleSet.R

Defines functions qtlCredibleSet

Documented in qtlCredibleSet

#' Retrieve calculated QTL summary statistics for credible variant set.
#'
#' In Open Targets Genetics, the lead variants are expanded into a more comprehensive set of candidate causal variants referred to as the tag variants.
#' This function retrieves calculated summary statistics for tag variants included in a lead variant colocalization analysis for a
#' given study (which links a top locus with a trait). The user must specify the biofeature of interest (e.g. tissue, cell type, ...)
#' for which the function obtains tag variant information.
#'
#' @param study_id Character: Study ID(s) generated by Open Targets Genetics (e.g. GCST90002357).
#' @param variant_id Character: generated ID for variants by Open Targets Genetics (e.g. 1_154119580_C_A) or rsId (rs2494663).
#' @param gene Character: Gene ENSEMBL ID (e.g. ENSG00000169174) or gene symbol (e.g. PCSK9).
#' @param biofeature Character: Represents either a tissue, cell type, aggregation type, protein type, etc.
#'
#' @return Returns a data frame of results from the QTL credible set of variants consisting of the following columns:
#' \itemize{
#'   \item{\code{tagVariant.id}:} \emph{Character vector}. Tag variant ID.
#'   \item{\code{tagVariant.rsId}:} \emph{Character vector}. Tag variant rsID.
#'   \item{\code{pval}:} \emph{Numeric}. P-value.
#'   \item{\code{se}:} \emph{Numeric}. Standard error.
#'   \item{\code{beta}:} \emph{Numeric}. Beta value.
#'   \item{\code{postProb}:} \emph{Numeric}. Posterior probability.
#'   \item{\code{MultisignalMethod}:} \emph{Character vector}. Multisignal method.
#'   \item{\code{logABF}:} \emph{Numeric}. Logarithm of approximate Bayes factor.
#'   \item{\code{is95}:} \emph{Logical}. Indicates if the variant has a 95% confidence.
#'   \item{\code{is99}:} \emph{Logical}. Indicates if the variant has a 99% confidence.
#' }
#'
#' @examples
#' \dontrun{
#' result <- qtlCredibleSet(study_id = "Braineac2", variant_id = "1_55053079_C_T",
#'     gene = "ENSG00000169174", biofeature = "SUBSTANTIA_NIGRA")
#' result <- qtlCredibleSet(study_id = "Braineac2", variant_id = "rs7552841",
#'     gene = "PCSK9", biofeature = "SUBSTANTIA_NIGRA")
#'}
#'
#' @importFrom magrittr %>%
#' @export
#'
#'

qtlCredibleSet <- function(study_id, variant_id, gene, biofeature) {
  # Purpose: resolve the gene and variant identifiers, then query the Open
  # Targets Genetics GraphQL API for the QTL credible set and return it as a
  # flat data frame.
  #
  # Arguments:
  #   study_id   - study identifier, sent as the `studyId` query variable.
  #   variant_id - variant ID (chrom_pos_ref_alt) or rsID; an rsID is first
  #                converted to a variant ID through the API search endpoint.
  #   gene       - Ensembl gene ID or gene symbol; a symbol is first converted
  #                to an Ensembl ID through the API search endpoint.
  #   biofeature - biofeature name, sent as the `bioFeature` query variable.
  #
  # Returns: a data frame with one row per tag variant, or NULL (with a
  # message) when any argument is missing or NULL.
  #
  # Errors: stops when the gene or variant cannot be resolved to exactly one
  # identifier, when the variant ID format is not recognised, or when the
  # API request fails (a curl timeout is reported with a dedicated message).

  # Check if arguments are empty. All four values are required by the query,
  # so report the problem and return NULL instead of contacting the API.
  if (missing(study_id) || is.null(study_id) ||
      missing(variant_id) || is.null(variant_id) ||
      missing(gene) || is.null(gene) ||
      missing(biofeature) || is.null(biofeature)) {
    message("Please provide values for all the arguments: study_id, variant_id, gene, and biofeature.")
    return(NULL)
  }

  tryCatch({
    ## Set up to query Open Targets Genetics API
    cli::cli_progress_step("Connecting the database...", spinner = TRUE)
    otg_cli <- ghql::GraphqlClient$new(url = "https://api.genetics.opentargets.org/graphql")
    otg_qry <- ghql::Query$new()

    # ---- Resolve the gene to an Ensembl ID ---------------------------------
    # Query for gene name search
    query_search <- "query convertnametoid($queryString:String!) {
    search(queryString:$queryString){
      genes{
        id
        symbol
      }
      }
    }"

    if (!grepl(pattern = "ENSG\\d{11}", gene)) {
      variables <- list(queryString = gene)
      otg_qry$query(name = "convertnametoid", x = query_search)
      id_result <- jsonlite::fromJSON(
        otg_cli$exec(otg_qry$queries$convertnametoid, variables),
        flatten = TRUE
      )$data
      id <- as.data.frame(id_result$search$genes)

      # Keep only exact symbol matches. which() drops NA comparisons, so a hit
      # with a missing symbol can never produce an NA gene ID.
      gene_input <- unique(id$id[which(id$symbol == gene)])

      # The search may return hits none of which match the symbol exactly;
      # fail here rather than sending an empty gene ID to the API.
      if (length(gene_input) == 0) {
        stop("\nPlease provide Ensemble gene ID or gene name")
      }
      # geneId is a single non-null String in the query, so an ambiguous
      # symbol cannot be forwarded as-is.
      if (length(gene_input) > 1) {
        stop(
          "\nGene name matches several Ensembl IDs (",
          paste(gene_input, collapse = ", "),
          "). Please provide one Ensembl gene ID."
        )
      }
    } else {
      gene_input <- gene
    }

    # ---- Resolve the variant to a chrom_pos_ref_alt ID ---------------------
    if (grepl(pattern = "rs\\d+", variant_id)) {
      # Convert rs id to variant id
      query_searchid <- "query ConvertRSIDtoVID($queryString:String!) {
    search(queryString:$queryString){
      totalVariants
      variants{
        id
        }
      }
    }"

      variables <- list(queryString = variant_id)
      otg_qry$query(name = "convertid", x = query_searchid)
      id_result <- jsonlite::fromJSON(
        otg_cli$exec(otg_qry$queries$convertid, variables),
        flatten = TRUE
      )$data
      input_variant_id <- unique(id_result$search$variants$id)

      # variantId is a single non-null String in the query: an unknown rsID
      # (no hit) or one mapping to several variants cannot be forwarded.
      if (length(input_variant_id) == 0) {
        stop("\nNo variant found for rsID: ", variant_id)
      }
      if (length(input_variant_id) > 1) {
        stop(
          "\nrsID matches several variants (",
          paste(input_variant_id, collapse = ", "),
          "). Please provide one variant ID."
        )
      }
    } else if (grepl(pattern = "\\d+_\\d+_[a-zA-Z]+_[a-zA-Z]+", variant_id)) {
      input_variant_id <- variant_id
    } else {
      stop("\nPlease provide a variant ID")
    }

    # ---- Credible set query ------------------------------------------------
    query <- "query qtlcredsetquery($studyId: String!, $variantId: String!, $geneId: String!, $bioFeature: String!){
  qtlCredibleSet(studyId: $studyId, variantId: $variantId, geneId: $geneId, bioFeature: $bioFeature) {
  tagVariant {
      id
      rsId
    }
  pval
  se
  beta
  postProb
  MultisignalMethod
  logABF
  is95
  is99
}
}"

    variables <- list(
      studyId = study_id,
      variantId = input_variant_id,
      geneId = gene_input,
      bioFeature = biofeature
    )
    otg_qry$query(name = "qtlcredset_query", x = query)

    ## Execute the query
    cli::cli_progress_step("Downloading data...", spinner = TRUE)

    # `flatten` is an argument of jsonlite::fromJSON(), not of the client's
    # exec() method: it expands the nested tagVariant record into
    # tagVariant.id / tagVariant.rsId columns.
    qtl_cred_set <- jsonlite::fromJSON(
      otg_cli$exec(otg_qry$queries$qtlcredset_query, variables),
      flatten = TRUE
    )$data
    df_qtl_cred <- as.data.frame(qtl_cred_set)

    # Strip only the literal leading "qtlCredibleSet." prefix added by
    # as.data.frame(); the dot is escaped so it is not a regex wildcard.
    base::colnames(df_qtl_cred) <- stringr::str_replace_all(
      colnames(df_qtl_cred),
      "^qtlCredibleSet\\.",
      ""
    )

    df_qtl_cred
  }, error = function(e) {
    # Handling connection timeout
    if (grepl("Timeout was reached", conditionMessage(e))) {
      stop("Connection timeout reached while connecting to the Open Targets Genetics GraphQL API.")
    } else {
      stop(e) # Re-signal any other error unchanged
    }
  })
}

Try the otargen package in your browser

Any scripts or data that you put into this service are public.

otargen documentation built on Sept. 30, 2024, 9:43 a.m.