R/studyLocus2GeneTable.R

Defines functions studyLocus2GeneTable

Documented in studyLocus2GeneTable

#' Retrieve the locus-to-gene (L2G) data table for loci genes.
#'
#' This function fetches the locus-to-gene (L2G) pipeline summary data table for
#' the neighboring genes of a variant in a GWAS study. The variant may be given
#' either as an Open Targets Genetics variant ID or as an rsID; an rsID is first
#' resolved to a variant ID through the API's `search` query.
#'
#' @param study_id Character: Study ID generated by Open Targets Genetics
#'   (e.g. GCST90002357).
#' @param variant_id Character: a single generated ID for a variant by Open
#'   Targets Genetics (e.g. 1_154119580_C_A, X_153764217_C_T) or a single rsID
#'   (e.g. rs2494663).
#'
#' @return Returns a tibble with the summary statistics of the study and
#' various calculated scores and features for any lead variant. An empty tibble
#' is returned when the API reports no rows for the study/variant pair. Numeric
#' columns are rounded to two decimal places. The output table has the
#' following data structure:
#'
#' \itemize{
#'   \item{\code{studyId}:} \emph{Character}. Study ID.
#'   \item{\code{variant.id}:} \emph{Character}. Variant ID.
#'   \item{\code{variant.rsId}:} \emph{Character}. Variant rsID.
#'   \item{\code{yProbaDistance}:} \emph{Numeric}. Distance score.
#'   \item{\code{yProbaModel}:} \emph{Numeric}. Model score.
#'   \item{\code{yProbaMolecularQTL}:} \emph{Numeric}. Molecular QTL score.
#'   \item{\code{yProbaPathogenicity}:} \emph{Numeric}. Pathogenicity score.
#'   \item{\code{yProbaInteraction}:} \emph{Numeric}. Interaction score.
#'   \item{\code{hasColoc}:} \emph{Logical}. Indicates if colocalization data is available.
#'   \item{\code{distanceToLocus}:} \emph{Numeric}. Distance to the locus.
#'   \item{\code{gene.id}:} \emph{Character}. Gene ID.
#'   \item{\code{gene.symbol}:} \emph{Character}. Gene symbol.
#' }
#'
#' @details The function raises an error when \code{variant_id} is not a single
#' non-missing value, when it matches neither the rsID nor the variant ID
#' format, when an rsID cannot be resolved to any variant, or when the
#' connection to the API times out. If an rsID resolves to several variants the
#' first one is used and a warning is issued.
#'
#' @examples
#' \dontrun{
#' result <- studyLocus2GeneTable(study_id = "GCST90002357", variant_id = "1_154119580_C_A")
#' result <- studyLocus2GeneTable(study_id = "GCST90002357", variant_id = "rs2494663")
#'}
#' @importFrom magrittr %>%
#' @export
studyLocus2GeneTable <- function(study_id, variant_id) {
  # Validate the identifier before any network activity so that bad input fails
  # fast and with a clear message.
  if (length(variant_id) != 1L || is.na(variant_id)) {
    stop("`variant_id` must be a single, non-missing value.")
  }

  # Both patterns are anchored so that strings merely *containing* an ID are
  # rejected. The chromosome part also accepts X, Y and MT, not only digits.
  is_rsid <- grepl("^rs\\d+$", variant_id)
  is_variant_id <- grepl(
    "^(\\d+|X|Y|MT)_\\d+_[a-zA-Z]+_[a-zA-Z]+$",
    variant_id
  )
  if (!is_rsid && !is_variant_id) {
    stop("\n Please provide a variant Id")
  }

  ## Set up to query Open Targets Genetics API
  tryCatch({
    cli::cli_progress_step("Connecting to the Open Targets Genetics GraphQL API...", spinner = TRUE)
    otg_cli <- ghql::GraphqlClient$new(url = "https://api.genetics.opentargets.org/graphql")
    otg_qry <- ghql::Query$new()

    if (is_rsid) {
      # Convert rs id to variant id
      query_searchid <- "query rsi2vid($queryString:String!) {
        search(queryString:$queryString){
          totalVariants
          variants{
            id
          }
        }
      }"

      variables <- list(queryString = variant_id)
      otg_qry$query(name = "rsi2vid", x = query_searchid)
      id_result <- jsonlite::fromJSON(
        otg_cli$exec(otg_qry$queries$rsi2vid, variables),
        flatten = TRUE
      )$data
      matched_ids <- id_result$search$variants$id

      # The downstream query takes exactly one variant ID, so an empty or
      # multi-valued search result must be handled explicitly.
      if (length(matched_ids) == 0L) {
        stop("No variant found for rsId ", variant_id, ".")
      }
      if (length(matched_ids) > 1L) {
        warning(
          "rsId ", variant_id, " matched ", length(matched_ids),
          " variants; using the first one (", matched_ids[[1L]], ").",
          call. = FALSE
        )
      }
      input_variant_id <- matched_ids[[1L]]
    } else {
      input_variant_id <- variant_id
    }

    ## Query for GWAS study locus details
    query <- "query l2gQuery($studyId: String!, $variantId: String!){
      studyLocus2GeneTable(studyId: $studyId, variantId: $variantId){
        study{
          studyId
          traitReported
        }
        variant {
          id
          rsId
        }
        rows {
          gene {
            id
            symbol
          }
          yProbaDistance
          yProbaModel
          yProbaMolecularQTL
          yProbaPathogenicity
          yProbaInteraction
          hasColoc
          distanceToLocus
        }
      }
    }"

    ## Execute the query
    variables <- list(studyId = study_id, variantId = input_variant_id)
    otg_qry$query(name = "l2g_query", x = query)

    cli::cli_progress_step(paste("Downloading data for ", study_id, ",", variant_id, "..."), spinner = TRUE)

    study_l2g <- jsonlite::fromJSON(
      otg_cli$exec(otg_qry$queries$l2g_query, variables),
      flatten = TRUE
    )$data
    l2g <- study_l2g$studyLocus2GeneTable

    # Start from an empty frame so that "no rows" yields an empty tibble.
    df_l2g <- data.frame()
    if (nrow(as.data.frame(l2g$rows)) != 0) {
      # A missing rsId would otherwise break the conversion to a data frame;
      # use a character NA to keep the column type consistent.
      if (is.null(l2g$variant$rsId)) {
        l2g$variant$rsId <- NA_character_
      }
      # NOTE(review): as.data.frame() prefixes nested elements with their
      # parent name, so the documented `studyId` column presumably appears as
      # `study.studyId` here -- confirm against a live response.
      df_l2g <- as.data.frame(l2g)
      df_l2g <- dplyr::mutate(
        df_l2g,
        dplyr::across(where(is.numeric), ~ round(., 2))
      )
      # Strip only the literal leading "rows." prefix added by as.data.frame().
      base::colnames(df_l2g) <- stringr::str_replace_all(
        colnames(df_l2g), "^rows\\.", ""
      )
    }
    dplyr::as_tibble(df_l2g)
  }, error = function(e) {
    # Handling connection timeout
    if (grepl("Timeout was reached", conditionMessage(e), fixed = TRUE)) {
      stop("Connection timeout reached while connecting to the Open Targets Genetics GraphQL API.")
    } else {
      stop(e) # Re-raise any other error unchanged
    }
  })
}

Try the otargen package in your browser

Any scripts or data that you put into this service are public.

otargen documentation built on Sept. 30, 2024, 9:43 a.m.