R/studyInfo.R

Defines functions studyInfo

Documented in studyInfo

#' Retrieve study summary information.
#'
#' For a given study id, this function returns a data frame of relevant information about
#' the GWAS study, such as PubMed ID, studied trait EFO ID, case/control size, etc.
#'
#' @param study_id Character: A single study ID generated by Open Targets Genetics (e.g. GCST90002357).
#'
#' @return Returns a data frame (in tibble format) containing the summary information about a GWAS study. The data frame has the following data structure:
#' \itemize{
#'   \item{\code{studyId}:} \emph{Character}. Study ID.
#'   \item{\code{traitReported}:} \emph{Character}. Reported trait.
#'   \item{\code{source}:} \emph{Character}. Source.
#'   \item{\code{traitEfos}:} \emph{Character}. Trait EFO ID.
#'   \item{\code{pmid}:} \emph{Character}. PubMed ID.
#'   \item{\code{pubDate}:} \emph{Character}. Publication date.
#'   \item{\code{pubJournal}:} \emph{Character}. Publication journal.
#'   \item{\code{pubTitle}:} \emph{Character}. Publication title.
#'   \item{\code{pubAuthor}:} \emph{Character}. Publication author.
#'   \item{\code{hasSumstats}:} \emph{Character}. Indicates if the study has summary statistics.
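#'   \item{\code{ancestryInitial}:} \emph{Character}. Initial ancestry.
#'   \item{\code{ancestryReplication}:} \emph{Character}. Replication ancestry.
#'   \item{\code{nInitial}:} \emph{Character}. Initial sample size.
#'   \item{\code{nReplication}:} \emph{Character}. Replication sample size.
#'   \item{\code{nCases}:} \emph{Character}. Number of cases.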
#'   \item{\code{traitCategory}:} \emph{Character}. Trait category.
#'   \item{\code{numAssocLoci}:} \emph{Character}. Number of associated loci.
#'   \item{\code{nTotal}:} \emph{Character}. Total sample size.
#' }
#'
#' @examples
#' \dontrun{
#' result <- studyInfo(study_id = "GCST90002357")
#'}
#' @importFrom magrittr %>%
#' @export
#'
#'

studyInfo <- function(study_id) {
  # Fetches the `studyInfo` record of one study from the Open Targets Genetics
  # GraphQL API and returns it as a one-row tibble. Because the record is
  # flattened with `unlist()`, every column is character and multi-valued
  # fields are spread over numbered columns (e.g. traitEfos1, traitEfos2).
  #
  # Returns NULL (with a message) when no usable study ID is supplied, and an
  # empty tibble when the API response carries no `studyInfo` record.
  # Raises an error when `study_id` is not a single string, on connection
  # timeout, or on any other failure while querying the API.

  empty_id_msg <- "Please provide a value for the study ID argument."

  # Nothing supplied at all: keep the original "message + NULL" contract.
  if (missing(study_id) || is.null(study_id) || length(study_id) == 0L) {
    message(empty_id_msg)
    return(NULL)
  }

  # Factors used to be serialised as plain strings, so keep accepting them.
  if (is.factor(study_id)) {
    study_id <- as.character(study_id)
  }

  # The GraphQL variable is declared as a single `String!`, so anything other
  # than one string cannot be queried. Fail early with a readable error rather
  # than letting `||` choke on a non-scalar condition.
  if (!is.character(study_id) || length(study_id) != 1L) {
    stop("`study_id` must be a single character string (one study ID per call).",
         call. = FALSE)
  }

  # A scalar NA or empty string is treated like a missing argument.
  if (is.na(study_id) || !nzchar(study_id)) {
    message(empty_id_msg)
    return(NULL)
  }

  # Set up to query Open Targets Genetics API
  variables <- list(studyId = study_id)

  # Define the query
  query <- "query studyInfoQuery($studyId: String!){
    studyInfo(studyId: $studyId){
      studyId
      traitReported
      source
      traitEfos
      pmid
      pubDate
      pubJournal
      pubTitle
      pubAuthor
      hasSumstats
      ancestryInitial
      ancestryReplication
      nInitial
      nReplication
      nCases
      traitCategory
      numAssocLoci
      nTotal
    }
  }"

  tryCatch({
    cli::cli_progress_step(
      "Connecting to the Open Targets Genetics GraphQL API...",
      spinner = TRUE
    )
    otg_cli <- ghql::GraphqlClient$new(
      url = "https://api.genetics.opentargets.org/graphql"
    )
    otg_qry <- ghql::Query$new()
    otg_qry$query(name = "studyInfoQuery", x = query)

    # Execute the query
    cli::cli_progress_step("Downloading data...", spinner = TRUE)
    response <- jsonlite::fromJSON(
      otg_cli$exec(otg_qry$queries$studyInfoQuery, variables),
      simplifyDataFrame = TRUE,
      flatten = TRUE
    )
    output <- response$data$studyInfo

    if (length(output) == 0L) {
      # No record came back: return an empty table.
      tibble::tibble()
    } else {
      # JSON nulls arrive as NULL and empty arrays as zero-length elements;
      # `unlist()` would silently drop both, so mark them as NA to keep the
      # corresponding columns in the result.
      output[lengths(output) == 0L] <- NA

      # Flatten the key/value record into a one-row tibble.
      tibble::as_tibble(as.list(unlist(output)))
    }
  }, error = function(e) {
    # Handling connection timeout
    if (grepl("Timeout was reached", conditionMessage(e))) {
      stop(
        "Connection timeout reached while connecting to the Open Targets Genetics GraphQL API.",
        call. = FALSE
      )
    }
    stop(e) # Re-signal any other error unchanged
  })
}

Try the otargen package in your browser

Any scripts or data that you put into this service are public.

otargen documentation built on Sept. 30, 2024, 9:43 a.m.