R/ctg_bulk_fetch.R
In clintrialx: Connect and Work with Clinical Trials Data Sources

Documented in ctg_bulk_fetch

#' Bulk Fetch Clinical Trial Data from ClinicalTrials.gov API
#'
#' This function retrieves clinical trial data in bulk from the ClinicalTrials.gov API based on
#' specified parameters. It handles pagination and returns a combined dataset.
#'
#' @param condition Character string specifying the condition to search for.
#' @param location Character string specifying the location to search in.
#' @param title Character string specifying the title to search for.
#' @param intervention Character string specifying the intervention to search for.
#' @param status A character vector specifying the recruitment status of the trials. Allowed values are:
#'               Valid values include:
#'               \itemize{
#'                 \item \code{ACTIVE_NOT_RECRUITING} - Studies that are actively conducting but not
#'                       recruiting participants.
#'                 \item \code{COMPLETED} - Studies that have completed all phases.
#'                 \item \code{ENROLLING_BY_INVITATION} - Studies that are enrolling participants
#'                       by invitation only.
#'                 \item \code{NOT_YET_RECRUITING} - Studies that have not yet started recruiting.
#'                 \item \code{RECRUITING} - Studies that are actively recruiting participants.
#'                 \item \code{SUSPENDED} - Studies that are temporarily halted.
#'                 \item \code{TERMINATED} - Studies that have been terminated before completion.
#'                 \item \code{WITHDRAWN} - Studies that have been withdrawn before enrollment.
#'                 \item \code{AVAILABLE} - Studies that are available.
#'                 \item \code{NO_LONGER_AVAILABLE} - Studies that are no longer available.
#'                 \item \code{TEMPORARILY_NOT_AVAILABLE} - Studies that are temporarily not available.
#'                 \item \code{APPROVED_FOR_MARKETING} - Studies that have been approved for marketing.
#'                 \item \code{WITHHELD} - Studies that have data withheld.
#'                 \item \code{UNKNOWN} - Studies with an unknown status.
#'               }
#'
#' @return A data frame containing the fetched clinical trial data.
#'
#' @import httr
#' @import readr
#' @import dplyr
#' @import progress
#'
#'
#' @examples
#' \dontrun{
#' trials <- ctg_bulk_fetch(location="india")
#' }
#' @export


ctg_bulk_fetch <- function(condition = NULL, location = NULL, title = NULL,
                           intervention = NULL, status = NULL) {

  allowed_status <- c("ACTIVE_NOT_RECRUITING", "COMPLETED", "ENROLLING_BY_INVITATION",
                      "NOT_YET_RECRUITING", "RECRUITING", "SUSPENDED", "TERMINATED",
                      "WITHDRAWN", "AVAILABLE", "NO_LONGER_AVAILABLE",
                      "TEMPORARILY_NOT_AVAILABLE", "APPROVED_FOR_MARKETING",
                      "WITHHELD", "UNKNOWN")

  validate_status(status, allowed_status)

  base_url <- "https://clinicaltrials.gov/api/v2/studies"
  query_params <- build_query_params(condition, location, title, intervention, status)

  total_count <- get_total_count(base_url, query_params)
  total_pages <- ceiling(total_count / 1000)

  pb <- create_progress_bar(total_pages)

  all_data <- fetch_all_pages(base_url, query_params, total_pages, pb)

  pb$terminate()

  dplyr::bind_rows(all_data)
}

validate_status <- function(status, allowed_status) {
  if (!is.null(status)) {
    invalid_status <- setdiff(status, allowed_status)
    if (length(invalid_status) > 0) {
      warning(sprintf("Invalid status value(s) provided: %s\nThese values will be ignored. Allowed values are: %s",
                      paste(invalid_status, collapse = ", "),
                      paste(allowed_status, collapse = ", ")))
      status <- intersect(status, allowed_status)
    }
  }
  status
}

build_query_params <- function(condition, location, title, intervention, status) {
  params <- list(
    format = "csv",
    markupFormat = "markdown",
    countTotal = "true",
    pageSize = 1000
  )

  if (!is.null(condition)) params[["query.cond"]] <- condition
  if (!is.null(location)) params[["query.locn"]] <- location
  if (!is.null(title)) params[["query.titles"]] <- title
  if (!is.null(intervention)) params[["query.intr"]] <- intervention
  if (length(status) > 0) params[["filter.overallStatus"]] <- paste(status, collapse = "|")

  params
}

get_total_count <- function(base_url, query_params) {
  response <- httr::GET(url = base_url, query = query_params, httr::add_headers(accept = "application/json"))
  httr::stop_for_status(response)
  as.numeric(httr::headers(response)[["x-total-count"]])
}

create_progress_bar <- function(total_pages) {
  progress::progress_bar$new(
    format = "Fetching Page :current/:total :spin :bar Completed :percent :elapsedfull ",
    total = total_pages,
    clear = FALSE,
    width = 80
  )
}

fetch_all_pages <- function(base_url, query_params, total_pages, pb) {
  all_data <- vector("list", total_pages)
  next_page_token <- NULL

  for (page_number in seq_len(total_pages)) {
    page_info <- fetch_data(base_url, query_params, page_number, next_page_token)
    all_data[[page_number]] <- page_info$data
    next_page_token <- page_info$next_page_token
    pb$tick(tokens = list(eta = format(Sys.time() + (pb$total - pb$current) * 5, "%H:%M:%S")))

    if (is.null(next_page_token)) break
  }

  all_data
}

fetch_data <- function(base_url, query_params, page_number, next_page_token) {
  if (!is.null(next_page_token)) {
    query_params[["pageToken"]] <- next_page_token
  }

  response <- httr::GET(url = base_url, query = query_params, httr::add_headers(accept = "application/json"))
  httr::stop_for_status(response)

  content <- httr::content(response, as = "text", encoding = "UTF-8")

  if (page_number != 1) {
    header_line <- "NCT Number,Study Title,Study URL,Acronym,Study Status,Brief Summary,Study Results,Conditions,Interventions,Primary Outcome Measures,Secondary Outcome Measures,Other Outcome Measures,Sponsor,Collaborators,Sex,Age,Phases,Enrollment,Funder Type,Study Type,Study Design,Other IDs,Start Date,Primary Completion Date,Completion Date,First Posted,Results First Posted,Last Update Posted,Locations,Study Documents"
    content <- paste(header_line, content, sep = "\n")
  }

  parsed_data <- readr::read_csv(content, show_col_types = FALSE)
  next_page_token <- httr::headers(response)[["x-next-page-token"]]

  list(data = parsed_data, next_page_token = next_page_token)
}

Any scripts or data that you put into this service are public.

clintrialx documentation built on April 3, 2025, 6:47 p.m.

rdrr.io home R language documentation Run R code online

CRAN packages Bioconductor packages R-Forge packages GitHub packages

Note that we can't provide technical support on individual packages. You should contact the package authors for that.

clintrialx
Connect and Work with Clinical Trials Data Sources

R/ctg_bulk_fetch.R
In clintrialx: Connect and Work with Clinical Trials Data Sources

Defines functions fetch_data fetch_all_pages create_progress_bar get_total_count build_query_params validate_status ctg_bulk_fetch

Documented in ctg_bulk_fetch

Try the clintrialx package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

clintrialx Connect and Work with Clinical Trials Data Sources

R/ctg_bulk_fetch.R In clintrialx: Connect and Work with Clinical Trials Data Sources

Defines functions fetch_data fetch_all_pages create_progress_bar get_total_count build_query_params validate_status ctg_bulk_fetch

Documented in ctg_bulk_fetch

Try the clintrialx package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

clintrialx
Connect and Work with Clinical Trials Data Sources

R/ctg_bulk_fetch.R
In clintrialx: Connect and Work with Clinical Trials Data Sources