# R/query.R
#
# Defines functions ctgov_get_query_terms ctgov_query_api ctgov_query_endpoint ctgov_query_outcome ctgov_query_references ctgov_query_intervention ctgov_query_design ctgov_query
#
# Documented in ctgov_get_query_terms ctgov_query ctgov_query_api ctgov_query_design ctgov_query_endpoint ctgov_query_intervention ctgov_query_outcome ctgov_query_references

#' Query the ClinicalTrials.gov dataset
#'
#' This function selects a subset of the clinical trials data by using
#' a variety of different search parameters. These include free text search
#' keywords, range queries for the continuous variables, and exact matches for
#' categorical fields. The function \code{ctgov_get_query_terms} shows the
#' categorical levels for the latter. The function will either take the entire
#' dataset loaded into the package environment or a previously queried input.
#'
#'
#' @param data                    a dataset to search over; set to \code{NULL}
#'                                to use the full dataset that is currently
#'                                loaded
#'
#' @param description_kw          character vector of keywords to search in the
#'                                description field. Set to
#'                                \code{NULL} to avoid searching this field.
#'
#' @param sponsor_kw              character vector of keywords to search in the
#'                                sponsor (the company that submitted the study).
#'                                Set to \code{NULL} to avoid searching this
#'                                field.
#'
#' @param brief_title_kw          character vector of keywords to search in the
#'                                brief title field. Set to
#'                                \code{NULL} to avoid searching this field.
#'
#' @param official_title_kw       character vector of keywords to search in the
#'                                official title field. Set to
#'                                \code{NULL} to avoid searching this field.
#'
#' @param criteria_kw             character vector of keywords to search in the
#'                                criteria field. Set to
#'                                \code{NULL} to avoid searching this field.
#'
#' @param intervention_kw         character vector of keywords to search in the
#'                                intervention names field. Set to
#'                                \code{NULL} to avoid searching this field.
#'
#' @param intervention_desc_kw    character vector of keywords to search in the
#'                                intervention description field. Set to
#'                                \code{NULL} to avoid searching this field.
#'
#' @param conditions_kw           character vector of keywords to search in the
#'                                conditions field. Set to
#'                                \code{NULL} to avoid searching this field.
#'
#' @param population_kw           character vector of keywords to search in the
#'                                population field. Set to
#'                                \code{NULL} to avoid searching this field.
#'
#' @param date_range              character vector of length two formatted as
#'                                "YYYY-MM-DD" describing the earliest and
#'                                latest start dates to include in the results.
#'                                Use a missing value for either value to avoid
#'                                filtering on that bound. Set to \code{NULL}
#'                                to avoid searching this field.
#'
#' @param enrollment_range        numeric of length two describing the smallest
#'                                and largest enrollment sizes to
#'                                include in the results. Use a missing value
#'                                for either value to avoid filtering. Set to
#'                                \code{NULL} to avoid searching this field.
#'
#' @param minimum_age_range       numeric of length two describing the smallest
#'                                and largest minimum age (in years) to
#'                                include in the results. Use a missing value
#'                                for either value to avoid filtering. Set to
#'                                \code{NULL} to avoid searching this field.
#'
#' @param maximum_age_range       numeric of length two describing the smallest
#'                                and largest maximum age (in years) to
#'                                include in the results. Use a missing value
#'                                for either value to avoid filtering. Set to
#'                                \code{NULL} to avoid searching this field.
#'
#' @param study_type              character vector of study types to include
#'                                in the output. Set to \code{NULL} to avoid
#'                                searching this field.
#'
#' @param allocation              character vector of allocations to include
#'                                in the output. Set to \code{NULL} to avoid
#'                                searching this field.
#'
#' @param intervention_model      character vector of interventions to include
#'                                in the output. Set to \code{NULL} to avoid
#'                                searching this field.
#'
#' @param observational_model     character vector of observations to include
#'                                in the output. Set to \code{NULL} to avoid
#'                                searching this field.
#'
#' @param primary_purpose         character vector of primary purposes to
#'                                include in the output. Set to \code{NULL} to
#'                                avoid searching this field.
#'
#' @param time_perspective        character vector of time perspectives to
#'                                include in the output. Set to \code{NULL} to
#'                                avoid searching this field.
#'
#' @param masking_description     character vector of maskings to include
#'                                in the output. Set to \code{NULL} to avoid
#'                                searching this field.
#'
#' @param sampling_method         character vector of sampling methods to
#'                                include in the output. Set to \code{NULL} to
#'                                avoid searching this field.
#'
#' @param phase                   character vector of phases to include
#'                                in the output. Set to \code{NULL} to avoid
#'                                searching this field.
#'
#' @param gender                  character vector of genders to include
#'                                in the output. Set to \code{NULL} to avoid
#'                                searching this field.
#'
#' @param sponsor_type            character vector of sponsor types to include
#'                                in the output. Set to \code{NULL} to avoid
#'                                searching this field.
#'
#' @param ignore_case             logical. Should the search ignore
#'                                capitalization. The default is \code{TRUE}.
#'
#' @param match_all               logical. Should the results be required to
#'                                match all the keywords? The default is
#'                                \code{FALSE}.
#'
#' @param max_rows                integer. The maximum number of rows to return;
#'                                The default \code{NULL} returns all rows.
#'
#'
#' @author Taylor B. Arnold, \email{taylor.arnold@@acm.org}
#' @return a tibble object queried from the loaded database
#'
#' @importFrom tibble as_tibble
#' @importFrom DBI dbReadTable
#' @importFrom rlang .data .env
#' @importFrom dplyr filter collect tbl
#' @importFrom utils head
#' @export
ctgov_query <- function(
  data = NULL,
  description_kw = NULL,
  sponsor_kw = NULL,
  brief_title_kw = NULL,
  official_title_kw = NULL,
  criteria_kw = NULL,
  intervention_kw = NULL,
  intervention_desc_kw = NULL,
  conditions_kw = NULL,
  population_kw = NULL,
  date_range = NULL,
  enrollment_range = NULL,
  minimum_age_range = NULL,
  maximum_age_range = NULL,
  study_type = NULL,
  allocation = NULL,
  intervention_model = NULL,
  observational_model = NULL,
  primary_purpose = NULL,
  time_perspective = NULL,
  masking_description = NULL,
  sampling_method = NULL,
  phase = NULL,
  gender = NULL,
  sponsor_type = NULL,
  ignore_case = TRUE,
  match_all = FALSE,
  max_rows = NULL
) {

  ############################################################################
  # check query input types; every condition below is a single TRUE/FALSE, so
  # the scalar, short-circuiting operators `||` and `&&` are used

  # keyword arguments: NULL or a character vector
  assert(is.null(description_kw) || is.character(description_kw))
  assert(is.null(sponsor_kw) || is.character(sponsor_kw))
  assert(is.null(brief_title_kw) || is.character(brief_title_kw))
  assert(is.null(official_title_kw) || is.character(official_title_kw))
  assert(is.null(criteria_kw) || is.character(criteria_kw))
  assert(is.null(intervention_kw) || is.character(intervention_kw))
  assert(is.null(intervention_desc_kw) || is.character(intervention_desc_kw))
  assert(is.null(conditions_kw) || is.character(conditions_kw))
  assert(is.null(population_kw) || is.character(population_kw))

  # range arguments: NULL or a vector of length two of the right type
  assert(is.null(date_range) ||
         (is.character(date_range) && length(date_range) == 2L))
  assert(is.null(enrollment_range) ||
         (is.numeric(enrollment_range) && length(enrollment_range) == 2L))
  assert(is.null(minimum_age_range) ||
         (is.numeric(minimum_age_range) && length(minimum_age_range) == 2L))
  assert(is.null(maximum_age_range) ||
         (is.numeric(maximum_age_range) && length(maximum_age_range) == 2L))

  # categorical arguments: NULL or a character vector
  assert(is.null(study_type) || is.character(study_type))
  assert(is.null(allocation) || is.character(allocation))
  assert(is.null(intervention_model) || is.character(intervention_model))
  assert(is.null(observational_model) || is.character(observational_model))
  assert(is.null(primary_purpose) || is.character(primary_purpose))
  assert(is.null(time_perspective) || is.character(time_perspective))
  assert(is.null(masking_description) || is.character(masking_description))
  assert(is.null(sampling_method) || is.character(sampling_method))
  assert(is.null(phase) || is.character(phase))
  assert(is.null(gender) || is.character(gender))
  assert(is.null(sponsor_type) || is.character(sponsor_type))

  ############################################################################
  # gather the categorical filters that were actually supplied; each list name
  # is both the column to filter on and the entry of `.volatiles$ol` holding
  # the allowed levels. Driving validation and filtering from one table keeps
  # argument, column and level set in step (the hand-written version checked
  # `phase` against the gender levels).
  categorical <- Filter(Negate(is.null), list(
    study_type = study_type,
    allocation = allocation,
    intervention_model = intervention_model,
    observational_model = observational_model,
    primary_purpose = primary_purpose,
    time_perspective = time_perspective,
    masking_description = masking_description,
    sampling_method = sampling_method,
    phase = phase,
    gender = gender,
    sponsor_type = sponsor_type
  ))

  # check that search options match choices; do this first to make sure any
  # errors are caught fast (match.arg() errors when nothing matches)
  for (field in names(categorical))
  {
    match.arg(categorical[[field]], .volatiles$ol[[field]], several.ok = TRUE)
  }

  ############################################################################
  # convert the date range to a date object; again, do this before touching
  # the database
  if (!is.null(date_range)) { date_range <- lubridate::ymd(date_range) }

  ############################################################################
  # if no data was given, grab the current version of the data; otherwise
  # register the supplied data under the same table name in the in-memory
  # connection so the remaining steps are identical for both cases
  if (is.null(data))
  {
    assert_data_loaded()
    z <- dplyr::tbl(.volatiles$con, "join")
  } else {
    duckdb::duckdb_register(.volatiles$memory, "join", data, overwrite = TRUE)
    z <- dplyr::tbl(.volatiles$memory, "join")
  }

  ############################################################################
  # apply each of the categorical filters; these are fast so do them first
  for (field in names(categorical))
  {
    wanted <- categorical[[field]]
    z <- filter(z, .data[[field]] %in% .env$wanted)
  }

  ############################################################################
  # apply each of continuous range filters; these are also fast. The list
  # names are the columns being filtered; a missing (NA) bound leaves that
  # side of the range open.
  ranges <- Filter(Negate(is.null), list(
    start_date = date_range,
    enrollment = enrollment_range,
    minimum_age = minimum_age_range,
    maximum_age = maximum_age_range
  ))
  for (field in names(ranges))
  {
    bounds <- ranges[[field]]
    if (!is.na(bounds[1]))
    {
      lower <- bounds[1]
      z <- filter(z, .data[[field]] >= .env$lower)
    }
    if (!is.na(bounds[2]))
    {
      upper <- bounds[2]
      z <- filter(z, .data[[field]] <= .env$upper)
    }
  }

  ############################################################################
  # finally, do the keyword searches; list names are the columns searched and
  # the order matches the order in which the searches are applied
  #
  # NOTE(review): `intervention_kw` is accepted and type-checked above, but no
  # keyword search in this function uses it. The column it should search is
  # not visible from here -- confirm the column name and add it to this list.
  keyword_searches <- Filter(Negate(is.null), list(
    description = description_kw,
    sponsor = sponsor_kw,
    brief_title = brief_title_kw,
    official_title = official_title_kw,
    criteria = criteria_kw,
    intervention_model_description = intervention_desc_kw,
    conditions = conditions_kw,
    population = population_kw
  ))
  for (column in names(keyword_searches))
  {
    z <- query_kwds(
      z, keyword_searches[[column]], column, ignore_case, match_all
    )
  }

  ############################################################################
  # limit number of results, if needed
  if (!is.null(max_rows))
  {
    z <- utils::head(z, max_rows)
  }

  # run the accumulated query and return the results
  dplyr::collect(z)
}


#' Query design data from the ClinicalTrials.gov dataset
#'
#' Runs free text keyword searches against the \code{design} table and returns
#' the matching rows. The search is applied either to the table in the
#' database currently loaded by the package or to a data set supplied by the
#' caller, such as the result of an earlier query.
#'
#'
#' @param data                    a dataset to search over; set to \code{NULL}
#'                                to use the \code{design} table of the dataset
#'                                that is currently loaded
#'
#' @param design_kw               character vector of keywords to search for in
#'                                the \code{measure} column. Set to
#'                                \code{NULL} to skip this search.
#'
#' @param design_desc_kw          character vector of keywords to search for in
#'                                the \code{description} column. Set to
#'                                \code{NULL} to skip this search.
#'
#' @param ignore_case             logical. Should the search ignore
#'                                capitalization? The default is \code{TRUE}.
#'
#' @param match_all               logical. Should the results be required to
#'                                match all the keywords? The default is
#'                                \code{FALSE}.
#'
#' @param max_rows                integer. The maximum number of rows to return;
#'                                the default \code{NULL} returns all rows.
#'
#'
#' @author Taylor B. Arnold, \email{taylor.arnold@@acm.org}
#' @return a tibble object holding the matching rows
#'
#' @importFrom tibble as_tibble
#' @importFrom DBI dbReadTable
#' @importFrom dplyr collect tbl
#' @importFrom utils head
#' @export
ctgov_query_design <- function(
  data = NULL,
  design_kw = NULL,
  design_desc_kw = NULL,
  ignore_case = TRUE,
  match_all = FALSE,
  max_rows = NULL
) {
  # both keyword arguments must be NULL or character vectors
  assert(is.null(design_kw) | is.character(design_kw))
  assert(is.null(design_desc_kw) | is.character(design_desc_kw))

  # choose the source: caller-supplied data is registered with the in-memory
  # connection under the table name "design"; otherwise the loaded database
  # is used
  if (!is.null(data)) {
    duckdb::duckdb_register(.volatiles$memory, "design", data, overwrite = TRUE)
    design_tbl <- dplyr::tbl(.volatiles$memory, "design")
  } else {
    assert_data_loaded()
    design_tbl <- dplyr::tbl(.volatiles$con, "design")
  }

  # keyword searches, one column at a time
  if (!is.null(design_kw)) {
    design_tbl <- query_kwds(
      design_tbl, design_kw, "measure", ignore_case, match_all
    )
  }
  if (!is.null(design_desc_kw)) {
    design_tbl <- query_kwds(
      design_tbl, design_desc_kw, "description", ignore_case, match_all
    )
  }

  # optional cap on the number of rows
  if (!is.null(max_rows)) {
    design_tbl <- utils::head(design_tbl, max_rows)
  }

  # execute the query and hand back the rows
  dplyr::collect(design_tbl)
}


#' Query intervention data from the ClinicalTrials.gov dataset
#'
#' Runs a free text keyword search against the \code{inter} table and returns
#' the matching rows. The search is applied either to the table in the
#' database currently loaded by the package or to a data set supplied by the
#' caller, such as the result of an earlier query.
#'
#'
#' @param data                    a dataset to search over; set to \code{NULL}
#'                                to use the \code{inter} table of the dataset
#'                                that is currently loaded
#'
#' @param intervention_kw         character vector of keywords to search for in
#'                                the \code{name} column. Set to
#'                                \code{NULL} to skip this search.
#'
#' @param ignore_case             logical. Should the search ignore
#'                                capitalization? The default is \code{TRUE}.
#'
#' @param match_all               logical. Should the results be required to
#'                                match all the keywords? The default is
#'                                \code{FALSE}.
#'
#' @param max_rows                integer. The maximum number of rows to return;
#'                                the default \code{NULL} returns all rows.
#'
#'
#' @author Taylor B. Arnold, \email{taylor.arnold@@acm.org}
#' @return a tibble object holding the matching rows
#'
#' @importFrom tibble as_tibble
#' @importFrom DBI dbReadTable
#' @importFrom dplyr collect tbl
#' @importFrom utils head
#' @export
ctgov_query_intervention <- function(
  data = NULL,
  intervention_kw = NULL,
  ignore_case = TRUE,
  match_all = FALSE,
  max_rows = NULL
) {
  # the keyword argument must be NULL or a character vector
  assert(is.null(intervention_kw) | is.character(intervention_kw))

  # choose the source: caller-supplied data is registered with the in-memory
  # connection under the table name "inter"; otherwise the loaded database
  # is used
  if (!is.null(data)) {
    duckdb::duckdb_register(.volatiles$memory, "inter", data, overwrite = TRUE)
    inter_tbl <- dplyr::tbl(.volatiles$memory, "inter")
  } else {
    assert_data_loaded()
    inter_tbl <- dplyr::tbl(.volatiles$con, "inter")
  }

  # keyword search on the intervention name
  if (!is.null(intervention_kw)) {
    inter_tbl <- query_kwds(
      inter_tbl, intervention_kw, "name", ignore_case, match_all
    )
  }

  # optional cap on the number of rows
  if (!is.null(max_rows)) {
    inter_tbl <- utils::head(inter_tbl, max_rows)
  }

  # execute the query and hand back the rows
  dplyr::collect(inter_tbl)
}


#' Query references table from the ClinicalTrials.gov dataset
#'
#' Returns rows of the \code{refs} table, taken either from the database
#' currently loaded by the package or from a data set supplied by the caller,
#' such as the result of an earlier query. No keyword searches are currently
#' implemented for this table; only the row limit affects the result.
#'
#'
#' @param data                    a dataset to search over; set to \code{NULL}
#'                                to use the \code{refs} table of the dataset
#'                                that is currently loaded
#'
#' @param ignore_case             logical. Accepted for consistency with the
#'                                other query functions; currently unused.
#'                                The default is \code{TRUE}.
#'
#' @param match_all               logical. Accepted for consistency with the
#'                                other query functions; currently unused.
#'                                The default is \code{FALSE}.
#'
#' @param max_rows                integer. The maximum number of rows to return;
#'                                the default \code{NULL} returns all rows.
#'
#'
#' @author Taylor B. Arnold, \email{taylor.arnold@@acm.org}
#' @return a tibble object holding the selected rows
#'
#' @importFrom tibble as_tibble
#' @importFrom DBI dbReadTable
#' @importFrom dplyr collect tbl
#' @importFrom utils head
#' @export
ctgov_query_references <- function(
  data = NULL,
  ignore_case = TRUE,
  match_all = FALSE,
  max_rows = NULL
) {
  # choose the source: caller-supplied data is registered with the in-memory
  # connection under the table name "refs"; otherwise the loaded database
  # is used
  if (!is.null(data)) {
    duckdb::duckdb_register(.volatiles$memory, "refs", data, overwrite = TRUE)
    refs_tbl <- dplyr::tbl(.volatiles$memory, "refs")
  } else {
    assert_data_loaded()
    refs_tbl <- dplyr::tbl(.volatiles$con, "refs")
  }

  # no keyword searches exist for this table, so `ignore_case` and
  # `match_all` are not consulted

  # optional cap on the number of rows
  if (!is.null(max_rows)) {
    refs_tbl <- utils::head(refs_tbl, max_rows)
  }

  # execute the query and hand back the rows
  dplyr::collect(refs_tbl)
}


#' Query outcome table from the ClinicalTrials.gov dataset
#'
#' Returns rows of the \code{outcome} table, taken either from the database
#' currently loaded by the package or from a data set supplied by the caller,
#' such as the result of an earlier query. No keyword searches are currently
#' implemented for this table; only the row limit affects the result.
#'
#'
#' @param data                    a dataset to search over; set to \code{NULL}
#'                                to use the \code{outcome} table of the
#'                                dataset that is currently loaded
#'
#' @param ignore_case             logical. Accepted for consistency with the
#'                                other query functions; currently unused.
#'                                The default is \code{TRUE}.
#'
#' @param match_all               logical. Accepted for consistency with the
#'                                other query functions; currently unused.
#'                                The default is \code{FALSE}.
#'
#' @param max_rows                integer. The maximum number of rows to return;
#'                                the default \code{NULL} returns all rows.
#'
#' @author Taylor B. Arnold, \email{taylor.arnold@@acm.org}
#' @return a tibble object holding the selected rows
#'
#' @importFrom tibble as_tibble
#' @importFrom DBI dbReadTable
#' @importFrom dplyr collect tbl
#' @importFrom utils head
#' @export
ctgov_query_outcome <- function(
  data = NULL,
  ignore_case = TRUE,
  match_all = FALSE,
  max_rows = NULL
) {
  # choose the source: caller-supplied data is registered with the in-memory
  # connection under the table name "outcome"; otherwise the loaded database
  # is used
  if (!is.null(data)) {
    duckdb::duckdb_register(.volatiles$memory, "outcome", data, overwrite = TRUE)
    outcome_tbl <- dplyr::tbl(.volatiles$memory, "outcome")
  } else {
    assert_data_loaded()
    outcome_tbl <- dplyr::tbl(.volatiles$con, "outcome")
  }

  # no keyword searches exist for this table, so `ignore_case` and
  # `match_all` are not consulted

  # optional cap on the number of rows
  if (!is.null(max_rows)) {
    outcome_tbl <- utils::head(outcome_tbl, max_rows)
  }

  # execute the query and hand back the rows
  dplyr::collect(outcome_tbl)
}


#' Query endpoint table from the ClinicalTrials.gov dataset
#'
#' Returns rows of the \code{epoint} table, taken either from the database
#' currently loaded by the package or from a data set supplied by the caller,
#' such as the result of an earlier query. No keyword searches are currently
#' implemented for this table; only the row limit affects the result.
#'
#'
#' @param data                    a dataset to search over; set to \code{NULL}
#'                                to use the \code{epoint} table of the dataset
#'                                that is currently loaded
#'
#' @param ignore_case             logical. Accepted for consistency with the
#'                                other query functions; currently unused.
#'                                The default is \code{TRUE}.
#'
#' @param match_all               logical. Accepted for consistency with the
#'                                other query functions; currently unused.
#'                                The default is \code{FALSE}.
#'
#' @param max_rows                integer. The maximum number of rows to return;
#'                                the default \code{NULL} returns all rows.
#'
#'
#' @author Taylor B. Arnold, \email{taylor.arnold@@acm.org}
#' @return a tibble object holding the selected rows
#'
#' @importFrom tibble as_tibble
#' @importFrom dplyr collect tbl
#' @importFrom DBI dbReadTable
#' @importFrom utils head
#' @export
ctgov_query_endpoint <- function(
  data = NULL,
  ignore_case = TRUE,
  match_all = FALSE,
  max_rows = NULL
) {
  # choose the source: caller-supplied data is registered with the in-memory
  # connection under the table name "epoint"; otherwise the loaded database
  # is used
  if (!is.null(data)) {
    duckdb::duckdb_register(.volatiles$memory, "epoint", data, overwrite = TRUE)
    epoint_tbl <- dplyr::tbl(.volatiles$memory, "epoint")
  } else {
    assert_data_loaded()
    epoint_tbl <- dplyr::tbl(.volatiles$con, "epoint")
  }

  # no keyword searches exist for this table, so `ignore_case` and
  # `match_all` are not consulted

  # optional cap on the number of rows
  if (!is.null(max_rows)) {
    epoint_tbl <- utils::head(epoint_tbl, max_rows)
  }

  # execute the query and hand back the rows
  dplyr::collect(epoint_tbl)
}


#' Wrapper function for the API to return all tables
#'
#' Runs \code{ctgov_query} with the supplied arguments and then, for each of
#' the other tables in the loaded database, keeps the rows whose
#' \code{nct_id} appears in the query result. Note that \code{max_rows} has a
#' different default here than in \code{ctgov_query}.
#'
#' @param max_rows                integer. The maximum number of study rows to
#'                                return; set to \code{NULL} to return all rows.
#'
#' @param ...                     Options passed to \code{ctgov_query}.
#'
#' @return a named list of tables with elements \code{studies},
#'         \code{design}, \code{interventions}, \code{references},
#'         \code{outcomes} and \code{end_points}
#'
#' @importFrom dplyr collect semi_join tbl
#' @export
ctgov_query_api <- function(max_rows = 100L, ...)
{
  studies <- ctgov_query(max_rows = max_rows, ...)

  # output element name -> name of the database table it is read from
  related_tables <- c(
    design = "design",
    interventions = "inter",
    references = "refs",
    outcomes = "outcome",
    end_points = "epoint"
  )

  # keep only the rows of each table that belong to a selected study;
  # copy = TRUE lets the local `studies` result be joined against the
  # database table
  related <- lapply(related_tables, function(table_name) {
    dplyr::collect(dplyr::semi_join(
      dplyr::tbl(.volatiles$con, table_name),
      studies,
      by = "nct_id",
      copy = TRUE
    ))
  })

  c(list(studies = studies), related)
}


#' Query terms for the ClinicalTrials.gov dataset
#'
#' Gives the category levels that the categorical arguments of the
#' \code{ctgov_query} function are checked against.
#'
#' @return a named list of allowed categorical values for the query
#'
#' @export
ctgov_get_query_terms <- function()
{
  # the levels are kept in the package-level `.volatiles` object
  .volatiles$ol
}
# presagia-analytics/ctrialsgov documentation built on March 25, 2024, 2:10 p.m.