R/extract_publications.R

Defines functions extract_publications

Documented in extract_publications

#' Takes a data frame of the type provided by
#' `clinicaltrials_gov_download()` and returns a new data frame
#' containing one row per publication of the publication type
#' specified indexed on ClinicalTrials.gov for every version of the
#' clinical trial record provided.
#'
#' This function does not connect to ClinicalTrials.gov, and only
#' interprets data that has already been downloaded by expanding the
#' nested JSON-encoded data in the `references` column provided by
#' `clinicaltrials_gov_version`.
#'
#' @param df A data frame containing at least the following columns:
#'     `nctid`, `version_number`, `total_versions`, `version_date`,
#'     and `references`. The `references` column should contain a
#'     nested JSON-encoded table with three columns: `pmid`, `type`
#'     and `citation`. This data frame can be generated by the use of
#'     `clinicaltrials_gov_download`.
#'
#' @param types A character vector of the publication types to be
#'     returned (a single character string if only one type is
#'     wanted), e.g. "RESULT" or c("RESULT", "BACKGROUND"). Allowed
#'     types: "RESULT", "BACKGROUND", "DERIVED".
#'
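#' @return A data frame containing the `nctid`, `version_number`,
#'     `total_versions` and `version_date` columns of `df`, plus the
#'     columns unnested from `references` (`pmid`, `type` and
#'     `citation`), with one row per publication of the requested
#'     types. If an error or a warning occurs during processing, the
#'     character string "Error" or "Warning" is returned instead.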
#'
#' @export
#'
#' @importFrom magrittr %>%
#' @importFrom rlang .data
#'
#' @examples
#'
#' \donttest{
#' hv <- clinicaltrials_gov_download("NCT00942747", latest=TRUE)
#' extract_publications(hv)
#' }

extract_publications <- function(
                                 df,
                                 types=c(
                                     "RESULT",
                                     "BACKGROUND",
                                     "DERIVED"
                                 )
                                 ) {
    ## Expands the JSON-encoded `references` column of `df` into one
    ## row per publication and keeps the rows whose `type` is in
    ## `types`. On any error or warning the condition is reported via
    ## message() and the string "Error" or "Warning" is returned
    ## instead of a data frame.

    ## Citation types that may be requested via `types`
    allowed_types <- c("RESULT", "BACKGROUND", "DERIVED")

    ## Columns that `df` must provide; all but `references` are
    ## carried through to the result
    required_columns <- c(
        "nctid",
        "version_number",
        "total_versions",
        "version_date",
        "references"
    )

    out <- tryCatch({

        ## An empty `types` cannot select anything; reject it with a
        ## clear message rather than letting the check below fail on
        ## a missing value
        if (length(types) == 0) {
            stop("No citation type specified")
        }

        ## Check that every requested type is one of the allowed ones
        if (! all(types %in% allowed_types)) {
            stop("You tried to filter for a citation type that isn't allowed")
        }

        ## Check that the data frame contains the necessary columns
        if (! all(required_columns %in% colnames(df))) {
            stop("Missing required column(s) in data frame")
        }

        df_processed <- df %>%
            ## Drop versions with no references (missing or an empty
            ## JSON array)
            dplyr::filter(
                       ! is.na(.data$references),
                       .data$references != "[]"
                   ) %>%
            dplyr::select(dplyr::all_of(required_columns)) %>%
            ## Parse each JSON string into a tibble of references
            dplyr::mutate(
                rtable = purrr::map(
                                    .data$references,
                                    function(json) {
                                        tibble::as_tibble(
                                            jsonlite::fromJSON(json)
                                        )
                                    }
                                )
            ) %>%
            dplyr::select(! "references") %>%
            ## One row per reference
            tidyr::unnest("rtable")

        if (nrow(df_processed) == 0) {
            stop("No indexed clinical trial publications found")
        }

        dplyr::filter(df_processed, .data$type %in% types)

    },
    error = function(cond) {
        message("Here's the original error message:")
        message(paste(cond, "\n"))
        ## Choose a return value in case of error
        return("Error")
    },
    warning = function(cond) {
        message("Here's the original warning message:")
        message(paste(cond, "\n"))
        ## Choose a return value in case of warning
        return("Warning")
    })

    out
}

Try the cthist package in your browser

Any scripts or data that you put into this service are public.

cthist documentation built on Sept. 11, 2024, 5:31 p.m.