Nothing
## ----setup, include=FALSE-----------------------------------------------------
knitr::opts_chunk$set(
eval = FALSE,
warning = FALSE,
message = FALSE,
results = "hide"
)
## ----connect_mongodb----------------------------------------------------------
# db <- nodbi::src_mongo(
# url = "mongodb://localhost",
# db = "my_database_name",
# collection = "my_collection_name"
# )
# db
# # MongoDB 8.0.4 (uptime: 298113s)
# # URL: mongodb://localhost
# # Database: my_database_name
# # Collection: my_collection_name
#
# # empty collection if exists
# nodbi::docdb_delete(db, db$collection)
## ----ctrLoadQueryIntoDb-------------------------------------------------------
# # Load package
# library(ctrdata)
#
# # Model queries
# queries <- ctrGenerateQueries(
# condition = "neuroblastoma",
# population = "P"
# )
#
# # Load trials from all queries into collection
# result <- lapply(queries, ctrLoadQueryIntoDb, euctrresults = TRUE, con = db)
#
# # Show results of loading
# sapply(result, "[[", "n")
# # EUCTR CTGOV2 ISRCTN CTIS
# # 390 613 3 19
## ----dbFindFields-------------------------------------------------------------
# #
# dbFindFields(namepart = "date", sample = FALSE, con = db)
# # Finding fields in database collection (may take some time) . . . . .
# # Field names cached for this session.
# # [...]
# # EUCTR
# # "e231_full_title_date_and_version_of_each_substudy_and_their_related_objectives"
# # [...]
# # CTGOV2
# # "annotationSection.annotationModule.unpostedAnnotation.unpostedEvents.date"
# # CTGOV2
# # "protocolSection.statusModule.studyFirstSubmitQcDate"
# # [...]
# # CTIS
# # "publishDate"
# # CTIS
# # "startDateEU"
# # [...]
# # ISRCTN
# # "trialDesign.overallEndDate"
## ----dbGetFieldsIntoDf--------------------------------------------------------
# # Define vector of fields
# fieldsOfInterest <- c(
# # EUCTR protocol-related information
# "f41_in_the_member_state",
# "f422_in_the_whole_clinical_trial",
# "a1_member_state_concerned",
# #
# # EUCTR results-related information
# "trialInformation.recruitmentStartDate",
# "trialInformation.globalEndOfTrialDate",
# #
# # CTGOV2
# "protocolSection.statusModule.startDateStruct.date",
# "trialInformation.recruitmentStartDate",
# "protocolSection.statusModule.primaryCompletionDateStruct.date"
# )
#
# # Create data frame with records of trials
# # which for at least one field have a value
# result <- dbGetFieldsIntoDf(
# fields = fieldsOfInterest,
# con = db
# )
# # Querying database (7 fields)...
#
# # dim(result)
# # [1] 984 8
## ----attributes---------------------------------------------------------------
# attributes(result)
# # [...]
# #
# # $class
# # [1] "data.frame"
# #
# # $`ctrdata-dbname`
# # [1] "my_database_name"
# #
# # $`ctrdata-table` <-- this attribute will be retired by end 2024
# # [1] "my_collection_name"
# #
# # $`ctrdata-table-note`
# # [1] "^^^ attr ctrdata-table will be removed by end 2024"
# #
# # $`ctrdata-collection`
# # [1] "my_collection_name"
# #
# # $`ctrdata-dbqueryhistory`
# # query-timestamp query-register query-records
# # 1 2025-03-02 15:26:22 EUCTR 390
# # 2 2025-03-02 15:26:25 CTGOV2 613
# # 3 2025-03-02 15:26:26 ISRCTN 3
# # 4 2025-03-02 15:26:28 CTIS 19
# # query-term
# # 1 query=neuroblastoma&age=under-18
# # 2 cond=neuroblastoma&aggFilters=ages:child
# # 3 &filters=condition:neuroblastoma,ageRange:Child&q=
# # 4 searchCriteria={"medicalCondition":"neuroblastoma","ageGroupCode":[2]}
## ----dbFindIdsUniqueTrials----------------------------------------------------
# # Obtain de-duplicated trial record ids
# ids <- dbFindIdsUniqueTrials(
# preferregister = "EUCTR",
# con = db
# )
# # Searching for duplicate trials...
# # - Getting all trial identifiers (may take some time), 1025 found in collection
# # - Finding duplicates among registers' and sponsor ids...
# # - 284 EUCTR _id were not preferred EU Member State record for 109 trials
# # - Keeping 106 / 0 / 423 / 1 / 19 records from EUCTR / CTGOV / CTGOV2 / ISRCTN / CTIS
# # = Returning keys (_id) of 549 records in collection "my_collection_name"
#
# # Eliminate duplicate trials records:
# result <- result[result[["_id"]] %in% ids, ]
#
# nrow(result)
# # [1] 513
# #
# # Note that "ids" are the identifiers of unique trials in the whole collection,
# # whereas the data frame "result" only includes those trials in which any of
# # the fields of interest had a value, thus explaining why "result" has fewer
# # rows than "ids" has identifiers.
## ----unique-------------------------------------------------------------------
# # Obtain data of interest
# result <- dbGetFieldsIntoDf(
# fields = fieldsOfInterest,
# calculate = "f.isUniqueTrial",
# con = db
# )
# # Querying database (30 fields)...
# # - Finding duplicates among registers' and sponsor ids...
# # - 132 EUCTR _id were not preferred EU Member State record for 83 trials
# # - Keeping 613 / 50 / 0 / 1 / 19 records from CTGOV2 / EUCTR / CTGOV / ISRCTN / CTIS
#
# # Eliminate duplicate trials records:
# result <- result[result[[".isUniqueTrial"]], ]
#
# nrow(result)
# # [1] 267
# #
# # Note this has used a different register as priority.
# # Also, the data frame result includes all trials which
# # had a value in at least one of the fields of interest
# # or the fields needed to calculate the trial concept.
#
# # See description of concept .isUniqueTrial:
# help("f.isUniqueTrial")
#
# # See how concept .isUniqueTrial is implemented
# f.isUniqueTrial
## ----single_trial_widget------------------------------------------------------
# # Opens a web browser for user interaction.
# # If the trial is not found in the database,
# # it will be loaded from the register.
# #
# # The search is for both, field names and values
# ctrShowOneTrial("2022-501725-21-00", con = db)
## ----single_trial_widget_jsonview---------------------------------------------
# # Requires additional package for visualisation
# remotes::install_github("hrbrmstr/jsonview")
#
# # Works with DuckDb, SQLite, PostgreSQL, MongoDB
# oneTrial <- nodbi::docdb_query(
# src = db,
# key = db$collection,
# query = '{"_id":"2022-501725-21-00"}',
# limit = 1L
# )
#
# # Interactive widget where nodes can be expanded,
# # note that fields and values cannot be searched
# jsonview::json_tree_view(oneTrial)
## ----str_data_frame-----------------------------------------------------------
# # Get data of interest
# result <- dbGetFieldsIntoDf(
# fields = c("ctrname"),
# calculate = c("f.isUniqueTrial", "f.startDate"),
# con = db
# )
# # Querying database (33 fields)...
# # - Finding duplicates among registers' and sponsor ids...
# # - 132 EUCTR _id were not preferred EU Member State record for 83 trials
# # - Keeping 613 / 50 / 0 / 1 / 19 records from CTGOV2 / EUCTR / CTGOV / ISRCTN / CTIS
# # Calculating .startDate...
#
# str(result)
# # 'data.frame': 1025 obs. of 4 variables:
# # $ _id : chr "2004-004386-15-DE" "2004-004386-15-ES" "2004-004386-15-GB" "2004-004386-15-IT" ...
# # $ ctrname : chr "EUCTR" "EUCTR" "EUCTR" "EUCTR" ...
# # $ .isUniqueTrial: logi FALSE FALSE FALSE FALSE FALSE FALSE ...
# # $ .startDate : Date, format: "2005-07-26" "2005-11-15" "2005-07-26" ...
#
# # Open file for saving
# png("vignettes/nb1.png")
#
# # De-duplicate and visualise start date
# hist(
# result[result$.isUniqueTrial, ".startDate"],
# breaks = "years"
# )
# box()
# dev.off()
## ----dfcalculate--------------------------------------------------------------
# # Introduction and overview of available trial concepts
# help("ctrdata-trial-concepts")
#
# # Show documentation of a specific trial concept
# help("f.isMedIntervTrial")
#
# # List concepts available at this time
# as.character(utils::ls.str(
# getNamespace("ctrdata"),
# all.names = TRUE,
# pattern = "^f[.][a-z]"))
# # [1] "f.controlType" "f.isMedIntervTrial" "f.isUniqueTrial"
# # [4] "f.likelyPlatformTrial" "f.numSites" "f.numTestArmsSubstances"
# # [7] "f.primaryEndpointDescription" "f.primaryEndpointResults" "f.resultsDate"
# # [10] "f.sampleSize" "f.sponsorType" "f.startDate"
# # [13] "f.statusRecruitment" "f.trialObjectives" "f.trialPhase"
# # [16] "f.trialPopulation"
## ----results='asis'-----------------------------------------------------------
# '
# [
# {
# "_id":"NCT01234567",
# "title": "Current title",
# "clinical_results": ...,
# ...,
# "history": [
# {
# "history_version": {
# "version_number": 1,
# "version_date": "2020-21-22 10:11:12"},
# "title": "Original title",
# "clinical_results": ...,
# ...
# },
# {
# "history_version": {
# "version_number": 2,
# "version_date": "2021-22-23 11:13:13"},
# "title": "Later title",
# "clinical_results": ...,
# ...
# }
# ]
# },
# {
# "_id":"2022-502051-56-00",
# "title": "Current title",
# "ctrname": "CTIS",
# "lastUpdated": "2025-04-07"
# "authorizedPartsII": ...,
# ...,
# "history": [
# {
# "history_version": {
# "version_number": 0,
# "version_date": "2025-04-06"},
# "title": "Intermediate title",
# "ctrname": "CTIS",
# "lastUpdated": "2025-04-06"
# "authorizedPartsII": ...,
# ...
# },
# {
# "history_version": {
# "version_number": 0,
# "version_date": "2025-04-05"},
# "title": "Original title",
# "ctrname": "CTIS",
# "lastUpdated": "2025-04-05"
# "authorizedPartsII": ...,
# ...
# }
# ]
# },
# ...
# ]
# '
## ----sample_size_over_time----------------------------------------------------
# # Load previous query (above), specifying that
# # for each trial, 5 versions should be retrieved
# ctrLoadQueryIntoDb(
# queryterm = queries["CTGOV2"],
# con = db,
# ctgov2history = 5L
# )
#
# # Get relevant fields
# result <- dbGetFieldsIntoDf(
# fields = c(
# # only CTGOV2 has structured historic information
# "history.protocolSection.designModule.enrollmentInfo.count",
# "history.history_version.version_date"
# ),
# calculate = "f.statusRecruitment",
# con = db
# )
#
# # Helper packages
# library(dplyr)
# library(tidyr)
# library(ggplot2)
#
# # Mangle and plot
# result %>%
# unnest(cols = starts_with("history.")) %>%
# filter(.statusRecruitment == "completed") %>%
# filter(!is.na(history.protocolSection.designModule.enrollmentInfo.count)) %>%
# filter(history.protocolSection.designModule.enrollmentInfo.count > 0L) %>%
# group_by(`_id`) %>%
# ggplot(
# mapping = aes(
# x = history.history_version.version_date,
# y = history.protocolSection.designModule.enrollmentInfo.count,
# colour = `_id`)
# ) +
# geom_step() +
# geom_point() +
# theme_light() +
# scale_y_log10() +
# guides(colour = "none") +
# labs(
# title = "Sample sizes in trials including patients with a neuroblastoma",
# subtitle = "Source: CTGOV2 records labelled as phase 3 and completed",
# caption = Sys.Date()
# )
#
# ggsave("vignettes/samplesizechanges.png", width = 6, height = 4)
## ----ctrShowOneTrial----------------------------------------------------------
# # Since version 1.20.0, an interactive widget is built into ctrdata
# # and can be used to search in all field names and all values
# ctrShowOneTrial("NCT02139397", con = db)
## ----show_nesting-------------------------------------------------------------
# # Helper package
# remotes::install_github("https://github.com/hrbrmstr/jsonview")
#
# # Get relevant data
# result <- dbGetFieldsIntoDf("resultsSection.outcomeMeasuresModule", con = db)
#
# # Create interactive widget
# jsonview::json_tree_view(result[result[["_id"]] == "NCT02139397", -1])
## ----analyse_nested_data------------------------------------------------------
# #### 1. Create data frame from results fields
# # These are key results fields from
# # CTGOV2, CTGOV and from EUCTR:
# result <- dbGetFieldsIntoDf(
# fields = c(
# # EUCTR - note this requires to set parameter
# # euctrresults = TRUE in ctrLoadQueryIntoDb()
# # as shown above in section "User annotations"
# "trialInformation.populationAgeGroup",
# "subjectDisposition.recruitmentDetails",
# "baselineCharacteristics.baselineReportingGroups.baselineReportingGroup",
# "endPoints.endPoint",
# "subjectAnalysisSets",
# "adverseEvents.seriousAdverseEvents.seriousAdverseEvent",
# # CTGOV2
# "resultsSection.outcomeMeasuresModule",
# "protocolSection.designModule.designInfo.allocation",
# "resultsSection.participantFlowModule",
# # CTGOV
# "clinical_results.baseline.analyzed_list.analyzed.count_list.count",
# "clinical_results.baseline.group_list.group",
# "clinical_results.baseline.analyzed_list.analyzed.units",
# "clinical_results.outcome_list.outcome",
# "study_design_info.allocation"
# ),
# calculate = "f.isUniqueTrial",
# con = db
# )
#
# # Keep only unique trial records
# result <- result[result[[".isUniqueTrial"]], ]
#
#
# #### 2. All nested data are transformed to a long,
# # Name value data frame (which has several hundred
# # rows for each trial, with one row per field):
# #
# long_result <- dfTrials2Long(df = result)
# # Total 89253 rows, 159 unique names of variables
# long_result[c(100, 10000, 80000), ]
# # # A tibble: 3 × 4
# # `_id` identifier name value
# # <chr> <chr> <chr> <chr>
# # 1 2007-000371-42-PL 36 adverseEvents.seriousAdverseEvents.seriousAdverseEvent.dic… false
# # 2 2013-003595-12-CZ 13.3 adverseEvents.seriousAdverseEvents.seriousAdverseEvent.val… 0
# # 3 NCT02124772 25.3 resultsSection.outcomeMeasuresModule.outcomeMeasures.denom… OG002
#
#
# #### 3. and 4. Obtain values for measures of interest
# #
# # The parameters can be regular expressions
# clinicalDuration <- dfName2Value(
# df = long_result,
# # 3. Identify measures of interest
# wherename = paste0(
# "endPoints.endPoint.title|",
# "resultsSection.outcomeMeasuresModule.outcomeMeasures.title"
# ),
# wherevalue = paste0(
# "duration of response|DOR|",
# "free survival|DFS|PFS|EFS"
# ),
# # 4. Obtain result values for measure
# valuename = paste0(
# "resultsSection.*outcomeMeasures.classes.categories.measurements.value|",
# "endPoints.*armReportingGroup.tendencyValues.tendencyValue.value|",
# "resultsSection.outcomeMeasuresModule.outcomeMeasures.unitOfMeasure|",
# "endPoints.endPoint.unit|",
# "resultsSection.outcomeMeasuresModule.outcomeMeasures.groups.title|",
# "endPoints.*armReportingGroup.armId"
# )
# )
# # Returning values for 53 out of 683 trials
#
#
# #### 5. Tabulate the results
#
# # PFS / EFS duration has been reported with various units:
# sort(unique(clinicalDuration[
# grepl("unit", clinicalDuration$name), "value", drop = TRUE]))
# # [1] "3 year EFS" "Days"
# # [3] "Estimated probability" "L/hr"
# # [5] "months" "Months"
# # [7] "number of participants with No VOD/Death" "Participants"
# # [9] "percent of probability" "percent probability"
#
# # Helper packages for convenience
# library(dplyr)
# library(tidyr)
#
# # Mangle data for tabulation
# clinicalDuration %>%
# as_tibble() %>%
# mutate(
# group_id = paste0(`_id`, "_", sub("([0-9]+)[.]?.*", "\\1", identifier)),
# name_short = sub(".*[.](.+)", "\\1", name),
# name_short = if_else(name_short == "unitOfMeasure", "unit", name_short)
# ) %>%
# group_by(group_id) %>%
# mutate(
# is_duration = any(grepl("day|month|week|year", value, ignore.case = TRUE))) %>%
# ungroup() %>%
# filter(is_duration) %>%
# select(name_short, value, where, group_id) %>%
# pivot_wider(id_cols = c(group_id, where), names_from = name_short, values_fn = list) %>%
# unnest(c(value, unit)) %>%
# filter(!grepl("999[9]*", value)) %>%
# rowwise() %>%
# mutate(
# value = as.numeric(value),
# arm_names = paste(armId, title, collapse = " / "),
# ) %>%
# ungroup() %>%
# mutate(
# days = case_when(
# grepl("[wW]eek", unit) ~ value * 7,
# grepl("[mM]onth", unit) ~ value * 30,
# grepl("[yY]ear", unit) ~ value * 30,
# .default = value
# )) %>%
# select(!c(value, unit, armId, title)) -> clinicalDuration
#
# clinicalDuration[sample(seq_len(nrow(clinicalDuration)), 10L), ]
# # # A tibble: 10 × 4
# # group_id where arm_names days
# # <chr> <chr> <chr> <dbl>
# # 1 NCT01962103_22 Phase 2: Progression-Free Survival (PFS) " Phase … 91
# # 2 NCT02162732_4 Progression Free Survival (PFS) Interval Will be Measur… " Neurob… 283
# # 3 2014-004685-25-DE_11 PFS as Determined by the Investigator using RANO criter… "" NA
# # 4 NCT01355679_4 Activity of Treatments Chosen Based on Progression Free… " Guided… 59
# # 5 2014-004697-41-IT_16 Duration of Response (DOR) as Determined by the Investi… "Arm-104… 396
# # 6 NCT02162732_4 Progression Free Survival (PFS) Interval Will be Measur… " Neurob… 163
# # 7 NCT01483820_3 Number of Days Participants Experienced Progression Fre… " TPI 28… 46
# # 8 NCT01505608_5 Progression Free Survival (PFS) of Participants Using D… " Arm A-… 125
# # 9 NCT02139397_3 Determine the Progression Free Survival (PFS) of Partic… " Phase … 51.2
# # 10 NCT04029688_14 Part 1b: PFS in Participants With Neuroblastoma From SE… " Part 1… 54
## ----plot_endpoint_frequencies------------------------------------------------
# # Get relevant data
# result <- dbGetFieldsIntoDf(
# calculate = c(
# "f.isUniqueTrial",
# "f.sampleSize",
# "f.trialPopulation",
# "f.primaryEndpointDescription"
# ),
# con = db
# )
# # Querying database (75 fields)...
# # - Finding duplicates among registers' and sponsor ids...
# # - 132 EUCTR _id were not preferred EU Member State record for 83 trials
# # - Keeping 613 / 50 / 0 / 1 / 19 records from CTGOV2 / EUCTR / CTGOV / ISRCTN / CTIS
# # Calculating .primaryEndpointDescription...
#
# # De-duplicate
# result <- result[result[[".isUniqueTrial"]], ]
#
# # For primary endpoint of interest, use regular expression on text to
# # identify time to the respective event (mostly includes death events)
# regex <- "((progression|event|relapse|recurrence|disease)[- ]free)|pfs|dfs|efs)"
#
# # .primaryEndpointDescription is in each cell a list of one or more
# # items with an endpoint description; grepl works on each such item
# result$pep_is_efs <- grepl(
# pattern = regex,
# x = result$.primaryEndpointDescription,
# ignore.case = TRUE
# )
#
# # Tabulate
# table(result$pep_is_efs)
# # FALSE TRUE
# # 606 76
#
# # Plot
# library(ggplot2)
# ggplot(
# data = result,
# aes(
# x = .sampleSize,
# y = pep_is_efs
# )
# ) +
# geom_boxplot() +
# scale_x_log10()
# # Warning message:
# # Removed 90 rows containing non-finite values (`stat_boxplot()`).
#
# ggsave("vignettes/boxpep.png", width = 6, height = 4)
## ----analyse_results_data-----------------------------------------------------
# # Get result set
# result <- dbGetFieldsIntoDf(
# calculate = c(
# "f.isUniqueTrial",
# "f.sampleSize",
# "f.primaryEndpointResults"
# ),
# con = db
# )
# # Querying database (53 fields)...
#
# # De-duplicate
# result <- result[result[[".isUniqueTrial"]], ]
#
# # Helper package
# library(ggplot2)
#
# # Plot p values ECDF
# ggplot(
# data = result,
# aes(x = .primaryEndpointFirstPvalue)) +
# stat_ecdf(geom = "step") +
# labs(
# title = "Trials with children with a neuroblastoma",
# x = "Range of p values",
# y = "Empirical cumulative density of p values\nfrom primary endpoint primary analysis") +
# geom_vline(
# xintercept = 0.05,
# linetype = 3)
#
# ggsave("vignettes/phase23_paed_p_values.png", width = 6, height = 4)
#
# # Plot sample size vs. p value
# ggplot(
# data = result,
# aes(
# x = .sampleSize,
# y = .primaryEndpointFirstPvalue)) +
# geom_point() +
# ylim(0, 1) +
# xlim(0, 1000) +
# scale_x_log10() +
# geom_hline(yintercept = 0.05, linetype = 3)
#
# ggsave("vignettes/phase23_paed_p_values_participants.png", width = 6, height = 4)
#
#
# # Statistical method used for primary endpoint analysis
# tmp <- table(result$.primaryEndpointFirstPmethod)
# tmp <- tmp[rev(order(tmp))]
# tmp <- data.frame(tmp)
# knitr::kable(tmp)
## ----product_status-----------------------------------------------------------
# # Helper package
# library(dplyr)
#
# # Get results
# result <- dbGetFieldsIntoDf(
# fields = c(
# "a1_member_state_concerned",
# "n_date_of_competent_authority_decision",
# "dimp.d21_imp_to_be_used_in_the_trial_has_a_marketing_authorisation",
# "x6_date_on_which_this_record_was_first_entered_in_the_eudract_database",
# "f422_in_the_whole_clinical_trial",
# "a2_eudract_number"
# ),
# calculate = c(
# "f.isUniqueTrial",
# "f.startDate"
# ),
# con = db
# )
#
# # De-duplicate
# result <- result[result[[".isUniqueTrial"]], ]
#
# # How many of the investigational medicinal product(s)
# # that are being used in the trial are authorised?
# number_authorised <- sapply(
# result[["dimp.d21_imp_to_be_used_in_the_trial_has_a_marketing_authorisation"]],
# function(i) if (all(is.na(i))) NA else sum(i, na.rm = TRUE)
# )
# table(number_authorised, exclude = "")
# # number_authorised
# # 0 1 2 3 4 5 6 7 13 <NA>
# # 19 12 5 2 3 3 1 1 1 636
#
# result[["any_authorised"]] <- number_authorised > 0L
#
# # Helper packages
# library(ggplot2)
# library(scales)
# library(dplyr)
#
# # Plot
# ggplot(
# data = result %>%
# filter(!is.na(any_authorised)),
# aes(
# x = .startDate,
# fill = any_authorised
# )
# ) +
# scale_x_date(
# breaks = breaks_width(width = "2 years"),
# labels = date_format("%Y")
# ) +
# geom_histogram(binwidth = 2 * 365.25) +
# labs(
# title = "Selected clinical trials in EU",
# x = "Year of trial start",
# y = "Number of trials",
# fill = "Medicine\nauthorised?"
# )
#
# ggsave("vignettes/nbtrials.png", width = 6, height = 4)
## ----mongolite----------------------------------------------------------------
# # Load package for database access
# library(mongolite)
#
# # Create R object m to access the
# # collection db created above
# m <- mongo(
# url = paste0(db[["url"]], "/", db[["db"]]),
# collection = db[["collection"]]
# )
#
# # Number of records in collection
# m$count()
# # [1] 1026
#
# # Number of EUCTR records, using JSON for query
# m$count(query = '{"_id": {"$regex": "[0-9]{4}-[0-9]{6}-[0-9]{2}-[3A-Z]{2,3}", "$options": "i"}}')
# # [1] 390
#
# # Alternative
# m$count(query = '{"ctrname": "EUCTR"}')
# # [1] 390
#
# # Number of CTGOV records
# m$count(query = '{"_id": {"$regex": "NCT[0-9]{8}", "$options": "i"}}')
# # [1] 613
#
# # Alternative
# m$count(query = '{"ctrname": "CTGOV2"}')
# # [1] 613
#
# # To best define regular expressions for analyses, inspect the field:
# head(
# m$distinct(
# key = "protocolSection.outcomesModule.primaryOutcomes.measure",
# query = '{"ctrname": "CTGOV2"}'
# )
# )
# # [1] "- To demonstrate that 123I-mIBG planar scintigraphy is sensitive and ...
# # [2] "1-year Progression-free Survival"
# # [3] "18F-mFBG PET Scan identification of Neuroblastoma on the LAFOV PET/CT"
# # [4] "1p36 deletion status and its association with UBE4B"
# # [5] "2-year progression free survival"
# # [6] "2-year progression free survival (PFS)"
## ----mongodb_aggregation_pipeline---------------------------------------------
# #
# # Total count of PFS, EFS, RFS or DFS
# out <- m$aggregate(
# # Count number of documents in collection that
# # matches in primary_outcome.measure the
# # regular expression,
# pipeline =
# '[{"$match": {"protocolSection.outcomesModule.primaryOutcomes.measure":
# {"$regex": "(progression|event|relapse|recurrence|disease)[- ]free",
# "$options": "i"}}},
# {"$group": {"_id": "null", "count": {"$sum": 1}}}]'
# )
# out
# # _id count
# # 1 null 64
#
# # List records of trials with overall survival
# # as primary endpoint, and list start date
# out <- m$aggregate(
# pipeline =
# '[{"$match": {"protocolSection.outcomesModule.primaryOutcomes.measure":
# {"$regex": "overall survival", "$options": "i"}}},
# {"$project": {"_id": 1, "protocolSection.statusModule.startDateStruct.date": 1}}]'
# )
# head(out)
# # _id date
# # 1 NCT00445965 2006-01
# # 2 NCT00801931 2007-09-06
# # 3 NCT05303727 2022-08
# # 4 NCT02176967 2014-08-08
# # 5 NCT02558244 2016-01
# # 6 NCT04897880 2019-01-09
## ----mongodb_mapreduce--------------------------------------------------------
# # Count number of trials by number of study
# # participants in bins of hundreds of participants:
# m$aggregate(pipeline = '
# [
# {
# "$project": {
# "flooredNumber": {
# "$multiply": [
# {
# "$floor": {
# "$divide": [
# {
# "$toInt": "$protocolSection.designModule.enrollmentInfo.count"
# },
# 100
# ]
# }
# },
# 100
# ]
# }
# }
# },
# {
# "$group": {
# "_id": "$flooredNumber",
# "count": {
# "$count": {}
# }
# }
# },
# {
# "$sort": {
# "_id": 1
# }
# }
# ]
# ')
# # _id count
# # 1 NA 463
# # 2 0 419
# # 3 100 67
# # 4 200 18
# # 5 300 12
# # 6 400 8
# # 7 500 9
# # 8 600 6
# # 9 700 1
# # 10 800 2
# # 11 900 1
# # 12 1000 5
# # [...]
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.