# Document-wide setup: knitr chunk defaults, table packages and shared helpers.
# One statement per line so that the commented-out hook below no longer hides
# the code that follows it.
knitr::opts_chunk$set(echo = TRUE)

# Set before kableExtra is attached, as in the original statement order.
options(kableExtra.latex.load_packages = FALSE)
library(kableExtra)
#knitr::knit_hooks$set(document = function(x) {sub('\\usepackage[]{color}', '\\usepackage[table]{xcolor}', x, fixed = TRUE)})
library(dplyr)

# Value knitr::kable() prints in place of NA cells.
options(knitr.kable.NA = "")

# Convert a PDF figure to PNG for non-LaTeX output formats.
#
# path: path to a PDF file.
#
# Returns `path` unchanged when knitting to LaTeX; otherwise writes a PNG next
# to the PDF (same name, "png" extension) and returns the path of that PNG.
pdf2png <- function(path) {
  # only do the conversion for non-LaTeX output
  if (knitr::is_latex_output()) {
    return(path)
  }
  path2 <- xfun::with_ext(path, "png")
  img <- magick::image_read_pdf(path)
  magick::image_write(img, path2, format = "png")
  path2
}

# Font size passed to kable_styling() for LaTeX tables throughout the document.
latex_table_font_size <- 8

# NOTE(review): presumably defines printCohortDefinitionFromNameAndJson(), which
# the appendix chunks call -- confirm against PrintCohortDefinitions.R.
source("PrintCohortDefinitions.R")
# List of abbreviations, kept inline as ";"-delimited text (one row per line,
# no header). The row breaks are required: readr only treats a string as
# literal data when it contains a newline.
abbreviations <- readr::read_delim(col_names = FALSE, delim = ";", trim_ws = TRUE, file = "
  CDM; Common data model
  IRB; Institutional review board
  OHDSI; Observational Health Data Sciences and Informatics
  OMOP; Observational Medical Outcomes Partnership
  PS; Propensity score
  RCT; Randomized controlled trial
")

tab <- kable(abbreviations, col.names = NULL, linesep = "", booktabs = TRUE)

# Striping is requested through a different argument for LaTeX vs. HTML output.
if (knitr::is_latex_output()) {
  tab %>% kable_styling(latex_options = "striped", font_size = latex_table_font_size)
} else {
  tab %>% kable_styling(bootstrap_options = "striped")
}
# Responsible parties, kept inline as ";"-delimited text with a header row.
# One investigator per line; the row breaks are what make readr parse the
# string as literal data rather than as a file path.
parties <- readr::read_delim(col_names = TRUE, delim = ";", trim_ws = TRUE, file = "
  Investigator; Institution/Affiliation
  George Hripcsak; Department of Biomedical Informatics, Columbia University, New York, NY, USA
  Daniel Prieto-Alhambra *; Centre for Statistics in Medicine, NDORMS, University of Oxford, Oxford, UK
  Patrick B. Ryan; Observational Health Data Analytics, Janssen Research and Development, Titusville, NJ, USA
  Martijn J. Schuemie; Observational Health Data Analytics, Janssen Research and Development, Titusville, NJ, USA
  Marc A. Suchard; Department of Biostatistics, University of California, Los Angeles, Los Angeles, CA, USA
")

# Fixed column widths keep the long affiliations wrapping inside the table;
# the footnote explains the "*" marker used in the first column.
tab <- kable(parties, booktabs = TRUE, linesep = "") %>%
  column_spec(1, width = "10em") %>%
  column_spec(2, width = "30em") %>%
  footnote(general = "* Principal Investigator", general_title = "")

if (knitr::is_latex_output()) {
  tab %>% kable_styling(latex_options = "striped", font_size = latex_table_font_size)
} else {
  tab %>% kable_styling(bootstrap_options = "striped")
}
This study is undertaken within Observational Health Data Sciences and Informatics (OHDSI), an open collaboration. GH receives grant funding from the US National Institutes of Health and the US Food & Drug Administration and contracts from Janssen Research and Development. DPA's research group receives funding from the UK National Institute for Health Research (NIHR), the European Medicines Agency (EMA), UCB Biopharma, Amgen, and Chiesi. PBR and MJS are employees of Janssen Research and Development and shareholders in Johnson & Johnson. MAS receives grant funding from the US National Institutes of Health and the US Food & Drug Administration and contracts from Janssen Research and Development and IQVIA.
Background and Significance: At least 4 COVID19 vaccines have been approved to date for human use in the US and/or Europe, and many more are in pipeline and/or under review by regulatory authorities. All phase 3 randomised controlled trials (RCT) to date have compared these to either a placebo or an 'inactive' (against COVID19) vaccine. It is unlikely that head-to-head RCTs of COVID19 vaccines will be completed any time soon. It is therefore likely that real world data (electronic medical records, health claims) will be a key source of information for comparative effectiveness of COVID19 vaccines.
Study Aims: We aim to evaluate the performance of different study designs and analytical methods for the study of the comparative effectiveness of various (flu?) vaccines. A secondary aim examines comparative design and a user vs non-user design.
A / B indirect: later research, could use SC designs) -->
Study Description:
Design choices International, network cohorts of new user or matched-non-users of alternative vaccines, ie different products against a common virus.
Population: People registered in the contributing data sources in the OHDSI open science network for at least 1 year who then receive a vaccine against influenza in 2010-2018. Participants receiving different vaccines in a same year will be compared.
Comparators: The most commonly observed influenza vaccine in a given year will be chosen as 'reference', and compared to the second most common.
Positive control outcomes: synthetic positive control outcomes will be generated and added to the analysis following the approach proposed by (TODO MARC to add).
Timeframe: Follow up from vaccination date (index) until the earliest of: loss to follow-up, death, outcome, or summer solstice of the following year. 2019, 2018, 2017
Analyses: The following analytical approaches will be compared:
We will get the following once time-varying and competing risks are (demonstrably) working - A variation of the prior event rate ratio, where the HR/IRR of the incidence of events during the first 10 days after T/C will be used to inform of residual confounding.
Cox regression models (stratified by matched sets where applicable) will be fit to estimate Hazard Ratios and 95% Confidence Intervals according to exposure.
# Protocol amendments, kept inline as ";"-delimited text with a header row.
# The single "None" row carries four empty fields so it matches the five
# header columns.
amendments <- readr::read_delim(col_names = TRUE, delim = ";", trim_ws = TRUE, file = "
  Number; Date; Section of study protocol; Amendment or update; Reason
  None;;;;
")

tab <- kable(amendments, booktabs = TRUE, linesep = "")

if (knitr::is_latex_output()) {
  tab %>% kable_styling(latex_options = "striped", font_size = latex_table_font_size)
} else {
  tab %>% kable_styling(bootstrap_options = "striped")
}
# Study milestones, kept inline as ";"-delimited text with a header row.
# The date column is intentionally empty for every milestone.
dates <- readr::read_delim(col_names = TRUE, delim = ";", trim_ws = TRUE, file = "
  Milestone; Planned / actual date
  EU PAS Registration;
  Start of analysis;
  End of analysis;
  Results presentation;
")

tab <- kable(dates, booktabs = TRUE, linesep = "")

if (knitr::is_latex_output()) {
  tab %>% kable_styling(latex_options = "striped", font_size = latex_table_font_size)
} else {
  tab %>% kable_styling(bootstrap_options = "striped")
}
https://academic.oup.com/cid/advance-article/doi/10.1093/cid/ciaa1727/5992287?login=true (Rich writes one every year)
To inform critical decisions facing healthcare researchers and regulatory agencies in designing comparative effectiveness studies of vaccines, we will launch the TODO-NAME initiative. TODO-NAME benchmarks a large collection of study design choices to estimate relative outcome rates of health events between two vaccines using negative and synthetic positive controls. Specifically, this benchmark aims
Characterize the operating characteristics of all designs and choices to guide future studies of COVID-19 vaccine effectiveness and safety
Need something about how useful this will be because of execution across many different data sources
General outline (?):
Design evaluation will center on XXX historical-based vaccine comparisons for specific time periods (start date to end date):
# Exposures-of-interest table, read from the settings shipped with the study
# package. mustWork = TRUE stops with a clear "no file found" error instead of
# handing an empty path to read_csv() when the package or file is missing.
eois <- readr::read_csv(
  system.file("settings", "ExposuresOfInterest.csv",
              package = "VaccineEffectivenessEvaluation", mustWork = TRUE),
  col_types = readr::cols()
)
colnames(eois) <- SqlRender::camelCaseToTitleCase(colnames(eois))

# Columns 1 and 7 are dropped from the printed table.
tab <- eois %>%
  select(-1, -7) %>%
  kable(booktabs = TRUE, linesep = "", caption = "Exposures of interest.") %>%
  kable_styling(bootstrap_options = "striped", latex_options = "striped")

# Explicit column widths and the smaller font are only applied for LaTeX output.
if (knitr::is_latex_output()) {
  tab %>%
    column_spec(1, width = "30em") %>%
    column_spec(2, width = "8em") %>%
    column_spec(3, width = "8em") %>%
    column_spec(4, width = "8em") %>%
    column_spec(5, width = "8em") %>%
    kable_styling(font_size = latex_table_font_size)
} else {
  tab
}
The table above needs to be updated; here are some choices TODO DANI:
For some methods the period between historic start and historic end date will be used to estimate the historic incidence rate. The formal cohort definitions of each exposure can be found in Appendix \@ref(exposure-cohort-definitions).
Negative controls are outcomes believed not to be caused by any of the vaccines, and therefore ideally would not be flagged as a signal by a safety surveillance system. Any effect size estimates for negative control ideally should be close to the null.
A single set of negative control outcomes is defined for all four vaccine groups. To identify negative control outcomes that match the severity and prevalence of suspected vaccine adverse effects, a candidate list of negative controls was generated based on similarity of prevalence and percent of diagnoses that were recorded in an inpatient setting (as a proxy for severity). Manual review of this list by clinical experts created the final list of 127 negative control outcomes. The full list of negative control outcomes can be found in Appendix \@ref(negative-controls).
Negative control outcomes are defined as any occurrence of the negative control concept or any of its descendants.
Positive controls are outcomes known to be caused by vaccines, and ideally would be detected as signals by a safety surveillance system as early as possible. For various reasons, real positive controls are problematic.[@Schuemie2018-zi] Instead, here we will rely on synthetic positive controls,[@Schuemie2020-he;@Schuemie2018-hq] created by modifying a negative control through injection of additional, simulated occurrences of the outcome. To preserve (measured) confounding, simulated outcome occurrences are sampled from the probability distribution derived from a predictive model fitted on the data. Target true hazard ratios for the positive control synthesis are 1.5, 2, and 4, so using the 127 negative controls we are able to construct 127 * 3 = 381 positive control outcomes. The hazard for the outcome is simulated to be increased by the target ratio for the period starting 1 day after vaccination until 28 days after vaccination, with a constant hazard ratio during that time. This increased risk is applied both for the first and second injection of multi-dose vaccines.
We will execute TODO-NAME as an OHDSI network study. All data partners within OHDSI are encouraged to participate voluntarily and can do so conveniently, because of the community's shared Observational Medical Outcomes Partnership (OMOP) common data model (CDM) and OHDSI tool-stack. Many OHDSI community data partners have already committed to participate and we will recruit further data partners through OHDSI’s standard recruitment process, which includes protocol publication on OHDSI’s GitHub, an announcement in OHDSI’s research forum, presentation at the weekly OHDSI all-hands-on meeting and direct requests to data holders.
Table \@ref(tab:data-sources) lists the TODO already committed data sources for TODO-NAME; these sources encompass a large variety of practice types and populations. For each data source, we report a brief description and size of the population it represents. All data sources will receive institutional review board approval or exemption for their participation before executing TODO-NAME.
# Committed data sources, kept inline as ";"-delimited text with a header row.
# One data source per line: rows 1-5 are administrative claims databases and
# row 6 is an EHR database, matching the pack_rows() ranges below.
data_sources <- readr::read_delim(col_names = TRUE, delim = ";", trim_ws = TRUE, file = "
  Data source ; Population ; Patients ; History ; Data capture process and short description
  IBM MarketScan Commercial Claims and Encounters (CCAE) ; Commercially insured, < 65 years ; 142M ; 2000 -- ; Adjudicated health insurance claims (e.g. inpatient, outpatient, and outpatient pharmacy) from large employers and health plans who provide private healthcare coverage to employees, their spouses and dependents.
  IBM MarketScan Medicare Supplemental Database (MDCR) ; Commercially insured, 65$+$ years ; 10M ; 2000 -- ; Adjudicated health insurance claims of retirees with primary or Medicare supplemental coverage through privately insured fee-for-service, point-of-service or capitated health plans.
  IBM MarketScan Multi-State Medicaid Database (MDCD) ; Medicaid enrollees, racially diverse ; 26M ; 2006 -- ; Adjudicated health insurance claims for Medicaid enrollees from multiple states and includes hospital discharge diagnoses, outpatient diagnoses and procedures, and outpatient pharmacy claims.
  Japan Medical Data Center (JMDC) ; Japan, general ; 5.5M ; 2005 -- ; Data from 60 society-managed health insurance plans covering workers aged 18 to 65 and their dependents.
  Optum Clinformatics Data Mart (Optum) ; Commercially or Medicare insured ; 85M ; 2000 -- ; Inpatient and outpatient healthcare insurance claims.
  Optum Electronic Health Records (OptumEHR) ; US, general ; 93M ; 2006 -- ; Clinical information, prescriptions, lab results, vital signs, body measurements, diagnoses and procedures derived from clinical notes using natural language processing.
")

# Group the rows under centred, unindented sub-headings by data source type.
tab <- kable(data_sources, booktabs = TRUE, linesep = "", caption = "Committed TODO-NAME data sources and the populations they cover.") %>%
  kable_styling(bootstrap_options = "striped", latex_options = "striped") %>%
  pack_rows("Administrative claims", 1, 5, latex_align = "c", indent = FALSE) %>%
  pack_rows("Electronic health records (EHRs)", 6, 6, latex_align = "c", indent = FALSE)

# Explicit column widths and the smaller font are only applied for LaTeX output.
if (knitr::is_latex_output()) {
  tab %>%
    column_spec(1, width = "10em") %>%
    column_spec(2, width = "10em") %>%
    column_spec(5, width = "25em") %>%
    kable_styling(font_size = latex_table_font_size)
} else {
  tab
}
Vaccine safety surveillance methods can be broken down into four components: construction of a counterfactual (often referred to as the 'expected count'), a time-at-risk, the statistic to estimate, and potentially a decision rule on the estimate to classify signals from non-signals.
The time-at-risk is the time window, relative to the vaccination date, when outcomes will potentially be attributed to the vaccine. We define three time-at-risk windows, all starting on the day after vaccination, and ending 28, 42, and 90 days after vaccination. Time-at-risk windows will be constructed both for the first and second dose. The time-at-risk for one dose will be censored at the time of the next dose.
Effect-size estimates will be computed both with and without empirical calibration. [@Schuemie2014-bv;@Schuemie2018-hq] Empirical calibration will be done using leave-one-out: when calibrating the estimate for a control, the systematic error distribution will be fitted using all controls except the one being calibrated.
Similar to our previous study, we will compute the following metrics based on the effect size estimates: [@Schuemie2020-wx]
In addition, based on the PMaxSPRT decision rule, we will compute sensitivity and specificity.
To understand the time it takes for a method to identify signals, the study period for each vaccine will be divided into calendar months. For each month the methods will be executed using the data that had accumulated up to the end of that month, and the performance metrics will be reported for each month.
For those vaccines requiring multiple doses (zoster, HPV), metrics will be computed three times:
TODO DANI
Cohort studies allow direct estimation of incidence rates following exposure of interest, and the new-user design can capture early events following treatment exposures while avoiding confounding from previous treatment effects; new use allows for a clear exposure index date.
TODO DANI
Even though many potential confounders will be included in this study, there may be residual bias due to unmeasured or misspecified confounders, such as confounding by indication, differences in physician characteristics that may be associated with drug choice, concomitant use of other drugs started after the index date, and informative censoring at the end of the on-treatment periods. To minimize this risk, we used methods to detect residual bias through our negative and positive controls.
TODO-NAME does not involve human subjects research. The project does, however, use de-identified human data collected during routine healthcare provision. All data partners executing the TODO-NAME studies within their data sources will have received institutional review board (IRB) approval or waiver for participation in accordance with their institutional governance prior to execution (see Table ADD-REF). TODO-NAME executes across a federated and distributed data network, where analysis code is sent to participating data partners and only aggregate summary statistics are returned, with no sharing of patient-level data between organizations.
# IRB approval / waiver statements per data partner, kept inline as
# "&"-delimited text with a header row ("&" because the statements contain
# ";" and ","). One partner per line; the row breaks are what make readr
# parse the string as literal data.
# NOTE: this reuses (overwrites) the name `data_sources`, as in the original.
data_sources <- readr::read_delim(col_names = TRUE, delim = "&", trim_ws = TRUE, file = "
  Data source & Statement
  IBM MarketScan Commercial Claims and Encounters (CCAE) & New England Institutional Review Board and was determined to be exempt from broad IRB approval, as this research project did not involve human subject research.
  IBM MarketScan Medicare Supplemental Database (MDCR) & New England Institutional Review Board and was determined to be exempt from broad IRB approval, as this research project did not involve human subject research.
  IBM MarketScan Multi-State Medicaid Database (MDCD) & New England Institutional Review Board and was determined to be exempt from broad IRB approval, as this research project did not involve human subject research.
  IQVIA Open Claims (IOC) & This is a retrospective database study on de-identified data and is deemed not human subject research. Approval is provided for OHDSI network studies.
  Japan Medical Data Center (JMDC) & New England Institutional Review Board and was determined to be exempt from broad IRB approval, as this research project did not involve human subject research.
  Optum Clinformatics Data Mart (Optum) & New England Institutional Review Board and was determined to be exempt from broad IRB approval, as this research project did not involve human subject research.
  Columbia University Irving Medical Center (CUIMC) & Use of the CUIMC data source was approved by the Columbia University Institutional Review Board as an OHDSI network study (IRB\\# AAAO7805).
  Department of Veterans Affairs (VA) & Use of the VA-OMOP data source was reviewed by the Department of Veterans Affairs Central Institutional Review Board (IRB) and was determined to meet the criteria for exemption under Exemption Category 4(3) and approved the request for Waiver of HIPAA Authorization.

  Information System for Research in Primary Care (SIDIAP) & Use of the SIDIAP data source was approved by the Clinical Research Ethics Committee of IDIAPJGol (project code: 20/070-PCV)
  IQVIA Disease Analyzer Germany (DAG) & This is a retrospective database study on de-identified data and is deemed not human subject research. Approval is provided for OHDSI network studies.
  Optum Electronic Health Records (OptumEHR) & New England Institutional Review Board and was determined to be exempt from broad IRB approval, as this research project did not involve human subject research.
  Yale New Haven Health System (YNHHS) & Use of the YNHHS EHR data source was approved by the Yale University Institutional Review Board as an OHDSI network study (IRB\\# pending).
")

tab <- kable(data_sources, booktabs = TRUE, linesep = "", caption = "IRB approval or waiver statement from partners.") %>%
  kable_styling(bootstrap_options = "striped", latex_options = "striped")

# Explicit column widths and the smaller font are only applied for LaTeX output.
if (knitr::is_latex_output()) {
  tab %>%
    column_spec(1, width = "15em") %>%
    column_spec(2, width = "40em") %>%
    kable_styling(font_size = latex_table_font_size)
} else {
  tab
}
TODO-NAME uses coded data that already exist in electronic databases. In these types of databases, it is not possible to link (i.e., identify a potential causal association between) a particular product and medical event for any specific individual. Thus, the minimum criteria for reporting an adverse event (i.e., identifiable patient, identifiable reporter, a suspect product and event) are not available and adverse events are not reportable as individual adverse event reports. The study results will be assessed for medically important findings.
Open science aims to make scientific research, including its data process and software, and its dissemination, through publication and presentation, accessible to all levels of an inquiring society, amateur or professional [@Woelfle2011-ss] and is a governing principle of TODO-NAME. Open science delivers reproducible, transparent and reliable evidence. All aspects of TODO-NAME (except private patient data) will be open and we will actively encourage other interested researchers, clinicians and patients to participate. This differs fundamentally from traditional studies that rarely open their analytic tools or share all result artifacts, and inform the community about hard-to-verify conclusions at completion.
We will publicly register this protocol and announce its availability for feedback from stakeholders, the OHDSI community and within clinical professional societies. This protocol will link to open source code for all steps to generating diagnostics, effect estimates, figures and tables. Such transparency is possible because we will construct our studies on top of the OHDSI toolstack of open source software tools that are community developed and rigorously tested [@Schuemie2020-wx]. We will publicly host TODO-NAME source code at (https://github.com/ohdsi-studies/TODO-NAME), allowing public contribution and review, and free re-use for anyone’s future research.
TODO-NAME embodies a new approach to generating evidence from healthcare data that overcomes weaknesses in the current process of answering and publishing (or not) one question at a time. Generating evidence for thousands of research and control questions using a systematic process enables us to not only evaluate that process and the coherence and consistency of the evidence, but also to avoid $p$-hacking and publication bias [@Schuemie2018-zi]. We will store and openly communicate all of these results as they become available using a user-friendly web-based app that serves up all descriptive statistics, study diagnostics and effect estimates for each cohort comparison and outcome. Open access to this app will be through a general public facing TODO-NAME web-page.
We will deliver multiple presentations annually in scientific venues including the annual meetings of the American Diabetes Association, American College of Cardiology, American Heart Association and American Medical Informatics Association. We will also prepare multiple scientific publications for clinical, informatics and statistical journals.
We believe in sharing our findings that will guide clinical care with the general public. TODO-NAME will use social-media (Twitter) to facilitate this. With dedicated support from the OHDSI communications specialist, we will deliver regular press releases at key project stages, distributed via the extensive media networks of UCLA, Columbia and Oxford.
# Print the H1N1 vaccination cohort definition shipped with the study package.
# mustWork = TRUE stops with a clear "no file found" error instead of passing
# an empty path on when the package or JSON file is missing.
baseCohortJson <- SqlRender::readSql(
  system.file("cohorts", "H1N1vaccination.json",
              package = "VaccineEffectivenessEvaluation", mustWork = TRUE)
)
# Round-trip through RJSONIO to re-serialise the definition.
# NOTE(review): digits = 50 presumably prevents numeric values from being
# truncated on output -- confirm.
baseCohort <- RJSONIO::fromJSON(baseCohortJson)
baseCohortJson <- RJSONIO::toJSON(baseCohort, digits = 50)
printCohortDefinitionFromNameAndJson(name = "H1N1 Vaccines", json = baseCohortJson)
# Print the seasonal flu vaccination cohort definition shipped with the study
# package. mustWork = TRUE stops with a clear "no file found" error instead of
# passing an empty path on when the package or JSON file is missing.
baseCohortJson <- SqlRender::readSql(
  system.file("cohorts", "FluVaccination.json",
              package = "VaccineEffectivenessEvaluation", mustWork = TRUE)
)
# Round-trip through RJSONIO to re-serialise the definition.
# NOTE(review): digits = 50 presumably prevents numeric values from being
# truncated on output -- confirm.
baseCohort <- RJSONIO::fromJSON(baseCohortJson)
baseCohortJson <- RJSONIO::toJSON(baseCohort, digits = 50)
printCohortDefinitionFromNameAndJson(name = "Seasonal Flu Vaccines", json = baseCohortJson)
# Print the HPV vaccination cohort definition shipped with the study package.
# mustWork = TRUE stops with a clear "no file found" error instead of passing
# an empty path on when the package or JSON file is missing.
baseCohortJson <- SqlRender::readSql(
  system.file("cohorts", "HpvVaccination.json",
              package = "VaccineEffectivenessEvaluation", mustWork = TRUE)
)
# Round-trip through RJSONIO to re-serialise the definition.
# NOTE(review): digits = 50 presumably prevents numeric values from being
# truncated on output -- confirm.
baseCohort <- RJSONIO::fromJSON(baseCohortJson)
baseCohortJson <- RJSONIO::toJSON(baseCohort, digits = 50)
printCohortDefinitionFromNameAndJson(name = "HPV Vaccines", json = baseCohortJson)
# Print the zoster vaccination cohort definition shipped with the study
# package. mustWork = TRUE stops with a clear "no file found" error instead of
# passing an empty path on when the package or JSON file is missing.
baseCohortJson <- SqlRender::readSql(
  system.file("cohorts", "ZosterVaccination.json",
              package = "VaccineEffectivenessEvaluation", mustWork = TRUE)
)
# Round-trip through RJSONIO to re-serialise the definition.
# NOTE(review): digits = 50 presumably prevents numeric values from being
# truncated on output -- confirm.
baseCohort <- RJSONIO::fromJSON(baseCohortJson)
baseCohortJson <- RJSONIO::toJSON(baseCohort, digits = 50)
printCohortDefinitionFromNameAndJson(name = "Zoster Vaccines", json = baseCohortJson)
# Negative control outcomes table, read from the settings shipped with the
# study package. mustWork = TRUE stops with a clear "no file found" error
# instead of handing an empty path to read_csv() when the file is missing.
ncs <- readr::read_csv(
  system.file("settings", "NegativeControls.csv",
              package = "VaccineEffectivenessEvaluation", mustWork = TRUE),
  col_types = readr::cols()
)
colnames(ncs) <- SqlRender::camelCaseToTitleCase(colnames(ncs))

tab <- kable(ncs, booktabs = TRUE, linesep = "", caption = "Negative control outcomes.") %>%
  kable_styling(bootstrap_options = "striped", latex_options = "striped")

# Explicit column widths and the smaller font are only applied for LaTeX output.
if (knitr::is_latex_output()) {
  tab %>%
    column_spec(1, width = "15em") %>%
    column_spec(2, width = "40em") %>%
    kable_styling(font_size = latex_table_font_size)
} else {
  tab
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.