knitr::opts_chunk$set( collapse = TRUE, comment = "#>", fig.width = 7, fig.height = 5 )
The Decision Panel Optimization module in the meddecide
package provides a comprehensive framework for optimizing diagnostic test combinations in medical decision-making. This vignette introduces the basic concepts and demonstrates core functionality.
When multiple diagnostic tests are available, they can be combined in different ways:
The module can optimize test panels based on various criteria:
# Install meddecide package install.packages("meddecide") # Or install from GitHub devtools::install_github("ClinicoPath/meddecide")
# Load required packages library(meddecide) library(dplyr) library(ggplot2) library(rpart) library(rpart.plot) library(knitr) library(forcats)
# Test Datasets for Decision Panel Optimization Module # Set seed for reproducibility set.seed(42) # Create data directory if it doesn't exist if (!dir.exists("data")) { dir.create("data") } # ============================================================================ # DATASET 1: COVID-19 SCREENING # ============================================================================ create_covid_data <- function(n = 1000, prevalence = 0.15) { # True disease status disease <- rbinom(n, 1, prevalence) # Rapid Antigen Test (RAT) # Sensitivity: 65%, Specificity: 98% rat_prob <- ifelse(disease == 1, 0.65, 0.02) rat_result <- rbinom(n, 1, rat_prob) # PCR Test # Sensitivity: 95%, Specificity: 99% pcr_prob <- ifelse(disease == 1, 0.95, 0.01) pcr_result <- rbinom(n, 1, pcr_prob) # Chest CT # Sensitivity: 90%, Specificity: 85% ct_prob <- ifelse(disease == 1, 0.90, 0.15) ct_result <- rbinom(n, 1, ct_prob) # Clinical symptoms score (0-10) # Higher in disease symptoms <- ifelse(disease == 1, pmin(10, round(rnorm(sum(disease == 1), 7, 2))), pmax(0, round(rnorm(sum(disease == 0), 3, 2)))) # Create dataset covid_data <- data.frame( patient_id = 1:n, rapid_antigen = factor(rat_result, levels = c(0, 1), labels = c("Negative", "Positive")), pcr = factor(pcr_result, levels = c(0, 1), labels = c("Negative", "Positive")), chest_ct = factor(ct_result, levels = c(0, 1), labels = c("Normal", "Abnormal")), symptom_score = symptoms, covid_status = factor(disease, levels = c(0, 1), labels = c("Negative", "Positive")), age = round(rnorm(n, 45, 15)), risk_group = factor(sample(c("Low", "Medium", "High"), n, replace = TRUE, prob = c(0.6, 0.3, 0.1))) ) # Add some missing values realistically # PCR might be missing if rapid test is negative missing_pcr <- which(covid_data$rapid_antigen == "Negative" & runif(n) < 0.3) covid_data$pcr[missing_pcr] <- NA return(covid_data) } # Generate COVID dataset covid_screening_data <- create_covid_data(n = 1000, prevalence = 0.15) # ============================================================================ # DATASET 2: BREAST CANCER SCREENING # ============================================================================ create_breast_cancer_data <- function(n = 2000, prevalence = 0.005) { # True disease status (low prevalence for screening) disease <- rbinom(n, 1, prevalence) # Clinical Breast Exam (CBE) # Sensitivity: 54%, Specificity: 94% cbe_prob <- ifelse(disease == 1, 0.54, 0.06) cbe_result <- rbinom(n, 1, cbe_prob) # Mammography # Sensitivity: 85%, Specificity: 95% mammo_prob <- ifelse(disease == 1, 0.85, 0.05) mammo_result <- rbinom(n, 1, mammo_prob) # Ultrasound # Sensitivity: 80%, Specificity: 90% us_prob <- ifelse(disease == 1, 0.80, 0.10) us_result <- rbinom(n, 1, us_prob) # MRI (for high-risk patients) # Sensitivity: 95%, Specificity: 85% mri_prob <- ifelse(disease == 1, 0.95, 0.15) mri_result <- rbinom(n, 1, mri_prob) # Create risk factors age <- round(rnorm(n, 55, 10)) age[age < 40] <- 40 age[age > 75] <- 75 family_history <- rbinom(n, 1, 0.15) brca_status <- rbinom(n, 1, 0.02) # Create dataset breast_cancer_data <- data.frame( patient_id = 1:n, clinical_exam = factor(cbe_result, levels = c(0, 1), labels = c("Normal", "Abnormal")), mammography = factor(mammo_result, levels = c(0, 1), labels = c("BIRADS 1-2", "BIRADS 3-5")), ultrasound = factor(us_result, levels = c(0, 1), labels = c("Normal", "Suspicious")), mri = factor(mri_result, levels = c(0, 1), labels = c("Normal", "Suspicious")), cancer_status = factor(disease, levels = c(0, 1), labels = c("No Cancer", "Cancer")), age = age, family_history = factor(family_history, levels = c(0, 1), labels = c("No", "Yes")), brca_mutation = factor(brca_status, levels = c(0, 1), labels = c("Negative", "Positive")), breast_density = factor(sample(c("A", "B", "C", "D"), n, replace = TRUE, prob = c(0.1, 0.4, 0.4, 0.1))) ) # MRI typically only done for high-risk low_risk_idx <- which(breast_cancer_data$family_history == "No" & breast_cancer_data$brca_mutation == "Negative") breast_cancer_data$mri[sample(low_risk_idx, length(low_risk_idx) * 0.9)] <- NA return(breast_cancer_data) } # Generate breast cancer dataset breast_cancer_data <- create_breast_cancer_data(n = 2000, prevalence = 0.005) # ============================================================================ # DATASET 3: TUBERCULOSIS DIAGNOSIS # ============================================================================ create_tb_data <- function(n = 1500, prevalence = 0.20) { # True disease status (high prevalence in TB clinic) disease <- rbinom(n, 1, prevalence) # Symptom screening (cough > 2 weeks, fever, weight loss, night sweats) # Sensitivity: 80%, Specificity: 60% symptom_prob <- ifelse(disease == 1, 0.80, 0.40) symptom_result <- rbinom(n, 1, symptom_prob) # Sputum smear microscopy # Sensitivity: 60%, Specificity: 98% smear_prob <- ifelse(disease == 1, 0.60, 0.02) smear_result <- rbinom(n, 1, smear_prob) # GeneXpert MTB/RIF # Sensitivity: 88%, Specificity: 98% genexpert_prob <- ifelse(disease == 1, 0.88, 0.02) genexpert_result <- rbinom(n, 1, genexpert_prob) # Culture (gold standard-ish) # Sensitivity: 95%, Specificity: 100% culture_prob <- ifelse(disease == 1, 0.95, 0.00) culture_result <- rbinom(n, 1, culture_prob) # Chest X-ray # Sensitivity: 85%, Specificity: 75% cxr_prob <- ifelse(disease == 1, 0.85, 0.25) cxr_result <- rbinom(n, 1, cxr_prob) # HIV status affects presentation hiv_status <- rbinom(n, 1, 0.25) # Create dataset tb_data <- data.frame( patient_id = 1:n, symptoms = factor(symptom_result, levels = c(0, 1), labels = c("No", "Yes")), sputum_smear = factor(smear_result, levels = c(0, 1), labels = c("Negative", "Positive")), genexpert = factor(genexpert_result, levels = c(0, 1), labels = c("MTB not detected", "MTB detected")), culture = factor(culture_result, levels = c(0, 1), labels = c("Negative", "Positive")), chest_xray = factor(cxr_result, levels = c(0, 1), labels = c("Normal", "Abnormal")), tb_status = factor(disease, levels = c(0, 1), labels = c("No TB", "TB")), hiv_status = factor(hiv_status, levels = c(0, 1), labels = c("Negative", "Positive")), age = round(rnorm(n, 35, 15)), contact_history = factor(rbinom(n, 1, 0.30), levels = c(0, 1), labels = c("No", "Yes")) ) # Culture takes time, might not be done for all no_culture_idx <- which(tb_data$genexpert == "MTB not detected" & tb_data$symptoms == "No" & runif(n) < 0.4) tb_data$culture[no_culture_idx] <- NA return(tb_data) } # Generate TB dataset tb_diagnosis_data <- create_tb_data(n = 1500, prevalence = 0.20) # ============================================================================ # DATASET 4: MYOCARDIAL INFARCTION RULE-OUT (FIXED) # ============================================================================ create_mi_data <- function(n = 800, prevalence = 0.10) { # True disease status disease <- rbinom(n, 1, prevalence) # ECG changes # Sensitivity: 55%, Specificity: 95% ecg_prob <- ifelse(disease == 1, 0.55, 0.05) ecg_result <- rbinom(n, 1, ecg_prob) # Initial troponin # Sensitivity: 85%, Specificity: 95% trop1_prob <- ifelse(disease == 1, 0.85, 0.05) trop1_result <- rbinom(n, 1, trop1_prob) # 3-hour troponin # Sensitivity: 98%, Specificity: 95% trop3_prob <- ifelse(disease == 1, 0.98, 0.05) trop3_result <- rbinom(n, 1, trop3_prob) # Make sure 3-hour is at least as positive as initial trop3_result <- pmax(trop1_result, trop3_result) # CT Angiography # Sensitivity: 95%, Specificity: 90% cta_prob <- ifelse(disease == 1, 0.95, 0.10) cta_result <- rbinom(n, 1, cta_prob) # Clinical risk score components age <- round(rnorm(n, 60, 15)) age[age < 30] <- 30 age[age > 90] <- 90 # FIXED: Generate chest pain type for each individual based on their disease status chest_pain_type <- character(n) pain_types <- c("Typical", "Atypical", "Non-cardiac") for (i in 1:n) { if (disease[i] == 1) { # Disease present - more likely to have typical pain chest_pain_type[i] <- sample(pain_types, 1, prob = c(0.6, 0.3, 0.1)) } else { # No disease - more likely to have non-cardiac pain chest_pain_type[i] <- sample(pain_types, 1, prob = c(0.1, 0.3, 0.6)) } } # Create dataset mi_data <- data.frame( patient_id = 1:n, ecg = factor(ecg_result, levels = c(0, 1), labels = c("Normal", "Ischemic changes")), troponin_initial = factor(trop1_result, levels = c(0, 1), labels = c("Normal", "Elevated")), troponin_3hr = factor(trop3_result, levels = c(0, 1), labels = c("Normal", "Elevated")), ct_angiography = factor(cta_result, levels = c(0, 1), labels = c("No stenosis", "Significant stenosis")), mi_status = factor(disease, levels = c(0, 1), labels = c("No MI", "MI")), age = age, chest_pain = factor(chest_pain_type), diabetes = factor(rbinom(n, 1, 0.25), levels = c(0, 1), labels = c("No", "Yes")), smoking = factor(rbinom(n, 1, 0.30), levels = c(0, 1), labels = c("No", "Yes")), prior_cad = factor(rbinom(n, 1, 0.20), levels = c(0, 1), labels = c("No", "Yes")) ) # CTA typically only for intermediate risk low_risk_idx <- which(mi_data$chest_pain == "Non-cardiac" & mi_data$ecg == "Normal" & mi_data$troponin_initial == "Normal") if (length(low_risk_idx) > 0) { mi_data$ct_angiography[sample(low_risk_idx, min(length(low_risk_idx), floor(length(low_risk_idx) * 0.8)))] <- NA } return(mi_data) } # Generate MI dataset mi_ruleout_data <- create_mi_data(n = 800, prevalence = 0.10) # ============================================================================ # DATASET 5: THYROID NODULE EVALUATION # ============================================================================ create_thyroid_data <- function(n = 600, prevalence = 0.05) { # True disease status (thyroid cancer) disease <- rbinom(n, 1, prevalence) # Ultrasound features (TI-RADS) # Sensitivity: 90%, Specificity: 70% us_prob <- ifelse(disease == 1, 0.90, 0.30) us_result <- rbinom(n, 1, us_prob) # Fine Needle Aspiration (FNA) cytology # Sensitivity: 95%, Specificity: 98% fna_prob <- ifelse(disease == 1, 0.95, 0.02) fna_result <- rbinom(n, 1, fna_prob) # Molecular testing (ThyroSeq/Afirma) # Sensitivity: 91%, Specificity: 85% molecular_prob <- ifelse(disease == 1, 0.91, 0.15) molecular_result <- rbinom(n, 1, molecular_prob) # Thyroglobulin levels # Sensitivity: 70%, Specificity: 80% tg_prob <- ifelse(disease == 1, 0.70, 0.20) tg_result <- rbinom(n, 1, tg_prob) # Create dataset thyroid_data <- data.frame( patient_id = 1:n, ultrasound = factor(us_result, levels = c(0, 1), labels = c("TI-RADS 1-3", "TI-RADS 4-5")), fna_cytology = factor(fna_result, levels = c(0, 1), labels = c("Benign/Indeterminate", "Suspicious/Malignant")), molecular_test = factor(molecular_result, levels = c(0, 1), labels = c("Benign", "Suspicious")), thyroglobulin = factor(tg_result, levels = c(0, 1), labels = c("Normal", "Elevated")), cancer_status = factor(disease, levels = c(0, 1), labels = c("Benign", "Malignant")), nodule_size = round(rlnorm(n, log(15), 0.5)), age = round(rnorm(n, 50, 15)), gender = factor(sample(c("Female", "Male"), n, replace = TRUE, prob = c(0.75, 0.25))), radiation_history = factor(rbinom(n, 1, 0.05), levels = c(0, 1), labels = c("No", "Yes")) ) # Molecular testing only for indeterminate FNA molecular_not_done <- which(thyroid_data$fna_cytology != "Benign/Indeterminate" | runif(n) < 0.5) thyroid_data$molecular_test[molecular_not_done] <- NA return(thyroid_data) } # Generate thyroid dataset thyroid_nodule_data <- create_thyroid_data(n = 600, prevalence = 0.05) # ============================================================================ # SAVE ALL DATASETS # ============================================================================ # Save as RData file save(covid_screening_data, breast_cancer_data, tb_diagnosis_data, mi_ruleout_data, thyroid_nodule_data, file = "data/decision_panel_test_data.RData") # Also save as CSV files for external use write.csv(covid_screening_data, "data/covid_screening_data.csv", row.names = FALSE) write.csv(breast_cancer_data, "data/breast_cancer_data.csv", row.names = FALSE) write.csv(tb_diagnosis_data, "data/tb_diagnosis_data.csv", row.names = FALSE) write.csv(mi_ruleout_data, "data/mi_ruleout_data.csv", row.names = FALSE) write.csv(thyroid_nodule_data, "data/thyroid_nodule_data.csv", row.names = FALSE) # Print confirmation cat("✓ All datasets generated and saved successfully!\n") # ============================================================================ # DATASET SUMMARIES FOR VERIFICATION # ============================================================================ summarize_dataset <- function(data, disease_col, test_cols) { cat("\nDataset Summary:\n") cat("Total observations:", nrow(data), "\n") # Check if disease column exists and has the expected levels if (disease_col %in% names(data)) { disease_levels <- levels(factor(data[[disease_col]])) if (length(disease_levels) >= 2) { prevalence <- mean(data[[disease_col]] == disease_levels[2], na.rm = TRUE) cat("Disease prevalence:", round(prevalence * 100, 1), "%\n") } else { cat("Disease column found but insufficient levels\n") } } else { cat("Disease column not found\n") } cat("\nTest performance:\n") for (test in test_cols) { if (test %in% names(data) && disease_col %in% names(data)) { # Remove rows with NAs in either variable complete_data <- data[!is.na(data[[test]]) & !is.na(data[[disease_col]]), ] if (nrow(complete_data) > 0) { tab <- table(complete_data[[test]], complete_data[[disease_col]]) if (nrow(tab) >= 2 && ncol(tab) >= 2) { # Assume positive is the second level (index 2) if (nrow(tab) == 2 && ncol(tab) == 2) { sens <- tab[2,2] / sum(tab[,2]) spec <- tab[1,1] / sum(tab[,1]) cat(sprintf(" %s: Sens=%.1f%%, Spec=%.1f%% (n=%d)\n", test, sens*100, spec*100, nrow(complete_data))) } else { cat(sprintf(" %s: Complex table structure (n=%d)\n", test, nrow(complete_data))) } } else { cat(sprintf(" %s: Insufficient data for analysis (n=%d)\n", test, nrow(complete_data))) } } else { cat(sprintf(" %s: No complete cases\n", test)) } } else { cat(sprintf(" %s: Column not found\n", test)) } } } # Print summaries for verification cat("\n", paste(rep("=", 60), collapse = ""), "\n") cat("DATASET SUMMARIES") cat("\n", paste(rep("=", 60), collapse = ""), "\n") cat("\n=== COVID-19 SCREENING DATA ===") summarize_dataset(covid_screening_data, "covid_status", c("rapid_antigen", "pcr", "chest_ct")) cat("\n=== BREAST CANCER SCREENING DATA ===") summarize_dataset(breast_cancer_data, "cancer_status", c("clinical_exam", "mammography", "ultrasound", "mri")) cat("\n=== TUBERCULOSIS DIAGNOSIS DATA ===") summarize_dataset(tb_diagnosis_data, "tb_status", c("symptoms", "sputum_smear", "genexpert", "culture", "chest_xray")) cat("\n=== MYOCARDIAL INFARCTION DATA ===") summarize_dataset(mi_ruleout_data, "mi_status", c("ecg", "troponin_initial", "troponin_3hr", "ct_angiography")) cat("\n=== THYROID NODULE DATA ===") summarize_dataset(thyroid_nodule_data, "cancer_status", c("ultrasound", "fna_cytology", "molecular_test", "thyroglobulin")) cat("\n", paste(rep("=", 60), collapse = ""), "\n")
Let's start with a simple example using COVID-19 screening data:
# Examine the data structure str(covid_screening_data) # Check disease prevalence table(covid_screening_data$covid_status) prop.table(table(covid_screening_data$covid_status))
# Basic decision panel analysis covid_panel <- decisionpanel( data = covid_screening_data, tests = c("rapid_antigen", "pcr", "chest_ct"), testLevels = c("Positive", "Positive", "Abnormal"), gold = "covid_status", goldPositive = "Positive", strategies = "all", optimizationCriteria = "accuracy" )
The analysis provides several key outputs:
# Simulate parallel testing with ANY rule # Positive if rapid_antigen OR pcr is positive parallel_any <- with(covid_screening_data, rapid_antigen == "Positive" | pcr == "Positive" ) # Create confusion matrix conf_matrix_any <- table( Predicted = parallel_any, Actual = covid_screening_data$covid_status == "Positive" ) print(conf_matrix_any) # Calculate metrics sensitivity_any <- conf_matrix_any[2,2] / sum(conf_matrix_any[,2]) specificity_any <- conf_matrix_any[1,1] / sum(conf_matrix_any[,1]) cat("Parallel ANY Rule:\n") cat(sprintf("Sensitivity: %.1f%%\n", sensitivity_any * 100)) cat(sprintf("Specificity: %.1f%%\n", specificity_any * 100))
# Simulate sequential testing # Start with rapid test, only do PCR if rapid is positive sequential_result <- rep("Negative", nrow(covid_screening_data)) # Those with positive rapid test rapid_pos_idx <- which(covid_screening_data$rapid_antigen == "Positive") # Among those, check PCR sequential_result[rapid_pos_idx] <- ifelse(covid_screening_data$pcr[rapid_pos_idx] == "Positive", "Positive", "Negative") # Create confusion matrix conf_matrix_seq <- table( Predicted = sequential_result == "Positive", Actual = covid_screening_data$covid_status == "Positive" ) print(conf_matrix_seq) # Calculate metrics sensitivity_seq <- conf_matrix_seq[2,2] / sum(conf_matrix_seq[,2]) specificity_seq <- conf_matrix_seq[1,1] / sum(conf_matrix_seq[,1]) cat("\nSequential Testing:\n") cat(sprintf("Sensitivity: %.1f%%\n", sensitivity_seq * 100)) cat(sprintf("Specificity: %.1f%%\n", specificity_seq * 100)) # Calculate cost savings pcr_tests_saved <- sum(covid_screening_data$rapid_antigen == "Negative") cat(sprintf("PCR tests saved: %d (%.1f%%)\n", pcr_tests_saved, pcr_tests_saved/nrow(covid_screening_data) * 100))
When costs are considered, the optimal strategy may change:
# Analysis with costs covid_panel_cost <- decisionpanel( data = covid_screening_data, tests = c("rapid_antigen", "pcr", "chest_ct"), testLevels = c("Positive", "Positive", "Abnormal"), gold = "covid_status", goldPositive = "Positive", strategies = "all", optimizationCriteria = "utility", useCosts = TRUE, testCosts = "5,50,200", # Costs for each test fpCost = 500, # Cost of false positive fnCost = 5000 # Cost of false negative )
# Create performance comparison data strategies <- data.frame( Strategy = c("Rapid Only", "PCR Only", "Parallel ANY", "Sequential"), Sensitivity = c(65, 95, 98, 62), Specificity = c(98, 99, 97, 99.9), Cost = c(5, 50, 55, 15) ) # Plot sensitivity vs specificity ggplot(strategies, aes(x = 100 - Specificity, y = Sensitivity)) + geom_point(aes(size = Cost), alpha = 0.6) + geom_text(aes(label = Strategy), vjust = -1) + scale_size_continuous(range = c(3, 10)) + xlim(0, 5) + ylim(60, 100) + labs( title = "Testing Strategy Comparison", x = "False Positive Rate (%)", y = "Sensitivity (%)", size = "Cost ($)" ) + theme_minimal()
Decision trees provide clear algorithms for clinical use:
# Generate decision tree covid_tree <- decisionpanel( data = covid_screening_data, tests = c("rapid_antigen", "pcr", "chest_ct", "symptom_score"), testLevels = c("Positive", "Positive", "Abnormal", ">5"), gold = "covid_status", goldPositive = "Positive", createTree = TRUE, treeMethod = "cart", maxDepth = 3 )
A typical decision tree output might look like:
1. Start with Rapid Antigen Test ├─ If Positive (2% of patients) │ └─ Confirm with PCR │ ├─ If Positive → COVID Positive (PPV: 95%) │ └─ If Negative → COVID Negative (NPV: 98%) └─ If Negative (98% of patients) ├─ If Symptoms > 5 │ └─ Perform Chest CT │ ├─ If Abnormal → Perform PCR │ └─ If Normal → COVID Negative └─ If Symptoms ≤ 5 → COVID Negative
Validate panel performance using k-fold cross-validation:
# Run with cross-validation covid_panel_cv <- decisionpanel( data = covid_screening_data, tests = c("rapid_antigen", "pcr", "chest_ct"), testLevels = c("Positive", "Positive", "Abnormal"), gold = "covid_status", goldPositive = "Positive", crossValidate = TRUE, nFolds = 5, seed = 123 )
Get uncertainty estimates for performance metrics:
# Run with bootstrap covid_panel_boot <- decisionpanel( data = covid_screening_data, tests = c("rapid_antigen", "pcr", "chest_ct"), testLevels = c("Positive", "Positive", "Abnormal"), gold = "covid_status", goldPositive = "Positive", bootstrap = TRUE, bootReps = 1000, seed = 123 )
The Decision Panel Optimization module provides a systematic approach to combining diagnostic tests. By considering various strategies, costs, and constraints, it helps identify practical testing algorithms that balance performance with resource utilization.
sessionInfo()
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.