knitr::opts_chunk$set( collapse = TRUE, comment = "#>", fig.width = 8, fig.height = 6, warning = FALSE, message = FALSE )
This vignette demonstrates real-world clinical applications of the Decision Panel Optimization module across different medical specialties and scenarios.
# Load required packages library(meddecide) library(dplyr) library(ggplot2) library(rpart) library(rpart.plot) library(knitr) library(forcats)
# Test Datasets for Decision Panel Optimization Module # Set seed for reproducibility set.seed(42) # Create data directory if it doesn't exist if (!dir.exists("data")) { dir.create("data") } # ============================================================================ # DATASET 1: COVID-19 SCREENING # ============================================================================ create_covid_data <- function(n = 1000, prevalence = 0.15) { # True disease status disease <- rbinom(n, 1, prevalence) # Rapid Antigen Test (RAT) # Sensitivity: 65%, Specificity: 98% rat_prob <- ifelse(disease == 1, 0.65, 0.02) rat_result <- rbinom(n, 1, rat_prob) # PCR Test # Sensitivity: 95%, Specificity: 99% pcr_prob <- ifelse(disease == 1, 0.95, 0.01) pcr_result <- rbinom(n, 1, pcr_prob) # Chest CT # Sensitivity: 90%, Specificity: 85% ct_prob <- ifelse(disease == 1, 0.90, 0.15) ct_result <- rbinom(n, 1, ct_prob) # Clinical symptoms score (0-10) # Higher in disease symptoms <- ifelse(disease == 1, pmin(10, round(rnorm(sum(disease == 1), 7, 2))), pmax(0, round(rnorm(sum(disease == 0), 3, 2)))) # Create dataset covid_data <- data.frame( patient_id = 1:n, rapid_antigen = factor(rat_result, levels = c(0, 1), labels = c("Negative", "Positive")), pcr = factor(pcr_result, levels = c(0, 1), labels = c("Negative", "Positive")), chest_ct = factor(ct_result, levels = c(0, 1), labels = c("Normal", "Abnormal")), symptom_score = symptoms, covid_status = factor(disease, levels = c(0, 1), labels = c("Negative", "Positive")), age = round(rnorm(n, 45, 15)), risk_group = factor(sample(c("Low", "Medium", "High"), n, replace = TRUE, prob = c(0.6, 0.3, 0.1))) ) # Add some missing values realistically # PCR might be missing if rapid test is negative missing_pcr <- which(covid_data$rapid_antigen == "Negative" & runif(n) < 0.3) covid_data$pcr[missing_pcr] <- NA return(covid_data) } # Generate COVID dataset covid_screening_data <- create_covid_data(n = 1000, prevalence = 0.15) # ============================================================================ # DATASET 2: BREAST CANCER SCREENING # ============================================================================ create_breast_cancer_data <- function(n = 2000, prevalence = 0.005) { # True disease status (low prevalence for screening) disease <- rbinom(n, 1, prevalence) # Clinical Breast Exam (CBE) # Sensitivity: 54%, Specificity: 94% cbe_prob <- ifelse(disease == 1, 0.54, 0.06) cbe_result <- rbinom(n, 1, cbe_prob) # Mammography # Sensitivity: 85%, Specificity: 95% mammo_prob <- ifelse(disease == 1, 0.85, 0.05) mammo_result <- rbinom(n, 1, mammo_prob) # Ultrasound # Sensitivity: 80%, Specificity: 90% us_prob <- ifelse(disease == 1, 0.80, 0.10) us_result <- rbinom(n, 1, us_prob) # MRI (for high-risk patients) # Sensitivity: 95%, Specificity: 85% mri_prob <- ifelse(disease == 1, 0.95, 0.15) mri_result <- rbinom(n, 1, mri_prob) # Create risk factors age <- round(rnorm(n, 55, 10)) age[age < 40] <- 40 age[age > 75] <- 75 family_history <- rbinom(n, 1, 0.15) brca_status <- rbinom(n, 1, 0.02) # Create dataset breast_cancer_data <- data.frame( patient_id = 1:n, clinical_exam = factor(cbe_result, levels = c(0, 1), labels = c("Normal", "Abnormal")), mammography = factor(mammo_result, levels = c(0, 1), labels = c("BIRADS 1-2", "BIRADS 3-5")), ultrasound = factor(us_result, levels = c(0, 1), labels = c("Normal", "Suspicious")), mri = factor(mri_result, levels = c(0, 1), labels = c("Normal", "Suspicious")), cancer_status = factor(disease, levels = c(0, 1), labels = c("No Cancer", "Cancer")), age = age, family_history = factor(family_history, levels = c(0, 1), labels = c("No", "Yes")), brca_mutation = factor(brca_status, levels = c(0, 1), labels = c("Negative", "Positive")), breast_density = factor(sample(c("A", "B", "C", "D"), n, replace = TRUE, prob = c(0.1, 0.4, 0.4, 0.1))) ) # MRI typically only done for high-risk low_risk_idx <- which(breast_cancer_data$family_history == "No" & breast_cancer_data$brca_mutation == "Negative") breast_cancer_data$mri[sample(low_risk_idx, length(low_risk_idx) * 0.9)] <- NA return(breast_cancer_data) } # Generate breast cancer dataset breast_cancer_data <- create_breast_cancer_data(n = 2000, prevalence = 0.005) # ============================================================================ # DATASET 3: TUBERCULOSIS DIAGNOSIS # ============================================================================ create_tb_data <- function(n = 1500, prevalence = 0.20) { # True disease status (high prevalence in TB clinic) disease <- rbinom(n, 1, prevalence) # Symptom screening (cough > 2 weeks, fever, weight loss, night sweats) # Sensitivity: 80%, Specificity: 60% symptom_prob <- ifelse(disease == 1, 0.80, 0.40) symptom_result <- rbinom(n, 1, symptom_prob) # Sputum smear microscopy # Sensitivity: 60%, Specificity: 98% smear_prob <- ifelse(disease == 1, 0.60, 0.02) smear_result <- rbinom(n, 1, smear_prob) # GeneXpert MTB/RIF # Sensitivity: 88%, Specificity: 98% genexpert_prob <- ifelse(disease == 1, 0.88, 0.02) genexpert_result <- rbinom(n, 1, genexpert_prob) # Culture (gold standard-ish) # Sensitivity: 95%, Specificity: 100% culture_prob <- ifelse(disease == 1, 0.95, 0.00) culture_result <- rbinom(n, 1, culture_prob) # Chest X-ray # Sensitivity: 85%, Specificity: 75% cxr_prob <- ifelse(disease == 1, 0.85, 0.25) cxr_result <- rbinom(n, 1, cxr_prob) # HIV status affects presentation hiv_status <- rbinom(n, 1, 0.25) # Create dataset tb_data <- data.frame( patient_id = 1:n, symptoms = factor(symptom_result, levels = c(0, 1), labels = c("No", "Yes")), sputum_smear = factor(smear_result, levels = c(0, 1), labels = c("Negative", "Positive")), genexpert = factor(genexpert_result, levels = c(0, 1), labels = c("MTB not detected", "MTB detected")), culture = factor(culture_result, levels = c(0, 1), labels = c("Negative", "Positive")), chest_xray = factor(cxr_result, levels = c(0, 1), labels = c("Normal", "Abnormal")), tb_status = factor(disease, levels = c(0, 1), labels = c("No TB", "TB")), hiv_status = factor(hiv_status, levels = c(0, 1), labels = c("Negative", "Positive")), age = round(rnorm(n, 35, 15)), contact_history = factor(rbinom(n, 1, 0.30), levels = c(0, 1), labels = c("No", "Yes")) ) # Culture takes time, might not be done for all no_culture_idx <- which(tb_data$genexpert == "MTB not detected" & tb_data$symptoms == "No" & runif(n) < 0.4) tb_data$culture[no_culture_idx] <- NA return(tb_data) } # Generate TB dataset tb_diagnosis_data <- create_tb_data(n = 1500, prevalence = 0.20) # ============================================================================ # DATASET 4: MYOCARDIAL INFARCTION RULE-OUT (FIXED) # ============================================================================ create_mi_data <- function(n = 800, prevalence = 0.10) { # True disease status disease <- rbinom(n, 1, prevalence) # ECG changes # Sensitivity: 55%, Specificity: 95% ecg_prob <- ifelse(disease == 1, 0.55, 0.05) ecg_result <- rbinom(n, 1, ecg_prob) # Initial troponin # Sensitivity: 85%, Specificity: 95% trop1_prob <- ifelse(disease == 1, 0.85, 0.05) trop1_result <- rbinom(n, 1, trop1_prob) # 3-hour troponin # Sensitivity: 98%, Specificity: 95% trop3_prob <- ifelse(disease == 1, 0.98, 0.05) trop3_result <- rbinom(n, 1, trop3_prob) # Make sure 3-hour is at least as positive as initial trop3_result <- pmax(trop1_result, trop3_result) # CT Angiography # Sensitivity: 95%, Specificity: 90% cta_prob <- ifelse(disease == 1, 0.95, 0.10) cta_result <- rbinom(n, 1, cta_prob) # Clinical risk score components age <- round(rnorm(n, 60, 15)) age[age < 30] <- 30 age[age > 90] <- 90 # FIXED: Generate chest pain type for each individual based on their disease status chest_pain_type <- character(n) pain_types <- c("Typical", "Atypical", "Non-cardiac") for (i in 1:n) { if (disease[i] == 1) { # Disease present - more likely to have typical pain chest_pain_type[i] <- sample(pain_types, 1, prob = c(0.6, 0.3, 0.1)) } else { # No disease - more likely to have non-cardiac pain chest_pain_type[i] <- sample(pain_types, 1, prob = c(0.1, 0.3, 0.6)) } } # Create dataset mi_data <- data.frame( patient_id = 1:n, ecg = factor(ecg_result, levels = c(0, 1), labels = c("Normal", "Ischemic changes")), troponin_initial = factor(trop1_result, levels = c(0, 1), labels = c("Normal", "Elevated")), troponin_3hr = factor(trop3_result, levels = c(0, 1), labels = c("Normal", "Elevated")), ct_angiography = factor(cta_result, levels = c(0, 1), labels = c("No stenosis", "Significant stenosis")), mi_status = factor(disease, levels = c(0, 1), labels = c("No MI", "MI")), age = age, chest_pain = factor(chest_pain_type), diabetes = factor(rbinom(n, 1, 0.25), levels = c(0, 1), labels = c("No", "Yes")), smoking = factor(rbinom(n, 1, 0.30), levels = c(0, 1), labels = c("No", "Yes")), prior_cad = factor(rbinom(n, 1, 0.20), levels = c(0, 1), labels = c("No", "Yes")) ) # CTA typically only for intermediate risk low_risk_idx <- which(mi_data$chest_pain == "Non-cardiac" & mi_data$ecg == "Normal" & mi_data$troponin_initial == "Normal") if (length(low_risk_idx) > 0) { mi_data$ct_angiography[sample(low_risk_idx, min(length(low_risk_idx), floor(length(low_risk_idx) * 0.8)))] <- NA } return(mi_data) } # Generate MI dataset mi_ruleout_data <- create_mi_data(n = 800, prevalence = 0.10) # ============================================================================ # DATASET 5: THYROID NODULE EVALUATION # ============================================================================ create_thyroid_data <- function(n = 600, prevalence = 0.05) { # True disease status (thyroid cancer) disease <- rbinom(n, 1, prevalence) # Ultrasound features (TI-RADS) # Sensitivity: 90%, Specificity: 70% us_prob <- ifelse(disease == 1, 0.90, 0.30) us_result <- rbinom(n, 1, us_prob) # Fine Needle Aspiration (FNA) cytology # Sensitivity: 95%, Specificity: 98% fna_prob <- ifelse(disease == 1, 0.95, 0.02) fna_result <- rbinom(n, 1, fna_prob) # Molecular testing (ThyroSeq/Afirma) # Sensitivity: 91%, Specificity: 85% molecular_prob <- ifelse(disease == 1, 0.91, 0.15) molecular_result <- rbinom(n, 1, molecular_prob) # Thyroglobulin levels # Sensitivity: 70%, Specificity: 80% tg_prob <- ifelse(disease == 1, 0.70, 0.20) tg_result <- rbinom(n, 1, tg_prob) # Create dataset thyroid_data <- data.frame( patient_id = 1:n, ultrasound = factor(us_result, levels = c(0, 1), labels = c("TI-RADS 1-3", "TI-RADS 4-5")), fna_cytology = factor(fna_result, levels = c(0, 1), labels = c("Benign/Indeterminate", "Suspicious/Malignant")), molecular_test = factor(molecular_result, levels = c(0, 1), labels = c("Benign", "Suspicious")), thyroglobulin = factor(tg_result, levels = c(0, 1), labels = c("Normal", "Elevated")), cancer_status = factor(disease, levels = c(0, 1), labels = c("Benign", "Malignant")), nodule_size = round(rlnorm(n, log(15), 0.5)), age = round(rnorm(n, 50, 15)), gender = factor(sample(c("Female", "Male"), n, replace = TRUE, prob = c(0.75, 0.25))), radiation_history = factor(rbinom(n, 1, 0.05), levels = c(0, 1), labels = c("No", "Yes")) ) # Molecular testing only for indeterminate FNA molecular_not_done <- which(thyroid_data$fna_cytology != "Benign/Indeterminate" | runif(n) < 0.5) thyroid_data$molecular_test[molecular_not_done] <- NA return(thyroid_data) } # Generate thyroid dataset thyroid_nodule_data <- create_thyroid_data(n = 600, prevalence = 0.05) # ============================================================================ # SAVE ALL DATASETS # ============================================================================ # Save as RData file save(covid_screening_data, breast_cancer_data, tb_diagnosis_data, mi_ruleout_data, thyroid_nodule_data, file = "data/decision_panel_test_data.RData") # Also save as CSV files for external use write.csv(covid_screening_data, "data/covid_screening_data.csv", row.names = FALSE) write.csv(breast_cancer_data, "data/breast_cancer_data.csv", row.names = FALSE) write.csv(tb_diagnosis_data, "data/tb_diagnosis_data.csv", row.names = FALSE) write.csv(mi_ruleout_data, "data/mi_ruleout_data.csv", row.names = FALSE) write.csv(thyroid_nodule_data, "data/thyroid_nodule_data.csv", row.names = FALSE) # Print confirmation cat("✓ All datasets generated and saved successfully!\n") # ============================================================================ # DATASET SUMMARIES FOR VERIFICATION # ============================================================================ summarize_dataset <- function(data, disease_col, test_cols) { cat("\nDataset Summary:\n") cat("Total observations:", nrow(data), "\n") # Check if disease column exists and has the expected levels if (disease_col %in% names(data)) { disease_levels <- levels(factor(data[[disease_col]])) if (length(disease_levels) >= 2) { prevalence <- mean(data[[disease_col]] == disease_levels[2], na.rm = TRUE) cat("Disease prevalence:", round(prevalence * 100, 1), "%\n") } else { cat("Disease column found but insufficient levels\n") } } else { cat("Disease column not found\n") } cat("\nTest performance:\n") for (test in test_cols) { if (test %in% names(data) && disease_col %in% names(data)) { # Remove rows with NAs in either variable complete_data <- data[!is.na(data[[test]]) & !is.na(data[[disease_col]]), ] if (nrow(complete_data) > 0) { tab <- table(complete_data[[test]], complete_data[[disease_col]]) if (nrow(tab) >= 2 && ncol(tab) >= 2) { # Assume positive is the second level (index 2) if (nrow(tab) == 2 && ncol(tab) == 2) { sens <- tab[2,2] / sum(tab[,2]) spec <- tab[1,1] / sum(tab[,1]) cat(sprintf(" %s: Sens=%.1f%%, Spec=%.1f%% (n=%d)\n", test, sens*100, spec*100, nrow(complete_data))) } else { cat(sprintf(" %s: Complex table structure (n=%d)\n", test, nrow(complete_data))) } } else { cat(sprintf(" %s: Insufficient data for analysis (n=%d)\n", test, nrow(complete_data))) } } else { cat(sprintf(" %s: No complete cases\n", test)) } } else { cat(sprintf(" %s: Column not found\n", test)) } } } # Print summaries for verification cat("\n", paste(rep("=", 60), collapse = ""), "\n") cat("DATASET SUMMARIES") cat("\n", paste(rep("=", 60), collapse = ""), "\n") cat("\n=== COVID-19 SCREENING DATA ===") summarize_dataset(covid_screening_data, "covid_status", c("rapid_antigen", "pcr", "chest_ct")) cat("\n=== BREAST CANCER SCREENING DATA ===") summarize_dataset(breast_cancer_data, "cancer_status", c("clinical_exam", "mammography", "ultrasound", "mri")) cat("\n=== TUBERCULOSIS DIAGNOSIS DATA ===") summarize_dataset(tb_diagnosis_data, "tb_status", c("symptoms", "sputum_smear", "genexpert", "culture", "chest_xray")) cat("\n=== MYOCARDIAL INFARCTION DATA ===") summarize_dataset(mi_ruleout_data, "mi_status", c("ecg", "troponin_initial", "troponin_3hr", "ct_angiography")) cat("\n=== THYROID NODULE DATA ===") summarize_dataset(thyroid_nodule_data, "cancer_status", c("ultrasound", "fna_cytology", "molecular_test", "thyroglobulin")) cat("\n", paste(rep("=", 60), collapse = ""), "\n")
An emergency department needs to rapidly screen patients for COVID-19 while managing limited resources and preventing nosocomial transmission.
# Examine test characteristics covid_tests <- covid_screening_data %>% select(rapid_antigen, pcr, chest_ct, covid_status) %>% na.omit() # Calculate individual test performance test_performance <- function(test, truth) { tab <- table(test, truth) sens <- tab[2,2] / sum(tab[,2]) spec <- tab[1,1] / sum(tab[,1]) ppv <- tab[2,2] / sum(tab[2,]) npv <- tab[1,1] / sum(tab[1,]) return(data.frame( Sensitivity = sens, Specificity = spec, PPV = ppv, NPV = npv )) } # Individual test performance rapid_perf <- test_performance(covid_tests$rapid_antigen, covid_tests$covid_status) pcr_perf <- test_performance(covid_tests$pcr, covid_tests$covid_status) ct_perf <- test_performance(covid_tests$chest_ct, covid_tests$covid_status) performance_table <- rbind( `Rapid Antigen` = rapid_perf, `PCR` = pcr_perf, `Chest CT` = ct_perf ) kable(round(performance_table * 100, 1), caption = "Individual Test Performance (%)")
# Run optimization for different scenarios # 1. Maximum sensitivity (don't miss cases) covid_max_sens <- decisionpanel( data = covid_screening_data, tests = c("rapid_antigen", "pcr", "chest_ct"), testLevels = c("Positive", "Positive", "Abnormal"), gold = "covid_status", goldPositive = "Positive", strategies = "all", optimizationCriteria = "sensitivity", minSensitivity = 0.95 ) # 2. Cost-effective screening covid_cost_effective <- decisionpanel( data = covid_screening_data, tests = c("rapid_antigen", "pcr", "chest_ct"), testLevels = c("Positive", "Positive", "Abnormal"), gold = "covid_status", goldPositive = "Positive", strategies = "all", optimizationCriteria = "efficiency", useCosts = TRUE, testCosts = "5,50,200", fpCost = 500, fnCost = 5000 )
Based on the analysis, here's a practical algorithm:
# Implement sequential testing algorithm apply_covid_algorithm <- function(data) { n <- nrow(data) decisions <- character(n) tests_used <- character(n) for (i in 1:n) { # Step 1: Rapid antigen test if (data$rapid_antigen[i] == "Positive") { decisions[i] <- "Isolate, Confirm with PCR" tests_used[i] <- "Rapid" } else if (data$symptom_score[i] >= 6) { # Step 2: High symptom score → CT if (!is.na(data$chest_ct[i]) && data$chest_ct[i] == "Abnormal") { decisions[i] <- "Probable COVID, Isolate, PCR" tests_used[i] <- "Rapid + CT" } else { decisions[i] <- "Low probability, Standard care" tests_used[i] <- "Rapid + CT" } } else { decisions[i] <- "Low probability, Standard care" tests_used[i] <- "Rapid" } } return(data.frame(Decision = decisions, Tests = tests_used)) } # Apply algorithm algorithm_results <- apply_covid_algorithm(covid_screening_data[1:10,]) kable(cbind(covid_screening_data[1:10, c("patient_id", "rapid_antigen", "symptom_score", "covid_status")], algorithm_results))
# Simulate different strategies strategies <- expand.grid( use_rapid = c(TRUE, FALSE), use_pcr = c(TRUE, FALSE), use_ct = c(TRUE, FALSE) ) %>% filter(use_rapid | use_pcr | use_ct) # At least one test # Calculate performance for each strategy strategy_performance <- strategies %>% rowwise() %>% mutate( tests = paste(c( if(use_rapid) "RAT" else NULL, if(use_pcr) "PCR" else NULL, if(use_ct) "CT" else NULL ), collapse = "+"), cost = sum(c( if(use_rapid) 5 else 0, if(use_pcr) 50 else 0, if(use_ct) 200 else 0 )), # Simulated performance (would come from actual analysis) sensitivity = case_when( use_rapid & use_pcr & use_ct ~ 0.99, use_pcr & use_ct ~ 0.98, use_rapid & use_pcr ~ 0.97, use_pcr ~ 0.95, use_rapid & use_ct ~ 0.94, use_ct ~ 0.90, use_rapid ~ 0.65 ), specificity = case_when( use_rapid & use_pcr & use_ct ~ 0.83, use_pcr & use_ct ~ 0.84, use_rapid & use_pcr ~ 0.97, use_pcr ~ 0.99, use_rapid & use_ct ~ 0.83, use_ct ~ 0.85, use_rapid ~ 0.98 ) ) # Create cost-effectiveness plot ggplot(strategy_performance, aes(x = cost, y = sensitivity * 100)) + geom_point(aes(size = specificity * 100), alpha = 0.6) + geom_text(aes(label = tests), vjust = -1, size = 3) + geom_line(data = strategy_performance %>% arrange(cost) %>% filter(sensitivity == cummax(sensitivity)), color = "red", alpha = 0.5) + scale_size_continuous(name = "Specificity (%)", range = c(3, 10)) + labs( title = "Cost-Effectiveness of COVID-19 Testing Strategies", x = "Cost per Patient ($)", y = "Sensitivity (%)", caption = "Red line shows cost-effectiveness frontier" ) + theme_minimal()
A population-based breast cancer screening program needs to optimize use of mammography, ultrasound, and MRI based on risk factors.
# Examine population characteristics breast_summary <- breast_cancer_data %>% summarise( n = n(), prevalence = mean(cancer_status == "Cancer"), mean_age = mean(age), pct_family_history = mean(family_history == "Yes") * 100, pct_brca = mean(brca_mutation == "Positive") * 100, pct_dense_breast = mean(breast_density %in% c("C", "D")) * 100 ) kable(breast_summary, digits = 2, caption = "Population Characteristics") # Risk stratification breast_cancer_data <- breast_cancer_data %>% mutate( risk_category = case_when( brca_mutation == "Positive" ~ "High Risk", family_history == "Yes" & age < 50 ~ "High Risk", family_history == "Yes" | breast_density == "D" ~ "Moderate Risk", TRUE ~ "Average Risk" ) ) table(breast_cancer_data$risk_category)
# Analyze each risk group separately risk_groups <- split(breast_cancer_data, breast_cancer_data$risk_category) # High-risk group optimization high_risk_panel <- decisionpanel( data = risk_groups$`High Risk`, tests = c("mammography", "ultrasound", "mri"), testLevels = c("BIRADS 3-5", "Suspicious", "Suspicious"), gold = "cancer_status", goldPositive = "Cancer", strategies = "all", optimizationCriteria = "sensitivity", minSensitivity = 0.95 ) # Average-risk group optimization (cost-conscious) average_risk_panel <- decisionpanel( data = risk_groups$`Average Risk`, tests = c("clinical_exam", "mammography", "ultrasound"), testLevels = c("Abnormal", "BIRADS 3-5", "Suspicious"), gold = "cancer_status", goldPositive = "Cancer", strategies = "all", optimizationCriteria = "efficiency", useCosts = TRUE, testCosts = "20,100,150" )
# Create recommendation table recommendations <- data.frame( Risk_Category = c("High Risk", "Moderate Risk", "Average Risk"), Recommended_Tests = c( "Annual MRI + Mammography", "Annual Mammography + US if dense", "Biennial Mammography" ), Expected_Sensitivity = c("99%", "90%", "85%"), Expected_Specificity = c("80%", "92%", "95%"), Cost_per_Screen = c("$1,100", "$250", "$100"), NNS = c(50, 200, 500) # Number needed to screen ) kable(recommendations, caption = "Risk-Stratified Screening Recommendations")
# Analyze performance by age group age_groups <- breast_cancer_data %>% mutate(age_group = cut(age, breaks = c(40, 50, 60, 70, 80), labels = c("40-49", "50-59", "60-69", "70-79"))) # Calculate mammography performance by age age_performance <- age_groups %>% group_by(age_group) %>% summarise( n = n(), prevalence = mean(cancer_status == "Cancer"), mammography_sens = { tab <- table(mammography, cancer_status) if(nrow(tab) == 2 && ncol(tab) == 2) { tab[2,2] / sum(tab[,2]) } else NA }, mammography_spec = { tab <- table(mammography, cancer_status) if(nrow(tab) == 2 && ncol(tab) == 2) { tab[1,1] / sum(tab[,1]) } else NA } ) # Visualize ggplot(age_performance, aes(x = age_group)) + geom_bar(aes(y = mammography_sens * 100), stat = "identity", fill = "skyblue", alpha = 0.7) + geom_line(aes(y = prevalence * 1000, group = 1), color = "red", size = 2) + geom_point(aes(y = prevalence * 1000), color = "red", size = 3) + scale_y_continuous( name = "Mammography Sensitivity (%)", sec.axis = sec_axis(~./10, name = "Cancer Prevalence per 1000") ) + labs( title = "Mammography Performance by Age Group", x = "Age Group" ) + theme_minimal()
A TB program in a high-burden setting needs to optimize case finding with limited resources.
# TB test combinations for different settings tb_settings <- data.frame( Setting = c("Community", "Clinic", "Hospital", "Contact Tracing"), Prevalence = c(0.02, 0.20, 0.40, 0.10), Available_Tests = c( "Symptoms, CXR", "Symptoms, Smear, GeneXpert, CXR", "All tests", "Symptoms, GeneXpert" ), Budget_per_case = c(10, 30, 100, 50) ) kable(tb_settings, caption = "TB Testing Scenarios by Setting")
# Implement WHO-recommended algorithm apply_tb_algorithm <- function(data) { decisions <- character(nrow(data)) for (i in 1:nrow(data)) { if (data$symptoms[i] == "Yes" || data$contact_history[i] == "Yes") { # Symptomatic or contact: get GeneXpert if (!is.na(data$genexpert[i]) && data$genexpert[i] == "MTB detected") { decisions[i] <- "Start TB treatment" } else if (!is.na(data$chest_xray[i]) && data$chest_xray[i] == "Abnormal") { # CXR abnormal but GeneXpert negative if (!is.na(data$culture[i])) { decisions[i] <- ifelse(data$culture[i] == "Positive", "Start TB treatment", "Not TB, investigate other causes") } else { decisions[i] <- "Clinical decision needed" } } else { decisions[i] <- "TB unlikely" } } else { # Asymptomatic: screen with CXR if available if (!is.na(data$chest_xray[i]) && data$chest_xray[i] == "Abnormal") { decisions[i] <- "Needs further testing" } else { decisions[i] <- "No TB screening needed" } } } return(decisions) } # Apply to sample tb_sample <- tb_diagnosis_data[1:20,] tb_sample$decision <- apply_tb_algorithm(tb_sample) # Show results kable(tb_sample %>% select(patient_id, symptoms, genexpert, chest_xray, tb_status, decision) %>% head(10))
# Compare different TB screening strategies tb_strategies <- data.frame( Strategy = c( "Symptoms only", "Symptoms → Smear", "Symptoms → GeneXpert", "Symptoms → CXR → GeneXpert", "Universal GeneXpert", "Universal CXR → GeneXpert" ), Tests_per_case_found = c(100, 50, 20, 15, 10, 12), Cost_per_case_found = c(100, 200, 400, 350, 800, 600), Sensitivity = c(60, 70, 85, 92, 95, 93), Time_to_diagnosis = c(0, 3, 1, 2, 1, 1) ) # Create multi-dimensional comparison ggplot(tb_strategies, aes(x = Cost_per_case_found, y = Sensitivity)) + geom_point(aes(size = Tests_per_case_found, color = factor(Time_to_diagnosis)), alpha = 0.7) + geom_text(aes(label = Strategy), vjust = -1, size = 3) + scale_size_continuous(name = "Tests per case", range = c(3, 10)) + scale_color_discrete(name = "Days to diagnosis") + labs( title = "TB Screening Strategy Comparison", x = "Cost per TB case found ($)", y = "Sensitivity (%)" ) + theme_minimal()
Emergency department evaluation of chest pain requires rapid, accurate rule-out of myocardial infarction.
# Risk stratification using HEART score components mi_ruleout_data <- mi_ruleout_data %>% mutate( heart_score = (age > 65) * 1 + (chest_pain == "Typical") * 2 + (chest_pain == "Atypical") * 1 + (ecg == "Ischemic changes") * 2 + (prior_cad == "Yes") * 1 + (diabetes == "Yes" | smoking == "Yes") * 1, risk_category = cut(heart_score, breaks = c(-1, 3, 6, 10), labels = c("Low", "Moderate", "High")) ) # Show risk distribution risk_table <- table(mi_ruleout_data$risk_category, mi_ruleout_data$mi_status) kable(prop.table(risk_table, margin = 1) * 100, digits = 1, caption = "MI Prevalence by Risk Category (%)")
# Define protocols by urgency protocols <- list( rapid_rule_out = function(data) { # 0/1-hour protocol data$troponin_initial == "Normal" & data$ecg == "Normal" & data$heart_score <= 3 }, standard_rule_out = function(data) { # 0/3-hour protocol data$troponin_initial == "Normal" & data$troponin_3hr == "Normal" & data$ecg == "Normal" }, rule_in = function(data) { # Immediate rule-in data$troponin_initial == "Elevated" & data$ecg == "Ischemic changes" } ) # Apply protocols mi_ruleout_data <- mi_ruleout_data %>% mutate( rapid_rule_out = protocols$rapid_rule_out(.), standard_rule_out = protocols$standard_rule_out(.) & !rapid_rule_out, rule_in = protocols$rule_in(.), need_further_testing = !rapid_rule_out & !standard_rule_out & !rule_in ) # Summarize protocol performance protocol_performance <- mi_ruleout_data %>% summarise( rapid_rule_out_pct = mean(rapid_rule_out) * 100, rapid_rule_out_npv = sum(rapid_rule_out & mi_status == "No MI") / sum(rapid_rule_out) * 100, standard_rule_out_pct = mean(standard_rule_out) * 100, standard_rule_out_npv = sum(standard_rule_out & mi_status == "No MI") / sum(standard_rule_out) * 100, rule_in_pct = mean(rule_in) * 100, rule_in_ppv = sum(rule_in & mi_status == "MI") / sum(rule_in) * 100 ) kable(t(protocol_performance), digits = 1, caption = "Protocol Performance Metrics")
# Create patient flow diagram data flow_data <- mi_ruleout_data %>% mutate( disposition = case_when( rapid_rule_out ~ "Discharge (1 hour)", standard_rule_out ~ "Discharge (3 hours)", rule_in ~ "Admit CCU", TRUE ~ "Observation/Further testing" ) ) %>% group_by(disposition, mi_status) %>% summarise(n = n()) %>% mutate(pct = n / sum(n) * 100) # Create flow diagram ggplot(flow_data, aes(x = disposition, y = n, fill = mi_status)) + geom_bar(stat = "identity", position = "stack") + geom_text(aes(label = sprintf("%.0f%%", pct)), position = position_stack(vjust = 0.5)) + scale_fill_manual(values = c("No MI" = "lightgreen", "MI" = "salmon")) + labs( title = "Patient Disposition by Protocol", x = "Disposition", y = "Number of Patients", fill = "Final Diagnosis" ) + theme_minimal() + theme(axis.text.x = element_text(angle = 45, hjust = 1))
Thyroid nodules are common but cancer is rare. Optimize use of FNA, molecular testing, and surgery.
# Analyze by nodule size thyroid_by_size <- thyroid_nodule_data %>% mutate(size_category = cut(nodule_size, breaks = c(0, 10, 20, 40, 100), labels = c("<1cm", "1-2cm", "2-4cm", ">4cm"))) %>% group_by(size_category) %>% summarise( n = n(), cancer_rate = mean(cancer_status == "Malignant") * 100, fna_done = mean(!is.na(fna_cytology)) * 100, molecular_done = mean(!is.na(molecular_test)) * 100 ) kable(thyroid_by_size, digits = 1, caption = "Thyroid Nodule Characteristics by Size")
# Implement Bethesda-based algorithm thyroid_algorithm <- decisionpanel( data = thyroid_nodule_data, tests = c("ultrasound", "fna_cytology", "molecular_test"), testLevels = c("TI-RADS 4-5", "Suspicious/Malignant", "Suspicious"), gold = "cancer_status", goldPositive = "Malignant", strategies = "sequential", sequentialStop = "positive", createTree = TRUE, treeMethod = "cart", useCosts = TRUE, testCosts = "200,300,3000", # US, FNA, Molecular fpCost = 10000, # Unnecessary surgery fnCost = 50000 # Missed cancer )
# Simplified decision tree representation cat("Thyroid Nodule Evaluation Algorithm:\n") cat("1. Ultrasound Assessment\n") cat(" ├─ TI-RADS 1-2: No FNA needed\n") cat(" └─ TI-RADS 3-5: Proceed to FNA\n") cat(" ├─ Benign (Bethesda II): Follow-up\n") cat(" ├─ Indeterminate (Bethesda III-IV): Molecular testing\n") cat(" │ ├─ Benign profile: Follow-up\n") cat(" │ └─ Suspicious profile: Surgery\n") cat(" └─ Suspicious/Malignant (Bethesda V-VI): Surgery\n") # Create visual representation of outcomes outcomes <- data.frame( Test_Path = c("US only", "US+FNA", "US+FNA+Molecular", "Direct Surgery"), Patients_pct = c(40, 30, 20, 10), Cancers_found_pct = c(0, 20, 60, 20), Cost = c(200, 500, 3500, 200) ) ggplot(outcomes, aes(x = Test_Path, y = Patients_pct)) + geom_bar(stat = "identity", fill = "lightblue", alpha = 0.7) + geom_line(aes(y = Cancers_found_pct, group = 1), color = "red", size = 2) + geom_point(aes(y = Cancers_found_pct), color = "red", size = 3) + scale_y_continuous( name = "Percentage of Patients", sec.axis = sec_axis(~., name = "Percentage of Cancers Found") ) + labs( title = "Thyroid Nodule Evaluation Pathways", x = "Testing Path" ) + theme_minimal() + theme(axis.text.x = element_text(angle = 45, hjust = 1))
summary_recommendations <- data.frame( Scenario = c("Screening", "Diagnosis", "Emergency", "Surveillance"), Priority = c("High Sensitivity", "Balanced", "Speed + Accuracy", "Specificity"), Strategy = c("Parallel OR", "Sequential", "Rapid protocols", "Confirmatory"), Key_Metric = c("NPV", "Accuracy", "Time to decision", "PPV"), Example = c("Cancer screening", "TB diagnosis", "Chest pain", "Cancer follow-up") ) kable(summary_recommendations, caption = "Testing Strategy Recommendations by Clinical Scenario")
sessionInfo()
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.