In sbalci/ClinicoPathJamoviModule: Analysis for Clinicopathological Research

knitr::opts_chunk$set(
  collapse = TRUE,
  comment = "#>",
  fig.width = 8,
  fig.height = 6,
  warning = FALSE,
  message = FALSE
)

Introduction

This vignette demonstrates real-world clinical applications of the Decision Panel Optimization module across different medical specialties and scenarios.

# Load required packages
library(meddecide)
library(dplyr)
library(ggplot2)
library(rpart)
library(rpart.plot)
library(knitr)
library(forcats)

# Test Datasets for Decision Panel Optimization Module

# Set seed for reproducibility
set.seed(42)

# Create data directory if it doesn't exist
if (!dir.exists("data")) {
  dir.create("data")
}

# ============================================================================
# DATASET 1: COVID-19 SCREENING
# ============================================================================

create_covid_data <- function(n = 1000, prevalence = 0.15) {
  # True disease status
  disease <- rbinom(n, 1, prevalence)

  # Rapid Antigen Test (RAT)
  # Sensitivity: 65%, Specificity: 98%
  rat_prob <- ifelse(disease == 1, 0.65, 0.02)
  rat_result <- rbinom(n, 1, rat_prob)

  # PCR Test
  # Sensitivity: 95%, Specificity: 99%
  pcr_prob <- ifelse(disease == 1, 0.95, 0.01)
  pcr_result <- rbinom(n, 1, pcr_prob)

  # Chest CT
  # Sensitivity: 90%, Specificity: 85%
  ct_prob <- ifelse(disease == 1, 0.90, 0.15)
  ct_result <- rbinom(n, 1, ct_prob)

  # Clinical symptoms score (0-10)
  # Higher in disease
  symptoms <- ifelse(disease == 1,
                     pmin(10, round(rnorm(sum(disease == 1), 7, 2))),
                     pmax(0, round(rnorm(sum(disease == 0), 3, 2))))

  # Create dataset
  covid_data <- data.frame(
    patient_id = 1:n,
    rapid_antigen = factor(rat_result, levels = c(0, 1), 
                          labels = c("Negative", "Positive")),
    pcr = factor(pcr_result, levels = c(0, 1), 
                labels = c("Negative", "Positive")),
    chest_ct = factor(ct_result, levels = c(0, 1), 
                     labels = c("Normal", "Abnormal")),
    symptom_score = symptoms,
    covid_status = factor(disease, levels = c(0, 1), 
                         labels = c("Negative", "Positive")),
    age = round(rnorm(n, 45, 15)),
    risk_group = factor(sample(c("Low", "Medium", "High"), n, 
                              replace = TRUE, prob = c(0.6, 0.3, 0.1)))
  )

  # Add some missing values realistically
  # PCR might be missing if rapid test is negative
  missing_pcr <- which(covid_data$rapid_antigen == "Negative" & 
                      runif(n) < 0.3)
  covid_data$pcr[missing_pcr] <- NA

  return(covid_data)
}

# Generate COVID dataset
covid_screening_data <- create_covid_data(n = 1000, prevalence = 0.15)

# ============================================================================
# DATASET 2: BREAST CANCER SCREENING
# ============================================================================

create_breast_cancer_data <- function(n = 2000, prevalence = 0.005) {
  # True disease status (low prevalence for screening)
  disease <- rbinom(n, 1, prevalence)

  # Clinical Breast Exam (CBE)
  # Sensitivity: 54%, Specificity: 94%
  cbe_prob <- ifelse(disease == 1, 0.54, 0.06)
  cbe_result <- rbinom(n, 1, cbe_prob)

  # Mammography
  # Sensitivity: 85%, Specificity: 95%
  mammo_prob <- ifelse(disease == 1, 0.85, 0.05)
  mammo_result <- rbinom(n, 1, mammo_prob)

  # Ultrasound
  # Sensitivity: 80%, Specificity: 90%
  us_prob <- ifelse(disease == 1, 0.80, 0.10)
  us_result <- rbinom(n, 1, us_prob)

  # MRI (for high-risk patients)
  # Sensitivity: 95%, Specificity: 85%
  mri_prob <- ifelse(disease == 1, 0.95, 0.15)
  mri_result <- rbinom(n, 1, mri_prob)

  # Create risk factors
  age <- round(rnorm(n, 55, 10))
  age[age < 40] <- 40
  age[age > 75] <- 75

  family_history <- rbinom(n, 1, 0.15)
  brca_status <- rbinom(n, 1, 0.02)

  # Create dataset
  breast_cancer_data <- data.frame(
    patient_id = 1:n,
    clinical_exam = factor(cbe_result, levels = c(0, 1), 
                          labels = c("Normal", "Abnormal")),
    mammography = factor(mammo_result, levels = c(0, 1), 
                        labels = c("BIRADS 1-2", "BIRADS 3-5")),
    ultrasound = factor(us_result, levels = c(0, 1), 
                       labels = c("Normal", "Suspicious")),
    mri = factor(mri_result, levels = c(0, 1), 
                labels = c("Normal", "Suspicious")),
    cancer_status = factor(disease, levels = c(0, 1), 
                          labels = c("No Cancer", "Cancer")),
    age = age,
    family_history = factor(family_history, levels = c(0, 1), 
                           labels = c("No", "Yes")),
    brca_mutation = factor(brca_status, levels = c(0, 1), 
                          labels = c("Negative", "Positive")),
    breast_density = factor(sample(c("A", "B", "C", "D"), n, 
                                  replace = TRUE, 
                                  prob = c(0.1, 0.4, 0.4, 0.1)))
  )

  # MRI typically only done for high-risk
  low_risk_idx <- which(breast_cancer_data$family_history == "No" & 
                       breast_cancer_data$brca_mutation == "Negative")
  breast_cancer_data$mri[sample(low_risk_idx, 
                               length(low_risk_idx) * 0.9)] <- NA

  return(breast_cancer_data)
}

# Generate breast cancer dataset
breast_cancer_data <- create_breast_cancer_data(n = 2000, prevalence = 0.005)

# ============================================================================
# DATASET 3: TUBERCULOSIS DIAGNOSIS
# ============================================================================

create_tb_data <- function(n = 1500, prevalence = 0.20) {
  # True disease status (high prevalence in TB clinic)
  disease <- rbinom(n, 1, prevalence)

  # Symptom screening (cough > 2 weeks, fever, weight loss, night sweats)
  # Sensitivity: 80%, Specificity: 60%
  symptom_prob <- ifelse(disease == 1, 0.80, 0.40)
  symptom_result <- rbinom(n, 1, symptom_prob)

  # Sputum smear microscopy
  # Sensitivity: 60%, Specificity: 98%
  smear_prob <- ifelse(disease == 1, 0.60, 0.02)
  smear_result <- rbinom(n, 1, smear_prob)

  # GeneXpert MTB/RIF
  # Sensitivity: 88%, Specificity: 98%
  genexpert_prob <- ifelse(disease == 1, 0.88, 0.02)
  genexpert_result <- rbinom(n, 1, genexpert_prob)

  # Culture (gold standard-ish)
  # Sensitivity: 95%, Specificity: 100%
  culture_prob <- ifelse(disease == 1, 0.95, 0.00)
  culture_result <- rbinom(n, 1, culture_prob)

  # Chest X-ray
  # Sensitivity: 85%, Specificity: 75%
  cxr_prob <- ifelse(disease == 1, 0.85, 0.25)
  cxr_result <- rbinom(n, 1, cxr_prob)

  # HIV status affects presentation
  hiv_status <- rbinom(n, 1, 0.25)

  # Create dataset
  tb_data <- data.frame(
    patient_id = 1:n,
    symptoms = factor(symptom_result, levels = c(0, 1), 
                     labels = c("No", "Yes")),
    sputum_smear = factor(smear_result, levels = c(0, 1), 
                         labels = c("Negative", "Positive")),
    genexpert = factor(genexpert_result, levels = c(0, 1), 
                      labels = c("MTB not detected", "MTB detected")),
    culture = factor(culture_result, levels = c(0, 1), 
                    labels = c("Negative", "Positive")),
    chest_xray = factor(cxr_result, levels = c(0, 1), 
                       labels = c("Normal", "Abnormal")),
    tb_status = factor(disease, levels = c(0, 1), 
                      labels = c("No TB", "TB")),
    hiv_status = factor(hiv_status, levels = c(0, 1), 
                       labels = c("Negative", "Positive")),
    age = round(rnorm(n, 35, 15)),
    contact_history = factor(rbinom(n, 1, 0.30), levels = c(0, 1), 
                           labels = c("No", "Yes"))
  )

  # Culture takes time, might not be done for all
  no_culture_idx <- which(tb_data$genexpert == "MTB not detected" & 
                         tb_data$symptoms == "No" & 
                         runif(n) < 0.4)
  tb_data$culture[no_culture_idx] <- NA

  return(tb_data)
}

# Generate TB dataset
tb_diagnosis_data <- create_tb_data(n = 1500, prevalence = 0.20)

# ============================================================================
# DATASET 4: MYOCARDIAL INFARCTION RULE-OUT (FIXED)
# ============================================================================

create_mi_data <- function(n = 800, prevalence = 0.10) {
  # True disease status
  disease <- rbinom(n, 1, prevalence)

  # ECG changes
  # Sensitivity: 55%, Specificity: 95%
  ecg_prob <- ifelse(disease == 1, 0.55, 0.05)
  ecg_result <- rbinom(n, 1, ecg_prob)

  # Initial troponin
  # Sensitivity: 85%, Specificity: 95%
  trop1_prob <- ifelse(disease == 1, 0.85, 0.05)
  trop1_result <- rbinom(n, 1, trop1_prob)

  # 3-hour troponin
  # Sensitivity: 98%, Specificity: 95%
  trop3_prob <- ifelse(disease == 1, 0.98, 0.05)
  trop3_result <- rbinom(n, 1, trop3_prob)

  # Make sure 3-hour is at least as positive as initial
  trop3_result <- pmax(trop1_result, trop3_result)

  # CT Angiography
  # Sensitivity: 95%, Specificity: 90%
  cta_prob <- ifelse(disease == 1, 0.95, 0.10)
  cta_result <- rbinom(n, 1, cta_prob)

  # Clinical risk score components
  age <- round(rnorm(n, 60, 15))
  age[age < 30] <- 30
  age[age > 90] <- 90

  # FIXED: Generate chest pain type for each individual based on their disease status
  chest_pain_type <- character(n)
  pain_types <- c("Typical", "Atypical", "Non-cardiac")

  for (i in 1:n) {
    if (disease[i] == 1) {
      # Disease present - more likely to have typical pain
      chest_pain_type[i] <- sample(pain_types, 1, prob = c(0.6, 0.3, 0.1))
    } else {
      # No disease - more likely to have non-cardiac pain
      chest_pain_type[i] <- sample(pain_types, 1, prob = c(0.1, 0.3, 0.6))
    }
  }

  # Create dataset
  mi_data <- data.frame(
    patient_id = 1:n,
    ecg = factor(ecg_result, levels = c(0, 1), 
                labels = c("Normal", "Ischemic changes")),
    troponin_initial = factor(trop1_result, levels = c(0, 1), 
                             labels = c("Normal", "Elevated")),
    troponin_3hr = factor(trop3_result, levels = c(0, 1), 
                         labels = c("Normal", "Elevated")),
    ct_angiography = factor(cta_result, levels = c(0, 1), 
                           labels = c("No stenosis", "Significant stenosis")),
    mi_status = factor(disease, levels = c(0, 1), 
                      labels = c("No MI", "MI")),
    age = age,
    chest_pain = factor(chest_pain_type),
    diabetes = factor(rbinom(n, 1, 0.25), levels = c(0, 1), 
                     labels = c("No", "Yes")),
    smoking = factor(rbinom(n, 1, 0.30), levels = c(0, 1), 
                    labels = c("No", "Yes")),
    prior_cad = factor(rbinom(n, 1, 0.20), levels = c(0, 1), 
                      labels = c("No", "Yes"))
  )

  # CTA typically only for intermediate risk
  low_risk_idx <- which(mi_data$chest_pain == "Non-cardiac" & 
                       mi_data$ecg == "Normal" & 
                       mi_data$troponin_initial == "Normal")
  if (length(low_risk_idx) > 0) {
    mi_data$ct_angiography[sample(low_risk_idx, 
                                 min(length(low_risk_idx), 
                                     floor(length(low_risk_idx) * 0.8)))] <- NA
  }

  return(mi_data)
}

# Generate MI dataset
mi_ruleout_data <- create_mi_data(n = 800, prevalence = 0.10)

# ============================================================================
# DATASET 5: THYROID NODULE EVALUATION
# ============================================================================

create_thyroid_data <- function(n = 600, prevalence = 0.05) {
  # True disease status (thyroid cancer)
  disease <- rbinom(n, 1, prevalence)

  # Ultrasound features (TI-RADS)
  # Sensitivity: 90%, Specificity: 70%
  us_prob <- ifelse(disease == 1, 0.90, 0.30)
  us_result <- rbinom(n, 1, us_prob)

  # Fine Needle Aspiration (FNA) cytology
  # Sensitivity: 95%, Specificity: 98%
  fna_prob <- ifelse(disease == 1, 0.95, 0.02)
  fna_result <- rbinom(n, 1, fna_prob)

  # Molecular testing (ThyroSeq/Afirma)
  # Sensitivity: 91%, Specificity: 85%
  molecular_prob <- ifelse(disease == 1, 0.91, 0.15)
  molecular_result <- rbinom(n, 1, molecular_prob)

  # Thyroglobulin levels
  # Sensitivity: 70%, Specificity: 80%
  tg_prob <- ifelse(disease == 1, 0.70, 0.20)
  tg_result <- rbinom(n, 1, tg_prob)

  # Create dataset
  thyroid_data <- data.frame(
    patient_id = 1:n,
    ultrasound = factor(us_result, levels = c(0, 1), 
                       labels = c("TI-RADS 1-3", "TI-RADS 4-5")),
    fna_cytology = factor(fna_result, levels = c(0, 1), 
                         labels = c("Benign/Indeterminate", "Suspicious/Malignant")),
    molecular_test = factor(molecular_result, levels = c(0, 1), 
                           labels = c("Benign", "Suspicious")),
    thyroglobulin = factor(tg_result, levels = c(0, 1), 
                          labels = c("Normal", "Elevated")),
    cancer_status = factor(disease, levels = c(0, 1), 
                          labels = c("Benign", "Malignant")),
    nodule_size = round(rlnorm(n, log(15), 0.5)),
    age = round(rnorm(n, 50, 15)),
    gender = factor(sample(c("Female", "Male"), n, 
                          replace = TRUE, prob = c(0.75, 0.25))),
    radiation_history = factor(rbinom(n, 1, 0.05), levels = c(0, 1), 
                             labels = c("No", "Yes"))
  )

  # Molecular testing only for indeterminate FNA
  molecular_not_done <- which(thyroid_data$fna_cytology != "Benign/Indeterminate" |
                             runif(n) < 0.5)
  thyroid_data$molecular_test[molecular_not_done] <- NA

  return(thyroid_data)
}

# Generate thyroid dataset
thyroid_nodule_data <- create_thyroid_data(n = 600, prevalence = 0.05)

# ============================================================================
# SAVE ALL DATASETS
# ============================================================================

# Save as RData file
save(covid_screening_data, 
     breast_cancer_data,
     tb_diagnosis_data,
     mi_ruleout_data,
     thyroid_nodule_data,
     file = "data/decision_panel_test_data.RData")

# Also save as CSV files for external use
write.csv(covid_screening_data, "data/covid_screening_data.csv", row.names = FALSE)
write.csv(breast_cancer_data, "data/breast_cancer_data.csv", row.names = FALSE)
write.csv(tb_diagnosis_data, "data/tb_diagnosis_data.csv", row.names = FALSE)
write.csv(mi_ruleout_data, "data/mi_ruleout_data.csv", row.names = FALSE)
write.csv(thyroid_nodule_data, "data/thyroid_nodule_data.csv", row.names = FALSE)

# Print confirmation
cat("✓ All datasets generated and saved successfully!\n")

# ============================================================================
# DATASET SUMMARIES FOR VERIFICATION
# ============================================================================

summarize_dataset <- function(data, disease_col, test_cols) {
  cat("\nDataset Summary:\n")
  cat("Total observations:", nrow(data), "\n")

  # Check if disease column exists and has the expected levels
  if (disease_col %in% names(data)) {
    disease_levels <- levels(factor(data[[disease_col]]))
    if (length(disease_levels) >= 2) {
      prevalence <- mean(data[[disease_col]] == disease_levels[2], na.rm = TRUE)
      cat("Disease prevalence:", round(prevalence * 100, 1), "%\n")
    } else {
      cat("Disease column found but insufficient levels\n")
    }
  } else {
    cat("Disease column not found\n")
  }

  cat("\nTest performance:\n")

  for (test in test_cols) {
    if (test %in% names(data) && disease_col %in% names(data)) {
      # Remove rows with NAs in either variable
      complete_data <- data[!is.na(data[[test]]) & !is.na(data[[disease_col]]), ]

      if (nrow(complete_data) > 0) {
        tab <- table(complete_data[[test]], complete_data[[disease_col]])

        if (nrow(tab) >= 2 && ncol(tab) >= 2) {
          # Assume positive is the second level (index 2)
          if (nrow(tab) == 2 && ncol(tab) == 2) {
            sens <- tab[2,2] / sum(tab[,2])
            spec <- tab[1,1] / sum(tab[,1])
            cat(sprintf("  %s: Sens=%.1f%%, Spec=%.1f%% (n=%d)\n", 
                        test, sens*100, spec*100, nrow(complete_data)))
          } else {
            cat(sprintf("  %s: Complex table structure (n=%d)\n", 
                        test, nrow(complete_data)))
          }
        } else {
          cat(sprintf("  %s: Insufficient data for analysis (n=%d)\n", 
                      test, nrow(complete_data)))
        }
      } else {
        cat(sprintf("  %s: No complete cases\n", test))
      }
    } else {
      cat(sprintf("  %s: Column not found\n", test))
    }
  }
}

# Print summaries for verification
cat("\n", paste(rep("=", 60), collapse = ""), "\n")
cat("DATASET SUMMARIES")
cat("\n", paste(rep("=", 60), collapse = ""), "\n")

cat("\n=== COVID-19 SCREENING DATA ===")
summarize_dataset(covid_screening_data, "covid_status", 
                 c("rapid_antigen", "pcr", "chest_ct"))

cat("\n=== BREAST CANCER SCREENING DATA ===")
summarize_dataset(breast_cancer_data, "cancer_status",
                 c("clinical_exam", "mammography", "ultrasound", "mri"))

cat("\n=== TUBERCULOSIS DIAGNOSIS DATA ===")
summarize_dataset(tb_diagnosis_data, "tb_status",
                 c("symptoms", "sputum_smear", "genexpert", "culture", "chest_xray"))

cat("\n=== MYOCARDIAL INFARCTION DATA ===")
summarize_dataset(mi_ruleout_data, "mi_status",
                 c("ecg", "troponin_initial", "troponin_3hr", "ct_angiography"))

cat("\n=== THYROID NODULE DATA ===")
summarize_dataset(thyroid_nodule_data, "cancer_status",
                 c("ultrasound", "fna_cytology", "molecular_test", "thyroglobulin"))

cat("\n", paste(rep("=", 60), collapse = ""), "\n")

Scenario 1: Emergency Department COVID-19 Screening

Clinical Context

An emergency department needs to rapidly screen patients for COVID-19 while managing limited resources and preventing nosocomial transmission.

# Examine test characteristics
covid_tests <- covid_screening_data %>%
  select(rapid_antigen, pcr, chest_ct, covid_status) %>%
  na.omit()

# Calculate individual test performance
test_performance <- function(test, truth) {
  tab <- table(test, truth)
  sens <- tab[2,2] / sum(tab[,2])
  spec <- tab[1,1] / sum(tab[,1])
  ppv <- tab[2,2] / sum(tab[2,])
  npv <- tab[1,1] / sum(tab[1,])

  return(data.frame(
    Sensitivity = sens,
    Specificity = spec,
    PPV = ppv,
    NPV = npv
  ))
}

# Individual test performance
rapid_perf <- test_performance(covid_tests$rapid_antigen, covid_tests$covid_status)
pcr_perf <- test_performance(covid_tests$pcr, covid_tests$covid_status)
ct_perf <- test_performance(covid_tests$chest_ct, covid_tests$covid_status)

performance_table <- rbind(
  `Rapid Antigen` = rapid_perf,
  `PCR` = pcr_perf,
  `Chest CT` = ct_perf
)

kable(round(performance_table * 100, 1), 
      caption = "Individual Test Performance (%)")

Optimization Analysis

# Run optimization for different scenarios
# 1. Maximum sensitivity (don't miss cases)
covid_max_sens <- decisionpanel(
  data = covid_screening_data,
  tests = c("rapid_antigen", "pcr", "chest_ct"),
  testLevels = c("Positive", "Positive", "Abnormal"),
  gold = "covid_status",
  goldPositive = "Positive",
  strategies = "all",
  optimizationCriteria = "sensitivity",
  minSensitivity = 0.95
)

# 2. Cost-effective screening
covid_cost_effective <- decisionpanel(
  data = covid_screening_data,
  tests = c("rapid_antigen", "pcr", "chest_ct"),
  testLevels = c("Positive", "Positive", "Abnormal"),
  gold = "covid_status",
  goldPositive = "Positive",
  strategies = "all",
  optimizationCriteria = "efficiency",
  useCosts = TRUE,
  testCosts = "5,50,200",
  fpCost = 500,
  fnCost = 5000
)

Clinical Decision Algorithm

Based on the analysis, here's a practical algorithm:

# Implement sequential testing algorithm
apply_covid_algorithm <- function(data) {
  n <- nrow(data)
  decisions <- character(n)
  tests_used <- character(n)

  for (i in 1:n) {
    # Step 1: Rapid antigen test
    if (data$rapid_antigen[i] == "Positive") {
      decisions[i] <- "Isolate, Confirm with PCR"
      tests_used[i] <- "Rapid"
    } else if (data$symptom_score[i] >= 6) {
      # Step 2: High symptom score → CT
      if (!is.na(data$chest_ct[i]) && data$chest_ct[i] == "Abnormal") {
        decisions[i] <- "Probable COVID, Isolate, PCR"
        tests_used[i] <- "Rapid + CT"
      } else {
        decisions[i] <- "Low probability, Standard care"
        tests_used[i] <- "Rapid + CT"
      }
    } else {
      decisions[i] <- "Low probability, Standard care"
      tests_used[i] <- "Rapid"
    }
  }

  return(data.frame(Decision = decisions, Tests = tests_used))
}

# Apply algorithm
algorithm_results <- apply_covid_algorithm(covid_screening_data[1:10,])
kable(cbind(covid_screening_data[1:10, c("patient_id", "rapid_antigen", 
                                         "symptom_score", "covid_status")],
            algorithm_results))

Cost-Effectiveness Visualization

# Simulate different strategies
strategies <- expand.grid(
  use_rapid = c(TRUE, FALSE),
  use_pcr = c(TRUE, FALSE),
  use_ct = c(TRUE, FALSE)
) %>%
  filter(use_rapid | use_pcr | use_ct) # At least one test

# Calculate performance for each strategy
strategy_performance <- strategies %>%
  rowwise() %>%
  mutate(
    tests = paste(c(
      if(use_rapid) "RAT" else NULL,
      if(use_pcr) "PCR" else NULL,
      if(use_ct) "CT" else NULL
    ), collapse = "+"),
    cost = sum(c(
      if(use_rapid) 5 else 0,
      if(use_pcr) 50 else 0,
      if(use_ct) 200 else 0
    )),
    # Simulated performance (would come from actual analysis)
    sensitivity = case_when(
      use_rapid & use_pcr & use_ct ~ 0.99,
      use_pcr & use_ct ~ 0.98,
      use_rapid & use_pcr ~ 0.97,
      use_pcr ~ 0.95,
      use_rapid & use_ct ~ 0.94,
      use_ct ~ 0.90,
      use_rapid ~ 0.65
    ),
    specificity = case_when(
      use_rapid & use_pcr & use_ct ~ 0.83,
      use_pcr & use_ct ~ 0.84,
      use_rapid & use_pcr ~ 0.97,
      use_pcr ~ 0.99,
      use_rapid & use_ct ~ 0.83,
      use_ct ~ 0.85,
      use_rapid ~ 0.98
    )
  )

# Create cost-effectiveness plot
ggplot(strategy_performance, aes(x = cost, y = sensitivity * 100)) +
  geom_point(aes(size = specificity * 100), alpha = 0.6) +
  geom_text(aes(label = tests), vjust = -1, size = 3) +
  geom_line(data = strategy_performance %>% 
              arrange(cost) %>%
              filter(sensitivity == cummax(sensitivity)),
            color = "red", alpha = 0.5) +
  scale_size_continuous(name = "Specificity (%)", range = c(3, 10)) +
  labs(
    title = "Cost-Effectiveness of COVID-19 Testing Strategies",
    x = "Cost per Patient ($)",
    y = "Sensitivity (%)",
    caption = "Red line shows cost-effectiveness frontier"
  ) +
  theme_minimal()

Scenario 2: Breast Cancer Screening Program

Clinical Context

A population-based breast cancer screening program needs to optimize use of mammography, ultrasound, and MRI based on risk factors.

# Examine population characteristics
breast_summary <- breast_cancer_data %>%
  summarise(
    n = n(),
    prevalence = mean(cancer_status == "Cancer"),
    mean_age = mean(age),
    pct_family_history = mean(family_history == "Yes") * 100,
    pct_brca = mean(brca_mutation == "Positive") * 100,
    pct_dense_breast = mean(breast_density %in% c("C", "D")) * 100
  )

kable(breast_summary, digits = 2,
      caption = "Population Characteristics")

# Risk stratification
breast_cancer_data <- breast_cancer_data %>%
  mutate(
    risk_category = case_when(
      brca_mutation == "Positive" ~ "High Risk",
      family_history == "Yes" & age < 50 ~ "High Risk",
      family_history == "Yes" | breast_density == "D" ~ "Moderate Risk",
      TRUE ~ "Average Risk"
    )
  )

table(breast_cancer_data$risk_category)

Risk-Stratified Analysis

# Analyze each risk group separately
risk_groups <- split(breast_cancer_data, breast_cancer_data$risk_category)

# High-risk group optimization
high_risk_panel <- decisionpanel(
  data = risk_groups$`High Risk`,
  tests = c("mammography", "ultrasound", "mri"),
  testLevels = c("BIRADS 3-5", "Suspicious", "Suspicious"),
  gold = "cancer_status",
  goldPositive = "Cancer",
  strategies = "all",
  optimizationCriteria = "sensitivity",
  minSensitivity = 0.95
)

# Average-risk group optimization (cost-conscious)
average_risk_panel <- decisionpanel(
  data = risk_groups$`Average Risk`,
  tests = c("clinical_exam", "mammography", "ultrasound"),
  testLevels = c("Abnormal", "BIRADS 3-5", "Suspicious"),
  gold = "cancer_status",
  goldPositive = "Cancer",
  strategies = "all",
  optimizationCriteria = "efficiency",
  useCosts = TRUE,
  testCosts = "20,100,150"
)

Screening Recommendations by Risk

# Create recommendation table
recommendations <- data.frame(
  Risk_Category = c("High Risk", "Moderate Risk", "Average Risk"),
  Recommended_Tests = c(
    "Annual MRI + Mammography",
    "Annual Mammography + US if dense",
    "Biennial Mammography"
  ),
  Expected_Sensitivity = c("99%", "90%", "85%"),
  Expected_Specificity = c("80%", "92%", "95%"),
  Cost_per_Screen = c("$1,100", "$250", "$100"),
  NNS = c(50, 200, 500)  # Number needed to screen
)

kable(recommendations, 
      caption = "Risk-Stratified Screening Recommendations")

Age-Specific Performance

# Analyze performance by age group
age_groups <- breast_cancer_data %>%
  mutate(age_group = cut(age, breaks = c(40, 50, 60, 70, 80),
                         labels = c("40-49", "50-59", "60-69", "70-79")))

# Calculate mammography performance by age
age_performance <- age_groups %>%
  group_by(age_group) %>%
  summarise(
    n = n(),
    prevalence = mean(cancer_status == "Cancer"),
    mammography_sens = {
      tab <- table(mammography, cancer_status)
      if(nrow(tab) == 2 && ncol(tab) == 2) {
        tab[2,2] / sum(tab[,2])
      } else NA
    },
    mammography_spec = {
      tab <- table(mammography, cancer_status)
      if(nrow(tab) == 2 && ncol(tab) == 2) {
        tab[1,1] / sum(tab[,1])
      } else NA
    }
  )

# Visualize
ggplot(age_performance, aes(x = age_group)) +
  geom_bar(aes(y = mammography_sens * 100), stat = "identity", 
           fill = "skyblue", alpha = 0.7) +
  geom_line(aes(y = prevalence * 1000, group = 1), color = "red", size = 2) +
  geom_point(aes(y = prevalence * 1000), color = "red", size = 3) +
  scale_y_continuous(
    name = "Mammography Sensitivity (%)",
    sec.axis = sec_axis(~./10, name = "Cancer Prevalence per 1000")
  ) +
  labs(
    title = "Mammography Performance by Age Group",
    x = "Age Group"
  ) +
  theme_minimal()

Scenario 3: Tuberculosis Case Finding

Clinical Context

A TB program in a high-burden setting needs to optimize case finding with limited resources.

# TB test combinations for different settings
tb_settings <- data.frame(
  Setting = c("Community", "Clinic", "Hospital", "Contact Tracing"),
  Prevalence = c(0.02, 0.20, 0.40, 0.10),
  Available_Tests = c(
    "Symptoms, CXR",
    "Symptoms, Smear, GeneXpert, CXR",
    "All tests",
    "Symptoms, GeneXpert"
  ),
  Budget_per_case = c(10, 30, 100, 50)
)

kable(tb_settings, caption = "TB Testing Scenarios by Setting")

Sequential Testing Algorithm

# Implement WHO-recommended algorithm
apply_tb_algorithm <- function(data) {
  decisions <- character(nrow(data))

  for (i in 1:nrow(data)) {
    if (data$symptoms[i] == "Yes" || data$contact_history[i] == "Yes") {
      # Symptomatic or contact: get GeneXpert
      if (!is.na(data$genexpert[i]) && data$genexpert[i] == "MTB detected") {
        decisions[i] <- "Start TB treatment"
      } else if (!is.na(data$chest_xray[i]) && data$chest_xray[i] == "Abnormal") {
        # CXR abnormal but GeneXpert negative
        if (!is.na(data$culture[i])) {
          decisions[i] <- ifelse(data$culture[i] == "Positive",
                                "Start TB treatment",
                                "Not TB, investigate other causes")
        } else {
          decisions[i] <- "Clinical decision needed"
        }
      } else {
        decisions[i] <- "TB unlikely"
      }
    } else {
      # Asymptomatic: screen with CXR if available
      if (!is.na(data$chest_xray[i]) && data$chest_xray[i] == "Abnormal") {
        decisions[i] <- "Needs further testing"
      } else {
        decisions[i] <- "No TB screening needed"
      }
    }
  }

  return(decisions)
}

# Apply to sample
tb_sample <- tb_diagnosis_data[1:20,]
tb_sample$decision <- apply_tb_algorithm(tb_sample)

# Show results
kable(tb_sample %>% 
      select(patient_id, symptoms, genexpert, chest_xray, tb_status, decision) %>%
      head(10))

Cost-Effectiveness by Strategy

# Compare different TB screening strategies
tb_strategies <- data.frame(
  Strategy = c(
    "Symptoms only",
    "Symptoms → Smear",
    "Symptoms → GeneXpert", 
    "Symptoms → CXR → GeneXpert",
    "Universal GeneXpert",
    "Universal CXR → GeneXpert"
  ),
  Tests_per_case_found = c(100, 50, 20, 15, 10, 12),
  Cost_per_case_found = c(100, 200, 400, 350, 800, 600),
  Sensitivity = c(60, 70, 85, 92, 95, 93),
  Time_to_diagnosis = c(0, 3, 1, 2, 1, 1)
)

# Create multi-dimensional comparison
ggplot(tb_strategies, aes(x = Cost_per_case_found, y = Sensitivity)) +
  geom_point(aes(size = Tests_per_case_found, 
                 color = factor(Time_to_diagnosis)), alpha = 0.7) +
  geom_text(aes(label = Strategy), vjust = -1, size = 3) +
  scale_size_continuous(name = "Tests per case", range = c(3, 10)) +
  scale_color_discrete(name = "Days to diagnosis") +
  labs(
    title = "TB Screening Strategy Comparison",
    x = "Cost per TB case found ($)",
    y = "Sensitivity (%)"
  ) +
  theme_minimal()

Scenario 4: Chest Pain Evaluation

Clinical Context

Emergency department evaluation of chest pain requires rapid, accurate rule-out of myocardial infarction.

# Risk stratification using HEART score components
mi_ruleout_data <- mi_ruleout_data %>%
  mutate(
    heart_score = (age > 65) * 1 +
                  (chest_pain == "Typical") * 2 +
                  (chest_pain == "Atypical") * 1 +
                  (ecg == "Ischemic changes") * 2 +
                  (prior_cad == "Yes") * 1 +
                  (diabetes == "Yes" | smoking == "Yes") * 1,
    risk_category = cut(heart_score, 
                       breaks = c(-1, 3, 6, 10),
                       labels = c("Low", "Moderate", "High"))
  )

# Show risk distribution
risk_table <- table(mi_ruleout_data$risk_category, mi_ruleout_data$mi_status)
kable(prop.table(risk_table, margin = 1) * 100,
      digits = 1,
      caption = "MI Prevalence by Risk Category (%)")

Time-Sensitive Protocols

# Define protocols by urgency
protocols <- list(
  rapid_rule_out = function(data) {
    # 0/1-hour protocol
    data$troponin_initial == "Normal" & 
    data$ecg == "Normal" & 
    data$heart_score <= 3
  },

  standard_rule_out = function(data) {
    # 0/3-hour protocol
    data$troponin_initial == "Normal" & 
    data$troponin_3hr == "Normal" &
    data$ecg == "Normal"
  },

  rule_in = function(data) {
    # Immediate rule-in
    data$troponin_initial == "Elevated" & 
    data$ecg == "Ischemic changes"
  }
)

# Apply protocols
mi_ruleout_data <- mi_ruleout_data %>%
  mutate(
    rapid_rule_out = protocols$rapid_rule_out(.),
    standard_rule_out = protocols$standard_rule_out(.) & !rapid_rule_out,
    rule_in = protocols$rule_in(.),
    need_further_testing = !rapid_rule_out & !standard_rule_out & !rule_in
  )

# Summarize protocol performance
protocol_performance <- mi_ruleout_data %>%
  summarise(
    rapid_rule_out_pct = mean(rapid_rule_out) * 100,
    rapid_rule_out_npv = sum(rapid_rule_out & mi_status == "No MI") / 
                         sum(rapid_rule_out) * 100,
    standard_rule_out_pct = mean(standard_rule_out) * 100,
    standard_rule_out_npv = sum(standard_rule_out & mi_status == "No MI") / 
                           sum(standard_rule_out) * 100,
    rule_in_pct = mean(rule_in) * 100,
    rule_in_ppv = sum(rule_in & mi_status == "MI") / sum(rule_in) * 100
  )

kable(t(protocol_performance), digits = 1,
      caption = "Protocol Performance Metrics")

Visualization of Patient Flow

# Create patient flow diagram data
flow_data <- mi_ruleout_data %>%
  mutate(
    disposition = case_when(
      rapid_rule_out ~ "Discharge (1 hour)",
      standard_rule_out ~ "Discharge (3 hours)",
      rule_in ~ "Admit CCU",
      TRUE ~ "Observation/Further testing"
    )
  ) %>%
  group_by(disposition, mi_status) %>%
  summarise(n = n()) %>%
  mutate(pct = n / sum(n) * 100)

# Create flow diagram
ggplot(flow_data, aes(x = disposition, y = n, fill = mi_status)) +
  geom_bar(stat = "identity", position = "stack") +
  geom_text(aes(label = sprintf("%.0f%%", pct)), 
            position = position_stack(vjust = 0.5)) +
  scale_fill_manual(values = c("No MI" = "lightgreen", "MI" = "salmon")) +
  labs(
    title = "Patient Disposition by Protocol",
    x = "Disposition",
    y = "Number of Patients",
    fill = "Final Diagnosis"
  ) +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))

Scenario 5: Thyroid Nodule Evaluation

Clinical Context

Thyroid nodules are common but cancer is rare. Optimize use of FNA, molecular testing, and surgery.

# Analyze by nodule size
thyroid_by_size <- thyroid_nodule_data %>%
  mutate(size_category = cut(nodule_size, 
                             breaks = c(0, 10, 20, 40, 100),
                             labels = c("<1cm", "1-2cm", "2-4cm", ">4cm"))) %>%
  group_by(size_category) %>%
  summarise(
    n = n(),
    cancer_rate = mean(cancer_status == "Malignant") * 100,
    fna_done = mean(!is.na(fna_cytology)) * 100,
    molecular_done = mean(!is.na(molecular_test)) * 100
  )

kable(thyroid_by_size, digits = 1,
      caption = "Thyroid Nodule Characteristics by Size")

Diagnostic Algorithm Implementation

# Implement Bethesda-based algorithm
thyroid_algorithm <- decisionpanel(
  data = thyroid_nodule_data,
  tests = c("ultrasound", "fna_cytology", "molecular_test"),
  testLevels = c("TI-RADS 4-5", "Suspicious/Malignant", "Suspicious"),
  gold = "cancer_status",
  goldPositive = "Malignant",
  strategies = "sequential",
  sequentialStop = "positive",
  createTree = TRUE,
  treeMethod = "cart",
  useCosts = TRUE,
  testCosts = "200,300,3000",  # US, FNA, Molecular
  fpCost = 10000,  # Unnecessary surgery
  fnCost = 50000   # Missed cancer
)

Decision Tree Visualization

# Simplified decision tree representation
cat("Thyroid Nodule Evaluation Algorithm:\n")
cat("1. Ultrasound Assessment\n")
cat("   ├─ TI-RADS 1-2: No FNA needed\n")
cat("   └─ TI-RADS 3-5: Proceed to FNA\n")
cat("      ├─ Benign (Bethesda II): Follow-up\n")
cat("      ├─ Indeterminate (Bethesda III-IV): Molecular testing\n")
cat("      │  ├─ Benign profile: Follow-up\n")
cat("      │  └─ Suspicious profile: Surgery\n")
cat("      └─ Suspicious/Malignant (Bethesda V-VI): Surgery\n")

# Create visual representation of outcomes
outcomes <- data.frame(
  Test_Path = c("US only", "US+FNA", "US+FNA+Molecular", "Direct Surgery"),
  Patients_pct = c(40, 30, 20, 10),
  Cancers_found_pct = c(0, 20, 60, 20),
  Cost = c(200, 500, 3500, 200)
)

ggplot(outcomes, aes(x = Test_Path, y = Patients_pct)) +
  geom_bar(stat = "identity", fill = "lightblue", alpha = 0.7) +
  geom_line(aes(y = Cancers_found_pct, group = 1), color = "red", size = 2) +
  geom_point(aes(y = Cancers_found_pct), color = "red", size = 3) +
  scale_y_continuous(
    name = "Percentage of Patients",
    sec.axis = sec_axis(~., name = "Percentage of Cancers Found")
  ) +
  labs(
    title = "Thyroid Nodule Evaluation Pathways",
    x = "Testing Path"
  ) +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))

Summary and Best Practices

Key Learnings Across Scenarios

Context Matters: Optimal strategies differ between screening and diagnosis
Sequential Testing: Often more efficient than parallel testing
Risk Stratification: Improves efficiency and outcomes
Cost Considerations: Must balance performance with resources
Implementation: Clear algorithms improve adoption

General Recommendations

summary_recommendations <- data.frame(
  Scenario = c("Screening", "Diagnosis", "Emergency", "Surveillance"),
  Priority = c("High Sensitivity", "Balanced", "Speed + Accuracy", "Specificity"),
  Strategy = c("Parallel OR", "Sequential", "Rapid protocols", "Confirmatory"),
  Key_Metric = c("NPV", "Accuracy", "Time to decision", "PPV"),
  Example = c("Cancer screening", "TB diagnosis", "Chest pain", "Cancer follow-up")
)

kable(summary_recommendations,
      caption = "Testing Strategy Recommendations by Clinical Scenario")

Future Directions

Machine Learning Integration: Combine multiple variables beyond just test results
Dynamic Protocols: Adapt based on local prevalence and resources
Real-time Optimization: Update algorithms based on performance data
Patient Preferences: Include patient values in decision-making

Session Information

sessionInfo()

sbalci/ClinicoPathJamoviModule documentation built on June 13, 2025, 9:34 a.m.

rdrr.io home R language documentation Run R code online

CRAN packages Bioconductor packages R-Forge packages GitHub packages

Note that we can't provide technical support on individual packages. You should contact the package authors for that.

sbalci/ClinicoPathJamoviModule
Analysis for Clinicopathological Research

In sbalci/ClinicoPathJamoviModule: Analysis for Clinicopathological Research

Introduction

Scenario 1: Emergency Department COVID-19 Screening

Clinical Context

Optimization Analysis

Clinical Decision Algorithm

Cost-Effectiveness Visualization

Scenario 2: Breast Cancer Screening Program

Clinical Context

Risk-Stratified Analysis

Screening Recommendations by Risk

Age-Specific Performance

Scenario 3: Tuberculosis Case Finding

Clinical Context

Sequential Testing Algorithm

Cost-Effectiveness by Strategy

Scenario 4: Chest Pain Evaluation

Clinical Context

Time-Sensitive Protocols

Visualization of Patient Flow

Scenario 5: Thyroid Nodule Evaluation

Clinical Context

Diagnostic Algorithm Implementation

Decision Tree Visualization

Summary and Best Practices

Key Learnings Across Scenarios

General Recommendations

Future Directions

Session Information

R Package Documentation

Browse R Packages

We want your feedback!

sbalci/ClinicoPathJamoviModule Analysis for Clinicopathological Research

In sbalci/ClinicoPathJamoviModule: Analysis for Clinicopathological Research

Introduction

Scenario 1: Emergency Department COVID-19 Screening

Clinical Context

Optimization Analysis

Clinical Decision Algorithm

Cost-Effectiveness Visualization

Scenario 2: Breast Cancer Screening Program

Clinical Context

Risk-Stratified Analysis

Screening Recommendations by Risk

Age-Specific Performance

Scenario 3: Tuberculosis Case Finding

Clinical Context

Sequential Testing Algorithm

Cost-Effectiveness by Strategy

Scenario 4: Chest Pain Evaluation

Clinical Context

Time-Sensitive Protocols

Visualization of Patient Flow

Scenario 5: Thyroid Nodule Evaluation

Clinical Context

Diagnostic Algorithm Implementation

Decision Tree Visualization

Summary and Best Practices

Key Learnings Across Scenarios

General Recommendations

Future Directions

Session Information

R Package Documentation

Browse R Packages

We want your feedback!

sbalci/ClinicoPathJamoviModule
Analysis for Clinicopathological Research