Nothing
report_mode <- 1
# If 1, we are generating a report!
petoc <- function() {
if (report_mode == 0) {
message("Press [Enter] to continue")
r <- readline()
if (r == "q") {
terminate_session()
stop("User asked for termination.\n")
}
}
}
#' Basic tests of model functionality. Serious issues if the test does not pass.
#' @return tests results
#' @export
sanity_check <- function() {
init_session()
message("test 1: zero all costs\n")
input <- model_input$values
for (el in get_list_elements(input$cost)) input$cost[[el]] <- input$cost[[el]] * 0
input$medication$medication_costs <- 0 * input$medication$medication_costs
res <- run(100, input = input)
if (get_output()$total_cost != 0)
message("Test failed!") else message("Test passed!")
terminate_session()
message("test 2: zero all utilities\n")
init_session()
input <- model_input$values
for (el in get_list_elements(input$utility)) input$utility[[el]] <- input$utility[[el]] * 0
input$medication$medication_utility <- input$medication$medication_utility*0
input$global_parameters$discount_qaly <- input$global_parameters$discount_qaly*0
res <- run(100, input = input)
if (get_output()$total_qaly != 0) {
message("Test failed!")
message(get_output()$total_qaly)} else message("Test passed!")
terminate_session()
message("test 3: set all utilities to 1 and get one QALY without discount\n")
init_session()
input <- model_input$values
input$global_parameters$discount_qaly <- 0
input$medication$medication_utility <- input$medication$medication_utility*0
for (el in get_list_elements(input$utility)) input$utility[[el]] <- input$utility[[el]] * 0 + 1
input$utility$exac_dutil = input$utility$exac_dutil * 0
res <- run(100, input = input)
if (get_output()$total_qaly/get_output()$cumul_time != 1)
message("Test failed!") else message("Test passed!")
terminate_session()
message("test 4: zero mortality (both bg and exac)\n")
init_session()
input <- model_input$values
input$exacerbation$logit_p_death_by_sex <- input$exacerbation$logit_p_death_by_sex * 0 - 10000000 # log scale
input$agent$p_bgd_by_sex <- input$agent$p_bgd_by_sex * 0
input$manual$explicit_mortality_by_age_sex <- input$manual$explicit_mortality_by_age_sex * 0
res <- run(100, input = input)
if (get_output()$n_deaths != 0) {
message (get_output()$n_deaths)
stop("Test failed!")
} else message("Test passed!")
terminate_session()
return(0)
}
#' Returns simulated vs. predicted population table and a plot
#' @param incidence_k a number (default=1) by which the incidence rate of population will be multiplied.
#' @param remove_COPD 0 or 1, indicating whether COPD-caused mortality should be removed
#' @param savePlots 0 or 1, exports 300 DPI population growth and pyramid plots comparing simulated vs. predicted population
#' @param jurisdiction character string specifying the jurisdiction for validation ("canada" or "us"). Default is "canada".
#' @return For Canada: returns a table showing predicted (StatsCan) and simulated population values. For US: returns a data frame with population comparisons by age group and year.
#' @export
validate_population <- function(remove_COPD = 0, incidence_k = 1, savePlots = 0, jurisdiction = "canada") {
message("Validate_population(...) is responsible for producing output that can be used to test if the population module is properly calibrated.\n")
# Handle US jurisdiction
if (tolower(jurisdiction) == "us") {
USSimulation <- readr::read_csv(system.file("USCensus.csv", package = "epicR"))
# Setup and run simulation
settings <- get_default_settings()
settings$record_mode <- 0
settings$n_base_agents <- 1e+05 # Reduced from 1e6 for testing speed
time_horizon <- 56
results <- simulate(settings = settings, jurisdiction = "us", time_horizon = time_horizon, extended_results = TRUE)
output <- results$extended
# Create data frame
epic_popsize_age <- data.frame(year = seq(2015, by = 1, length.out = time_horizon),
output$n_alive_by_ctime_age)
# Rename columns
# Logic from validation.R: Columns 2 to N are renamed 1 to N-1
colnames(epic_popsize_age)[2:ncol(epic_popsize_age)] <- 1:(ncol(epic_popsize_age) - 1)
# Remove columns 2 to 40 (ages < 40)
epic_popsize_age <- epic_popsize_age[, -(2:40)]
# Reshape (Long format)
epic_popsize_age_long <- tidyr::pivot_longer(epic_popsize_age,
cols = -1,
names_to = "age",
values_to = "EPIC_popsize")
# Convert age to integer
epic_popsize_age_long$age <- as.integer(epic_popsize_age_long$age)
# Join/Merge Data
colnames(USSimulation)[colnames(USSimulation) == "value"] <- "US_popsize"
validate_pop_size_scaled <- merge(USSimulation, epic_popsize_age_long,
by = c("year", "age"),
all.x = TRUE)
# Initialize Scaled Column
validate_pop_size_scaled$EPIC_output_scaled <- NA
validate_pop_size_scaled$EPIC_output_scaled[validate_pop_size_scaled$year == 2015] <-
validate_pop_size_scaled$US_popsize[validate_pop_size_scaled$year == 2015]
# Calculate Growth Rates
total_epic_by_year <- aggregate(
x = validate_pop_size_scaled["EPIC_popsize"],
by = validate_pop_size_scaled["year"],
FUN = sum,
na.rm = TRUE
)
colnames(total_epic_by_year)[2] <- "total_EPIC_output"
# Sort by year
total_epic_by_year <- total_epic_by_year[order(total_epic_by_year$year), ]
# Calculate growth rate (current / previous)
prev_vals <- c(NA, total_epic_by_year$total_EPIC_output[-nrow(total_epic_by_year)])
total_epic_by_year$growth_rate <- total_epic_by_year$total_EPIC_output / prev_vals
# Merge growth rates back
df_with_growth <- merge(validate_pop_size_scaled,
total_epic_by_year[, c("year", "growth_rate")],
by = "year",
all.x = TRUE)
# Sort for calculations
df_with_growth <- df_with_growth[order(df_with_growth$age, df_with_growth$year), ]
# Apply Growth Projection
df_split <- split(df_with_growth, df_with_growth$age)
df_split <- lapply(df_split, function(sub_df) {
# Ensure sorted by year
sub_df <- sub_df[order(sub_df$year), ]
rates <- sub_df$growth_rate
rates[is.na(rates)] <- 1
# Get baseline (2015) US size
baseline <- sub_df$US_popsize[sub_df$year == 2015]
if(length(baseline) == 0) baseline <- 0
else baseline <- baseline[1]
# Calculate Projection
sub_df$EPIC_output_scaled <- baseline * cumprod(rates)
return(sub_df)
})
# Recombine
df_with_growth <- do.call(rbind, df_split)
# Create Age Groups
df_with_growth$age_group <- NA
df_with_growth$age_group[df_with_growth$age >= 40 & df_with_growth$age <= 59] <- "40-59"
df_with_growth$age_group[df_with_growth$age >= 60 & df_with_growth$age <= 79] <- "60-79"
df_with_growth$age_group[df_with_growth$age >= 80] <- "80+"
# Aggregate Final Data
df_summed_ranges <- aggregate(
x = df_with_growth[c("EPIC_output_scaled", "US_popsize")],
by = df_with_growth[c("year", "age_group")],
FUN = sum,
na.rm = TRUE
)
colnames(df_summed_ranges)[3:4] <- c("total_EPIC_population", "total_US_population")
# Calculate RMSE
rmse_results <- by(df_summed_ranges, df_summed_ranges$age_group, function(sub) {
sqrt(mean((sub$total_EPIC_population - sub$total_US_population)^2, na.rm = TRUE))
})
rmse_per_range <- data.frame(age_group = names(rmse_results),
rmse = as.numeric(rmse_results))
message(paste(capture.output(rmse_per_range), collapse = "\n"))
# Plotting Loop
unique_groups <- unique(df_summed_ranges$age_group)
for(age_grp in unique_groups) {
# Plot
df_subset <- df_summed_ranges[df_summed_ranges$year <= 2050 & df_summed_ranges$age_group == age_grp, ]
df_plot <- tidyr::gather(df_subset,
key = "Population_Type",
value = "Population",
"total_EPIC_population", "total_US_population")
p <- ggplot2::ggplot(
df_plot,
ggplot2::aes(x = .data$year,
y = .data$Population,
color = .data$Population_Type)
) +
ggplot2::geom_line(linewidth = 1.2) +
ggplot2::geom_point(size = 2) +
ggthemes::theme_tufte(base_size = 14, ticks = FALSE) +
ggplot2::ggtitle(paste("Comparison of EPIC vs. US Population Over Time for Age Group", age_grp)) +
ggplot2::scale_y_continuous(name = "Population", labels = scales::comma) +
ggplot2::scale_x_continuous(name = "Year", breaks = seq(min(df_plot$year), max(df_plot$year), by = 2)) +
ggplot2::expand_limits(y = 0) +
ggplot2::theme(
legend.title = ggplot2::element_blank(),
legend.position = "bottom",
plot.background = ggplot2::element_rect(fill = "white", color = NA),
panel.background = ggplot2::element_rect(fill = "white", color = NA)
)
print(p)
}
# RETURN FOR TESTING
return(df_summed_ranges)
}
# Check for unsupported jurisdictions
if (tolower(jurisdiction) != "canada") {
stop("Validation for jurisdiction '", jurisdiction, "' has not been implemented yet. Currently only 'canada' and 'us' are supported.")
}
# Canada implementation (existing code)
petoc()
settings <- default_settings
settings$record_mode <- record_mode["record_mode_none"]
settings$agent_stack_size <- 0
settings$n_base_agents <- 1e+06
settings$event_stack_size <- 1
init_session(settings = settings)
input <- model_input$values #We can work with local copy more conveniently and submit it to the Run function
message("\nBecause you have called me with remove_COPD=", remove_COPD, ", I am", c("NOT", "indeed")[remove_COPD + 1], "going to remove COPD-related mortality from my calculations")
petoc()
# CanSim.052.0005<-read.csv(system.file ('extdata', 'CanSim.052.0005.csv', package = 'epicR'), header = TRUE); #package ready
# reading
x <- aggregate(CanSim.052.0005[, "value"], by = list(CanSim.052.0005[, "year"]), FUN = sum)
x[, 2] <- x[, 2]/x[1, 2]
x <- x[1:input$global_parameters$time_horizon, ]
plot(x, type = "l", ylim = c(0.5, max(x[, 2] * 1.5)), xlab = "Year", ylab = "Relative population size")
title(cex.main = 0.5, "Relative populaton size")
message("The plot I just drew is the expected (well, StatCan's predictions) relative population growth from 2015\n")
petoc()
if (remove_COPD) {
input$exacerbation$logit_p_death_by_sex <- -1000 + input$exacerbation$logit_p_death_by_sex
input$manual$explicit_mortality_by_age_sex <- 0
}
input$agent$l_inc_betas[1] <- input$agent$l_inc_betas[1] + log(incidence_k)
message("working...\n")
res <- run(input = input)
if (res < 0) {
stop("Something went awry; bye!")
return()
}
n_y1_agents <- sum(get_output_ex()$n_alive_by_ctime_sex[1, ])
legend("topright", c("Predicted", "Simulated"), lty = c(1, 1), col = c("black", "red"))
message("And the black one is the observed (simulated) growth\n")
######## pretty population growth curve
CanSim <- tibble::as_tibble(CanSim.052.0005)
CanSim <- tidyr::spread(CanSim, key = year, value = value)
CanSim <- CanSim[, 3:51]
CanSim <- colSums (CanSim)
df <- data.frame(Year = c(2015:(2015 + model_input$values$global_parameters$time_horizon-1)), Predicted = CanSim[1:model_input$values$global_parameters$time_horizon] * 1000, Simulated = rowSums(get_output_ex()$n_alive_by_ctime_sex)/ settings$n_base_agents * 18179400) #rescaling population. There are about 18.6 million Canadians above 40
message ("Here's simulated vs. predicted population table:")
message(paste(capture.output(df), collapse = "\n"))
dfm <- reshape2::melt(df[,c('Year','Predicted','Simulated')], id.vars = 1)
plot_population_growth <- ggplot2::ggplot(dfm, aes(x = Year, y = value)) + theme_tufte(base_size=14, ticks=FALSE) +
geom_bar(aes(fill = variable), stat = "identity", position = "dodge") +
labs(title = "Population Growth Curve") + ylab ("Population") +
labs(caption = "(based on population at age 40 and above)") +
theme(legend.title=element_blank()) +
scale_y_continuous(name="Population", labels = scales::comma)
plot (plot_population_growth)
if (savePlots) ggsave(file.path(tempdir(), paste0("PopulationGrowth",".tiff")), plot = last_plot(), device = "tiff", dpi = 300)
pyramid <- matrix(NA, nrow = input$global_parameters$time_horizon, ncol = length(get_output_ex()$n_alive_by_ctime_age[1, ]) -
input$global_parameters$age0)
for (year in 0:model_input$values$global_parameters$time_horizon - 1) pyramid[1 + year, ] <- get_output_ex()$n_alive_by_ctime_age[year +1, -(1:input$global_parameters$age0)]
message("Also, the ratio of the expected to observed population in years 10 and 20 are ", sum(get_output_ex()$n_alive_by_ctime_sex[10,
])/x[10, 2], " and ", sum(get_output_ex()$n_alive_by_ctime_sex[20, ])/x[20, 2])
petoc()
message("Now evaluating the population pyramid\n")
for (year in c(2015, 2025, 2034)) {
message("The observed population pyramid in", year, "is just drawn\n")
x <- CanSim.052.0005[which(CanSim.052.0005[, "year"] == year & CanSim.052.0005[, "sex"] == "both"), "value"]
#x <- c(x, rep(0, 111 - length(x) - 40))
#barplot(x, names.arg=40:110, xlab = "Age")
#title(cex.main = 0.5, paste("Predicted Pyramid - ", year))
dfPredicted <- data.frame (population = x * 1000, age = 40:100)
# message("Predicted average age of those >40 y/o is", sum((input$global_parameters$age0:(input$global_parameters$age0 + length(x) -
# 1)) * x)/sum(x), "\n")
# petoc()
#
# message("Simulated average age of those >40 y/o is", sum((input$global_parameters$age0:(input$global_parameters$age0 + length(x) -
# 1)) * x)/sum(x), "\n")
# petoc()
dfSimulated <- data.frame (population = pyramid[year - 2015 + 1, ], age = 40:110)
dfSimulated$population <- dfSimulated$population * (-1) / settings$n_base_agents * 18179400 #rescaling population. There are 18179400 Canadians above 40
p <- ggplot (NULL, aes(x = age, y = population)) + theme_tufte(base_size=14, ticks=FALSE) +
geom_bar (aes(fill = "Simulated"), data = dfSimulated, stat="identity", alpha = 0.5) +
geom_bar (aes(fill = "Predicted"), data = dfPredicted, stat="identity", alpha = 0.5) +
theme(axis.title=element_blank()) +
ggtitle(paste0("Simulated vs. Predicted Population Pyramid in ", year)) +
theme(legend.title=element_blank()) +
scale_y_continuous(name="Population", labels = scales::comma) +
scale_x_continuous(name="Age", labels = scales::comma)
if (savePlots) ggsave(file.path(tempdir(), paste0("Population ", year,".tiff")), plot = last_plot(), device = "tiff", dpi = 300)
plot(p)
}
terminate_session()
}
#' Returns results of validation tests for smoking module.
#'
#' It compares simulated smoking prevalence and trends against observed data to
#' assess the model's accuracy.
#' @param intercept_k a number
#' @param remove_COPD 0 or 1. whether to remove COPD-related mortality.
#' @param jurisdiction character string specifying the jurisdiction for validation ("canada" or "us"). Default is "canada".
#' @return For Canada: validation test results (invisible). For US: data frame with smoking status rates by year.
#' @export
validate_smoking <- function(remove_COPD = 1, intercept_k = NULL, jurisdiction = "canada") {
# Handle US jurisdiction
if (tolower(jurisdiction) == "us") {
message("Welcome to EPIC validator! Today we will see if the model make good smoking predictions for the US population")
settings <- get_default_settings()
settings$record_mode <- record_mode["record_mode_event"]
settings$n_base_agents <- 1e+04
settings$event_stack_size <- settings$n_base_agents * 1.7 * 30
# Prepare custom input modifications
input_modifications <- list()
message("\nBecause you have called me with remove_COPD=", remove_COPD, ", I am", c("NOT", "indeed")[remove_COPD + 1], "going to remove COPD-related mortality from my calculations")
if (remove_COPD) {
input_modifications$exacerbation_logit_p_death_by_sex_modifier <- -10000
}
if (!is.null(intercept_k)) {
input_modifications$manual_smoking_intercept_k <- intercept_k
}
message("There are three US validation targets: 1) the prevalence of current, former, and non-smokers in 2018, 2) the prevalence of current smokers in 2023, and 3) the average annual percent change (AAPC).\n")
message("Starting validation target 1: baseline prevalence of smokers.\n")
USSmoking2018 <- readr::read_csv(system.file("USSmoking2018.csv", package = "epicR"))
tab1 <- as.numeric(USSmoking2018$value[1:3]) / 100
message("This is the observed percentage of current smokers in 2018 (m,f)\n")
barplot(tab1, names.arg = c("40-64", "65-74", "75+"), ylim = c(0, 0.4), xlab = "Age group", ylab = "Prevalence of smoking",
col = c("grey"))
title(cex.main = 0.5, "Prevalence of current smoker by sex and age group (observed)")
legend("topright", c("Overall"), fill = c("grey"))
message("Now I will run the model using the default smoking parameters")
message("running the model\n")
# Run with jurisdiction and settings
# Note: Input modifications for remove_COPD and intercept_k would need to be handled differently
# For now, we'll modify after getting input from simulate
input <- get_input(jurisdiction = "us")
if (remove_COPD) {
input$values$exacerbation$logit_p_death_by_sex <- input$values$exacerbation$logit_p_death_by_sex * -10000
}
if (!is.null(intercept_k)) {
input$values$manual$smoking$intercept_k <- intercept_k
}
# Run with modified input - pass jurisdiction to ensure consistency
results <- simulate(input = input$values, settings = settings, jurisdiction = "us", extended_results = TRUE, return_events = TRUE)
dataS <- as.matrix(results$events)
dataS <- dataS[which(dataS[, "event"] == events["event_start"]), ]
age_list <- list(a1 = c(40, 64), a2 = c(65, 74), a3 = c(75, 111))
tab2 <- numeric(length(age_list))
for (j in 1:length(age_list)) {
tab2[j] <- mean(dataS[
dataS[, "age_at_creation"] > age_list[[j]][1] &
dataS[, "age_at_creation"] <= age_list[[j]][2],
"smoking_status"
])
}
message("This is the model generated bar plot")
barplot(tab2, names.arg = c("40-64", "65-74", "75+"), ylim = c(0, 0.4), xlab = "Age group", ylab = "Prevalence of smoking",
col = c("black"))
title(cex.main = 0.5, "Prevalence of current smoking at creation (simulated)")
legend("topright", c("Overall"), fill = c("black"))
message("Now we will validate the model on smoking trends")
message("To model the decline in smoking among adults aged 40 and over beyond 2023, historical trends were analyzed using the 2025 MMWR report (DOI: 10.15585/mmwr.mm7407a3). An Annual Average Percent Change (AAPC) of -1.9% was derived from the observed 2017-2023 decrease in smokers aged 45 and over and applied to future projections\n")
op_ex <- results$extended
smoker_prev <- op_ex$n_current_smoker_by_ctime_sex/op_ex$n_alive_by_ctime_sex
smoker_packyears <- op_ex$sum_pack_years_by_ctime_sex/op_ex$n_alive_by_ctime_sex
plot(2015:(2015+input$values$global_parameters$time_horizon-1), smoker_prev[, 1], type = "l", ylim = c(0, 0.25), col = "black", xlab = "Year", ylab = "Prevalence of current smoking")
lines(2015:(2015+input$values$global_parameters$time_horizon-1), smoker_prev[, 2], type = "l", col = "grey")
legend("topright", c("male", "female"), lty = c(1, 1), col = c("black", "grey"))
title(cex.main = 0.5, "Annual prevalence of current smoking (simulated)")
plot(2015:(2015+input$values$global_parameters$time_horizon-1), smoker_packyears[, 1], type = "l", ylim = c(0, 30), col = "black", xlab = "Year", ylab = "Average Pack years")
lines(2015:(2015+input$values$global_parameters$time_horizon-1), smoker_packyears[, 2], type = "l", col = "grey")
legend("topright", c("male", "female"), lty = c(1, 1), col = c("black", "grey"))
title(cex.main = 0.5, "Average Pack-Years Per Year for 40+ Population (simulated)")
z <- log(rowSums(smoker_prev))
message("average decline in % of current_smoking rate is", 1 - exp(mean(c(z[-1], NaN) - z, na.rm = TRUE)))
#plotting overall distribution of smoking stats over time
smoking_status_ctime <- matrix (NA, nrow = input$values$global_parameters$time_horizon, ncol = 4)
colnames(smoking_status_ctime) <- c("Year", "Non-Smoker", "Smoker", "Former smoker")
smoking_status_ctime[1:(input$values$global_parameters$time_horizon), 1] <- c(2015:(2015 + input$values$global_parameters$time_horizon-1))
smoking_status_ctime [, 2:4] <- op_ex$n_smoking_status_by_ctime / rowSums(as.data.frame (op_ex$n_alive_by_ctime_sex)) * 100
df <- as.data.frame(smoking_status_ctime)
dfm <- reshape2::melt(df[,c("Year", "Non-Smoker", "Smoker", "Former smoker")], id.vars = 1)
plot_smoking_status_ctime <- ggplot2::ggplot(dfm, aes(x = Year, y = value, color = variable)) +
geom_point () + geom_line() + labs(title = "Smoking Status per year") + ylab ("%") +
scale_colour_manual(values = c("#66CC99", "#CC6666", "#56B4E9")) + scale_y_continuous(breaks = scales::pretty_breaks(n = 12))
plot(plot_smoking_status_ctime)
terminate_session()
# Preparing dataframe of rates for the test expectations
n_alive <- rowSums(op_ex$n_alive_by_ctime_sex)
results_df <- data.frame(
Year = smoking_status_ctime[, "Year"],
Current = op_ex$n_smoking_status_by_ctime[, 2] / n_alive,
Former = op_ex$n_smoking_status_by_ctime[, 3] / n_alive,
NonSmoker = op_ex$n_smoking_status_by_ctime[, 1] / n_alive
)
return(results_df)
}
# Check for unsupported jurisdictions
if (tolower(jurisdiction) != "canada") {
stop("Validation for jurisdiction '", jurisdiction, "' has not been implemented yet. Currently only 'canada' and 'us' are supported.")
}
# Canada implementation (existing code)
message("Welcome to EPIC validator! Today we will see if the model make good smoking predictions")
petoc()
settings <- default_settings
settings$record_mode <- record_mode["record_mode_event"]
settings$agent_stack_size <- 0
settings$n_base_agents <- 1e+05
settings$event_stack_size <- settings$n_base_agents * 1.7 * 30
init_session(settings = settings)
input <- model_input$values
message("\nBecause you have called me with remove_COPD=", remove_COPD, ", I am", c("NOT", "indeed")[remove_COPD + 1], "going to remove COPD-related mortality from my calculations")
if (remove_COPD) {
input$exacerbation$logit_p_death_by_sex <- input$exacerbation$logit_p_death_by_sex * -10000 # TODO why was this zero? Amin
}
if (!is.null(intercept_k))
input$manual$smoking$intercept_k <- intercept_k
petoc()
message("There are two validation targets: 1) the prevalence of current smokers (by sex) in 2015, and 2) the projected decline in smoking rate.\n")
message("Starting validation target 1: baseline prevalence of smokers.\n")
petoc()
# CanSim.105.0501<-read.csv(paste(data_path,'/CanSim.105.0501.csv',sep=''),header=TRUE) Included in the package as internal data
tab1 <- rbind(CanSim.105.0501[1:3, "value"], CanSim.105.0501[4:6, "value"])/100
message("This is the observed percentage of current smokers in 2014 (m,f)\n")
barplot(tab1, beside = TRUE, names.arg = c("40", "52", "65+"), ylim = c(0, 0.4), xlab = "Age group", ylab = "Prevalenc of smoking",
col = c("black", "grey"))
title(cex.main = 0.5, "Prevalence of current smoker by sex and age group (observed)")
legend("topright", c("Male", "Female"), fill = c("black", "grey"))
petoc()
message("Now I will run the model using the default smoking parameters")
petoc()
message("running the model\n")
run(input = input)
dataS <- get_all_events_matrix()
dataS <- dataS[which(dataS[, "event"] == events["event_start"]), ]
age_list <- list(a1 = c(35, 45), a2 = c(45, 65), a3 = c(65, 111))
tab2 <- tab1
for (i in 0:1) for (j in 1:length(age_list)) tab2[i + 1, j] <- mean(dataS[which(dataS[, "female"] == i & dataS[, "age_at_creation"] >
age_list[[j]][1] & dataS[, "age_at_creation"] <= age_list[[j]][2]), "smoking_status"])
message("This is the model generated bar plot")
petoc()
barplot(tab2, beside = TRUE, names.arg = c("40", "52", "65+"), ylim = c(0, 0.4), xlab = "Age group", ylab = "Prevalence of smoking",
col = c("black", "grey"))
title(cex.main = 0.5, "Prevalence of current smoking at creation (simulated)")
legend("topright", c("Male", "Female"), fill = c("black", "grey"))
message("This step is over; press enter to continue to step 2")
petoc()
message("Now we will validate the model on smoking trends")
petoc()
message("According to Table 2.1 of this report (see the extracted data in data folder): http://www.tobaccoreport.ca/2015/TobaccoUseinCanada_2015.pdf, the prevalence of current smoker is declining by around 3.8% per year\n")
petoc()
op_ex <- get_output_ex()
smoker_prev <- op_ex$n_current_smoker_by_ctime_sex/op_ex$n_alive_by_ctime_sex
smoker_packyears <- op_ex$sum_pack_years_by_ctime_sex/op_ex$n_alive_by_ctime_sex
plot(2015:(2015+input$global_parameters$time_horizon-1), smoker_prev[, 1], type = "l", ylim = c(0, 0.25), col = "black", xlab = "Year", ylab = "Prevalence of current smoking")
lines(2015:(2015+input$global_parameters$time_horizon-1), smoker_prev[, 2], type = "l", col = "grey")
legend("topright", c("male", "female"), lty = c(1, 1), col = c("black", "grey"))
title(cex.main = 0.5, "Annual prevalence of currrent smoking (simulated)")
plot(2015:(2015+input$global_parameters$time_horizon-1), smoker_packyears[, 1], type = "l", ylim = c(0, 30), col = "black", xlab = "Year", ylab = "Average Pack years")
lines(2015:(2015+input$global_parameters$time_horizon-1), smoker_packyears[, 2], type = "l", col = "grey")
legend("topright", c("male", "female"), lty = c(1, 1), col = c("black", "grey"))
title(cex.main = 0.5, "Average Pack-Years Per Year for 40+ Population (simulated)")
z <- log(rowSums(smoker_prev))
message("average decline in % of current_smoking rate is", 1 - exp(mean(c(z[-1], NaN) - z, na.rm = TRUE)))
petoc()
#plotting overall distribution of smoking stats over time
smoking_status_ctime <- matrix (NA, nrow = input$global_parameters$time_horizon, ncol = 4)
colnames(smoking_status_ctime) <- c("Year", "Non-Smoker", "Smoker", "Former smoker")
smoking_status_ctime[1:(input$global_parameters$time_horizon), 1] <- c(2015:(2015 + input$global_parameters$time_horizon-1))
smoking_status_ctime [, 2:4] <- op_ex$n_smoking_status_by_ctime / rowSums(as.data.frame (op_ex$n_alive_by_ctime_sex)) * 100
df <- as.data.frame(smoking_status_ctime)
dfm <- reshape2::melt(df[,c("Year", "Non-Smoker", "Smoker", "Former smoker")], id.vars = 1)
plot_smoking_status_ctime <- ggplot2::ggplot(dfm, aes(x = Year, y = value, color = variable)) +
geom_point () + geom_line() + labs(title = "Smoking Status per year") + ylab ("%") +
scale_colour_manual(values = c("#66CC99", "#CC6666", "#56B4E9")) + scale_y_continuous(breaks = scales::pretty_breaks(n = 12))
plot(plot_smoking_status_ctime ) #plot needs to be showing
# Plotting pack-years over time
dataS <- as.data.frame (get_all_events_matrix())
dataS <- subset (dataS, (event == 0 | event == 1 ))
data_all <- dataS
dataS <- subset (dataS, pack_years != 0)
avg_pack_years_ctime <- matrix (NA, nrow = input$global_parameters$time_horizon + 1, ncol = 4)
colnames(avg_pack_years_ctime) <- c("Year", "Smokers PYs", "Former Smokers PYs", "all")
avg_pack_years_ctime[1:(input$global_parameters$time_horizon + 1), 1] <- c(2015:(2015 + input$global_parameters$time_horizon))
for (i in 0:input$global_parameters$time_horizon) {
smokers <- subset (dataS, (floor(local_time + time_at_creation) == (i)) & smoking_status != 0)
prev_smokers <- subset (dataS, (floor(local_time + time_at_creation) == (i)) & smoking_status == 0)
all <- subset (data_all, floor(local_time + time_at_creation) == i)
avg_pack_years_ctime[i+1, "Smokers PYs"] <- colSums(smokers)[["pack_years"]] / dim (smokers)[1]
avg_pack_years_ctime[i+1, "Former Smokers PYs"] <- colSums(prev_smokers)[["pack_years"]] / dim (prev_smokers) [1]
avg_pack_years_ctime[i+1, "all"] <- colSums(all)[["pack_years"]] / dim (all) [1] #includes non-smokers
}
df <- as.data.frame(avg_pack_years_ctime)
dfm <- reshape2::melt(df[,c( "Year", "Smokers PYs", "Former Smokers PYs", "all")], id.vars = 1)
plot_avg_pack_years_ctime <- ggplot2::ggplot(dfm, aes(x = Year, y = value, color = variable)) +
geom_point () + geom_line() + labs(title = "Average pack-years per year ") + ylab ("Pack-years")
plot(plot_avg_pack_years_ctime) #plot needs to be showing
# Plotting pack-years over age
avg_pack_years_age <- matrix (NA, nrow = 110 - 40 + 1, ncol = 3)
colnames(avg_pack_years_age) <- c("Age", "Smokers PYs", "Former Smokers PYs")
avg_pack_years_age[1:(110 - 40 + 1), 1] <- c(40:110)
for (i in 0:(110 - 40)) {
smokers <- subset (dataS, (floor (local_time + age_at_creation) == (i+40)) & smoking_status != 0)
prev_smokers <- subset (dataS, (floor (local_time + age_at_creation) == (i+40)) & smoking_status == 0)
avg_pack_years_age[i+1, "Smokers PYs"] <- colSums(smokers)[["pack_years"]] / dim (smokers)[1]
avg_pack_years_age[i+1, "Former Smokers PYs"] <- colSums(prev_smokers)[["pack_years"]] / dim (prev_smokers) [1]
}
df <- as.data.frame(avg_pack_years_age)
dfm <- reshape2::melt(df[,c( "Age", "Smokers PYs", "Former Smokers PYs")], id.vars = 1)
plot_avg_pack_years_age <- ggplot2::ggplot(dfm, aes(x = Age, y = value, color = variable, ymin = 40, ymax = 100)) +
geom_point () + geom_line() + labs(title = "Average pack-years per age ") + ylab ("Pack-years")
plot(plot_avg_pack_years_age) #plot needs to be showing
message("This test is over; terminating the session")
petoc()
terminate_session()
}
#' Basic COPD test.
#' @return validation test results
#' @export
sanity_COPD <- function() {
settings <- default_settings
settings$record_mode <- session_env$record_mode["record_mode_agent"]
settings$agent_stack_size <- 0
settings$n_base_agents <- 20000
settings$event_stack_size <- settings$n_base_agents * 10
init_session(settings = settings)
# Load input parameters
inp <- get_input()
message("COPD incidence and prevalence parameters are as follows\n")
message("model_input$values$COPD$logit_p_COPD_betas_by_sex:\n")
message(paste(capture.output(inp$values$COPD$logit_p_COPD_betas_by_sex), collapse = "\n"))
petoc()
message("model_input$values$COPD$p_prevalent_COPD_stage:\n")
message(paste(capture.output(inp$values$COPD$p_prevalent_COPD_stage), collapse = "\n"))
petoc()
message("model_input$values$COPD$ln_h_COPD_betas_by_sex:\n")
message(paste(capture.output(inp$values$COPD$ln_h_COPD_betas_by_sex), collapse = "\n"))
petoc()
message("Now I am going to first turn off both prevalence and incidence parameters and run the model to see how many COPDs I get\n")
petoc()
input <- inp$values
input$COPD$logit_p_COPD_betas_by_sex <- input$COPD$logit_p_COPD_betas_by_sex * 0 - 100
input$COPD$ln_h_COPD_betas_by_sex <- input$COPD$ln_h_COPD_betas_by_sex * 0 - 100
run(input = input)
message("The model is reporting it has got this many COPDs cases: ", get_output()$n_COPD, " out of ", get_output()$n_agents, " agents.\n")
all_events <- as.data.frame(get_all_events_matrix())
dataS <- all_events[all_events$event == events["event_start"], ]
message("The prevalence of COPD in Start event dump is: ", mean(dataS[, "gold"] > 0), "\n")
dataS <- all_events[all_events$event == events["event_end"], ]
message("The prevalence of COPD in End event dump is: ", mean(dataS[, "gold"] > 0), "\n")
petoc()
message("Now I am going to switch off incidence and create COPD patients only through prevalence (using natural prevalence)")
petoc()
terminate_session()
init_session(settings = settings)
inp <- get_input()
input <- inp$values
# Keep prevalence at natural values (don't multiply by 0)
# Turn off incidence completely
input$COPD$ln_h_COPD_betas_by_sex <- input$COPD$ln_h_COPD_betas_by_sex * 0 - 100
run(input = input)
message("The model is reporting it has got this many COPDs: ", get_output()$n_COPD, " out of ", get_output()$n_agents, "agents.\n")
all_events <- as.data.frame(get_all_events_matrix())
dataS <- all_events[all_events$event == events["event_start"], ]
message("The prevalence of COPD in Start event dump is: ", mean(dataS[, "gold"] > 0), "\n")
dataS <- all_events[all_events$event == events["event_end"], ]
message("The prevalence of COPD in End event dump is: ", mean(dataS[, "gold"] > 0), "\n")
petoc()
message("Now I am going to switch off prevalence and create COPD patients only through incidence\n")
petoc()
terminate_session()
init_session(settings = settings)
inp <- get_input()
input <- inp$values
# Turn off prevalence completely
input$COPD$logit_p_COPD_betas_by_sex <- input$COPD$logit_p_COPD_betas_by_sex * 0 - 100
# Keep incidence at natural values (don't set to negative)
run(input = input)
message("The model is reporting it has got this many COPDs: ", get_output()$n_COPD, " out of ", get_output()$n_agents, "agents.\n")
all_events <- as.data.frame(get_all_events_matrix())
dataS <- all_events[all_events$event == events["event_start"], ]
message("The prevalence of COPD in Start event dump is: ", mean(dataS[, "gold"] > 0), "\n")
dataS <- all_events[all_events$event == events["event_end"], ]
message("The prevalence of COPD in End event dump is: ", mean(dataS[, "gold"] > 0), "\n")
petoc()
terminate_session()
}
#' Returns results of validation tests for COPD
#'
#' This function runs validation tests for COPD model outputs. It estimates the baseline
#' prevalence and incidence of COPD, along with sex-specific baseline COPD prevalence.
#' Additionally, it calculates the baseline prevalence of COPD by age groups and
#' smoking pack-years. It also estimates the coefficients for the relationships between
#' age, pack-years, smoking status, and the prevalence of COPD.
#'
#'
#' @param incident_COPD_k a number (default=1) by which the incidence rate of COPD will be multiplied.
#' @param return_CI if TRUE, returns 95 percent confidence intervals for the "Year" coefficient
#' @param jurisdiction character string specifying the jurisdiction for validation ("canada" or "us"). Default is "canada".
#' @return For Canada: list with validation test results. For US: data frame with COPD prevalence by age group over time.
#' @export
validate_COPD <- function(incident_COPD_k = 1, return_CI = FALSE, jurisdiction = "canada") # The incidence rate is multiplied by K
{
# Handle US jurisdiction
if (tolower(jurisdiction) == "us") {
settings <- get_default_settings()
settings$record_mode <- 0
settings$n_base_agents <- 1e6
time_horizon <- 26
results <- simulate(settings = settings, jurisdiction = "us", time_horizon = time_horizon, extended_results = TRUE)
output <- results$extended
# Determine overall COPD prevalence
COPDprevalence_ctime_age <- as.data.frame(output$n_COPD_by_ctime_age)
totalpopulation <- output$n_alive_by_ctime_age
# Overall prevalence of COPD
alive_age_all <- rowSums(output$n_alive_by_ctime_age[1:time_horizon, 40:111])
COPD_age_all <- rowSums(output$n_COPD_by_ctime_age[1:time_horizon, 40:111])
prevalenceCOPD_age_all <- COPD_age_all / alive_age_all
# Prevalence by age 40-59
alive_age_40to59 <- rowSums(output$n_alive_by_ctime_age[1:time_horizon, 40:59])
COPD_age_40to59 <- rowSums(output$n_COPD_by_ctime_age[1:time_horizon, 40:59])
prevalenceCOPD_age_40to59 <- COPD_age_40to59 / alive_age_40to59
# Prevalence by age 60-79
alive_age_60to79 <- rowSums(output$n_alive_by_ctime_age[1:time_horizon, 60:79])
COPD_age_60to79 <- rowSums(output$n_COPD_by_ctime_age[1:time_horizon, 60:79])
prevalenceCOPD_age_60to79 <- COPD_age_60to79 / alive_age_60to79
# Prevalence by age 80+
alive_age_over80 <- rowSums(output$n_alive_by_ctime_age[1:time_horizon, 80:111])
COPD_age_over80 <- rowSums(output$n_COPD_by_ctime_age[1:time_horizon, 80:111])
prevalenceCOPD_age_over80 <- COPD_age_over80 / alive_age_over80
# Display summary
COPD_prevalence_summary <- data.frame(
Year = 2015:(2015 + time_horizon - 1),
Prevalence_all = prevalenceCOPD_age_all,
Prevalence_40to59 = prevalenceCOPD_age_40to59,
Prevalence_60to79 = prevalenceCOPD_age_60to79,
Prevalence_over80 = prevalenceCOPD_age_over80
)
message(paste(capture.output(knitr::kable(COPD_prevalence_summary,
caption = "COPD Prevalence by Age Group Over Time",
digits = 3)), collapse = "\n"))
# Plot: All Ages
plot_prevalenceCOPD_age_all <- data.frame(
Year = 2015:(2015 + time_horizon - 1),
Prevalence = prevalenceCOPD_age_all
)
gg_plot_prevalenceCOPD_age_all <- ggplot2::ggplot(plot_prevalenceCOPD_age_all,
ggplot2::aes(x = .data$Year, y = .data$Prevalence)) +
ggplot2::geom_line(linewidth = 1.5, color = "#003f5c") +
ggplot2::geom_point(size = 3, color = "#66c2ff", stroke = 0.8) +
ggplot2::scale_y_continuous(
labels = scales::percent_format(accuracy = 1),
limits = c(0, 0.15),
breaks = seq(0, 0.15, by = 0.05)
) +
ggplot2::scale_x_continuous(breaks = seq(2015, 2040, by = 5)) +
ggplot2::labs(
title = "COPD Prevalence Over Time (All Ages)",
subtitle = "Estimated proportion of population with COPD from 2016-2040",
x = "Year",
y = "Prevalence (%)"
) +
ggplot2::theme_minimal(base_size = 14) +
ggplot2::theme(
plot.title = ggplot2::element_text(face = "bold", size = 18, hjust = 0.5, margin = ggplot2::margin(b = 8)),
plot.subtitle = ggplot2::element_text(size = 14, hjust = 0.5),
axis.title = ggplot2::element_text(face = "bold"),
axis.text = ggplot2::element_text(color = "black"),
axis.line = ggplot2::element_line(color = "black", linewidth = 0.8),
panel.grid.major = ggplot2::element_blank(),
panel.grid.minor = ggplot2::element_blank()
)
print(gg_plot_prevalenceCOPD_age_all)
# Plot: Age 40-59
plot_prevalence_40to59 <- data.frame(
Year = 2015:(2015 + time_horizon - 1),
Prevalence = prevalenceCOPD_age_40to59
)
gg_plot_prevalence_40to59 <- ggplot2::ggplot(plot_prevalence_40to59,
ggplot2::aes(x = .data$Year, y = .data$Prevalence)) +
ggplot2::geom_line(linewidth = 1.5, color = "#003f5c") +
ggplot2::geom_point(size = 3, color = "#66c2ff", stroke = 0.8) +
ggplot2::scale_y_continuous(
labels = scales::percent_format(accuracy = 1),
limits = c(0, 0.10),
breaks = seq(0, 0.10, by = 0.05)) +
ggplot2::scale_x_continuous(breaks = seq(2015, 2040, by = 5)) +
ggplot2::labs(
title = "COPD Prevalence Over Time (Age 40-59)",
subtitle = "Estimated proportion of population with COPD from 2016-2040",
x = "Year",
y = "Prevalence (%)") +
ggplot2::theme_minimal(base_size = 14) +
ggplot2::theme(
plot.title = ggplot2::element_text(face = "bold", size = 18, hjust = 0.5, margin = ggplot2::margin(b = 8)),
plot.subtitle = ggplot2::element_text(size = 14, hjust = 0.5),
axis.title = ggplot2::element_text(face = "bold"),
axis.text = ggplot2::element_text(color = "black"),
axis.line = ggplot2::element_line(color = "black", linewidth = 0.8),
panel.grid.major = ggplot2::element_blank(),
panel.grid.minor = ggplot2::element_blank()
)
print(gg_plot_prevalence_40to59)
# Plot: Age 60-79
plot_prevalence_60to79 <- data.frame(
Year = 2015:(2015 + time_horizon - 1),
Prevalence = prevalenceCOPD_age_60to79
)
gg_plot_prevalence_60to79 <- ggplot2::ggplot(plot_prevalence_60to79,
ggplot2::aes(x = .data$Year, y = .data$Prevalence)) +
ggplot2::geom_line(linewidth = 1.5, color = "#003f5c") +
ggplot2::geom_point(size = 3, color = "#66c2ff", stroke = 0.8) +
ggplot2::scale_y_continuous(
labels = scales::percent_format(accuracy = 1),
limits = c(0, 0.15),
breaks = seq(0, 0.15, by = 0.05)
) +
ggplot2::scale_x_continuous(breaks = seq(2015, 2040, by = 5)) +
ggplot2::labs(
title = "COPD Prevalence Over Time (Age 60-79)",
subtitle = "Estimated proportion of population with COPD from 2016-2040",
x = "Year",
y = "Prevalence (%)"
) +
ggplot2::theme_minimal(base_size = 14) +
ggplot2::theme(
plot.title = ggplot2::element_text(face = "bold", size = 18, hjust = 0.5, margin = ggplot2::margin(b = 8)),
plot.subtitle = ggplot2::element_text(size = 14, hjust = 0.5),
axis.title = ggplot2::element_text(face = "bold"),
axis.text = ggplot2::element_text(color = "black"),
axis.line = ggplot2::element_line(color = "black", linewidth = 0.8),
panel.grid.major = ggplot2::element_blank(),
panel.grid.minor = ggplot2::element_blank()
)
print(gg_plot_prevalence_60to79)
# Plot: Age 80+
plot_prevalence_over80 <- data.frame(
Year = 2015:(2015 + time_horizon - 1),
Prevalence = prevalenceCOPD_age_over80
)
gg_plot_prevalence_over80 <- ggplot2::ggplot(plot_prevalence_over80,
ggplot2::aes(x = .data$Year, y = .data$Prevalence)) +
ggplot2::geom_line(linewidth = 1.5, color = "#003f5c") +
ggplot2::geom_point(size = 3, color = "#66c2ff", stroke = 0.8) +
ggplot2::scale_y_continuous(
labels = scales::percent_format(accuracy = 1),
limits = c(0, 0.30),
breaks = seq(0, 0.30, by = 0.05)
) +
ggplot2::scale_x_continuous(breaks = seq(2015, 2040, by = 5)) +
ggplot2::labs(
title = "COPD Prevalence Over Time (Age 80+)",
subtitle = "Estimated proportion of population with COPD from 2016-2040",
x = "Year",
y = "Prevalence (%)"
) +
ggplot2::theme_minimal(base_size = 14) +
ggplot2::theme(
plot.title = ggplot2::element_text(face = "bold", size = 18, hjust = 0.5, margin = ggplot2::margin(b = 8)),
plot.subtitle = ggplot2::element_text(size = 14, hjust = 0.5),
axis.title = ggplot2::element_text(face = "bold"),
axis.text = ggplot2::element_text(color = "black"),
axis.line = ggplot2::element_line(color = "black", linewidth = 0.8),
panel.grid.major = ggplot2::element_blank(),
panel.grid.minor = ggplot2::element_blank()
)
print(gg_plot_prevalence_over80)
terminate_session()
return(COPD_prevalence_summary)
}
# Check for unsupported jurisdictions
if (tolower(jurisdiction) != "canada") {
stop("Validation for jurisdiction '", jurisdiction, "' has not been implemented yet. Currently only 'canada' and 'us' are supported.")
}
# Canada implementation (existing code)
out <- list()
settings <- default_settings
settings$record_mode <- record_mode["record_mode_event"]
settings$agent_stack_size <- 0
settings$n_base_agents <- 1e+05
settings$event_stack_size <- settings$n_base_agents * 50
init_session(settings = settings)
input <- model_input$values
if (incident_COPD_k == 0)
input$COPD$ln_h_COPD_betas_by_sex <- input$COPD$ln_h_COPD_betas_by_sex * 0 - 100 else input$COPD$ln_h_COPD_betas_by_sex[1, ] <- model_input$values$COPD$ln_h_COPD_betas_by_sex[1, ] + log(incident_COPD_k)
message("working...\n")
run(input = input)
op <- get_output()
opx <- get_output_ex()
data <- as.data.frame(get_all_events_matrix())
dataS <- data[which(data[, "event"] == events["event_start"]), ]
dataE <- data[which(data[, "event"] == events["event_end"]), ]
out$p_copd_at_creation <- mean(dataS[, "gold"] > 0)
new_COPDs <- which(dataS[which(dataE[, "gold"] > 0), "gold"] == 0)
out$inc_copd <- sum(opx$n_inc_COPD_by_ctime_age)/opx$cumul_non_COPD_time
out$inc_copd_by_sex <- sum(opx$n_inc_COPD_by_ctime_age)/opx$cumul_non_COPD_time
x <- sqldf::sqldf("SELECT female, SUM(gold>0) AS n_copd, COUNT(*) AS n FROM dataS GROUP BY female")
out$p_copd_at_creation_by_sex <- x[, "n_copd"]/x[, "n"]
age_cats <- c(40, 50, 60, 70, 80, 111)
dataS[, "age_cat"] <- as.numeric(cut(dataS[, "age_at_creation"] + dataS[, "local_time"], age_cats, include.lowest = TRUE))
x <- sqldf::sqldf("SELECT age_cat, SUM(gold>0) AS n_copd, COUNT(*) AS n FROM dataS GROUP BY age_cat")
temp <- x[, "n_copd"]/x[, "n"]
names(temp) <- paste(age_cats[-length(age_cats)], age_cats[-1], sep = "-")
out$p_copd_at_creation_by_age <- temp
py_cats <- c(0, 15, 30, 45, Inf)
dataS[, "py_cat"] <- as.numeric(cut(dataS[, "pack_years"], py_cats, include.lowest = TRUE))
x <- sqldf::sqldf("SELECT py_cat, SUM(gold>0) AS n_copd, COUNT(*) AS n FROM dataS GROUP BY py_cat")
temp <- x[, "n_copd"]/x[, "n"]
names(temp) <- paste(py_cats[-length(py_cats)], py_cats[-1], sep = "-")
out$p_copd_at_creation_by_pack_years <- temp
dataF <- data[which(data[, "event"] == events["event_fixed"]), ]
dataF[, "age"] <- dataF[, "local_time"] + dataF[, "age_at_creation"]
dataF[, "copd"] <- (dataF[, "gold"] > 0) * 1
dataF[, "gold2p"] <- (dataF[, "gold"] > 1) * 1
dataF[, "gold3p"] <- (dataF[, "gold"] > 2) * 1
dataF[, "year"] <- dataF[, "local_time"] + dataF[, "time_at_creation"]
res <- glm(data = dataF[which(dataF[, "female"] == 0), ], formula = copd ~ age + pack_years + smoking_status + year, family = binomial(link = logit))
out$calib_prev_copd_reg_coeffs_male <- coefficients(res)
if (return_CI) {out$conf_prev_copd_reg_coeffs_male <- stats::confint(res, "year", level = 0.95)}
res <- glm(data = dataF[which(dataF[, "female"] == 1), ], formula = copd ~ age + pack_years + smoking_status + year, family = binomial(link = logit))
out$calib_prev_copd_reg_coeffs_female <- coefficients(res)
if (return_CI) {out$conf_prev_copd_reg_coeffs_female <- stats::confint(res, "year", level = 0.95)}
res <- glm(data = dataF[which(dataF[, "female"] == 0), ], formula = gold2p ~ age + pack_years + smoking_status + year, family = binomial(link = logit))
out$calib_prev_gold2p_reg_coeffs_male <- coefficients(res)
if (return_CI) {out$conf_prev_gold2p_reg_coeffs_male <- stats::confint(res, "year", level = 0.95)}
res <- glm(data = dataF[which(dataF[, "female"] == 1), ], formula = gold2p ~ age + pack_years + smoking_status + year, family = binomial(link = logit))
out$calib_prev_gold2p_reg_coeffs_female <- coefficients(res)
if (return_CI) {out$conf_prev_gold2p_reg_coeffs_female <- stats::confint(res, "year", level = 0.95)}
res <- glm(data = dataF[which(dataF[, "female"] == 0), ], formula = gold3p ~ age + pack_years + smoking_status + year, family = binomial(link = logit))
out$calib_prev_gold3p_reg_coeffs_male <- coefficients(res)
if (return_CI) {out$conf_prev_gold3p_reg_coeffs_male <- stats::confint(res, "year", level = 0.95)}
res <- glm(data = dataF[which(dataF[, "female"] == 1), ], formula = gold3p ~ age + pack_years + smoking_status + year, family = binomial(link = logit))
out$calib_prev_gold3p_reg_coeffs_female <- coefficients(res)
if (return_CI) {out$conf_prev_gold3p_reg_coeffs_female <- stats::confint(res, "year", level = 0.95)}
terminate_session()
return(out)
}
#' Returns results of validation tests for payoffs, costs and QALYs
#' @param nPatient number of simulated patients. Default is 1e6.
#' @param disableDiscounting if TRUE, discounting will be disabled for cost and QALY calculations. Default: TRUE
#' @param disableExacMortality if TRUE, mortality due to exacerbations will be disabled for cost and QALY calculations. Default: TRUE
#' @param jurisdiction character string specifying the jurisdiction for validation ("canada" or "us"). Default is "canada". Currently only "canada" is implemented.
#' @return validation test results
#' @export
validate_payoffs <- function(nPatient = 1e6, disableDiscounting = TRUE, disableExacMortality = TRUE, jurisdiction = "canada")
{
# Check jurisdiction
if (tolower(jurisdiction) != "canada") {
stop("Validation for jurisdiction '", jurisdiction, "' has not been implemented yet. Currently only 'canada' is supported.")
}
out <- list()
settings <- default_settings
settings$record_mode <- record_mode["record_mode_none"]
settings$agent_stack_size <- 0
settings$n_base_agents <- nPatient
settings$event_stack_size <- 0
init_session(settings = settings)
input <- model_input$values
if (disableDiscounting) {
input$global_parameters$discount_cost <- 0
input$global_parameters$discount_qaly <- 0
}
if (disableExacMortality) {
input$exacerbation$logit_p_death_by_sex <- -1000 + 0*input$exacerbation$logit_p_death_by_sex
}
run(input = input)
op <- get_output()
op_ex <- get_output_ex()
exac_dutil<-get_inputs()$utility$exac_dutil
exac_dcost<-get_inputs()$cost$exac_dcost
total_qaly<-colSums(op_ex$cumul_qaly_gold_ctime)[2:5]
qaly_loss_dueto_exac_by_gold<-rowSums(op_ex$n_exac_by_gold_severity*exac_dutil)
back_calculated_utilities<-(total_qaly-qaly_loss_dueto_exac_by_gold)/colSums(op_ex$cumul_time_by_ctime_GOLD)[2:5]
#I=0.81,II=0.72,III=0.68,IV=0.58)))
out$cumul_time_per_GOLD <- colSums(op_ex$cumul_time_by_ctime_GOLD)[2:5]
out$total_qaly <- total_qaly
out$qaly_loss_dueto_exac_by_gold <- qaly_loss_dueto_exac_by_gold
out$back_calculated_utilities <- back_calculated_utilities
out$utility_target_values <- input$utility$bg_util_by_stage
out$utility_difference_percentage <- (out$back_calculated_utilities - out$utility_target_values[2:5]) / out$utility_target_values[2:5] * 100
total_cost<-colSums(op_ex$cumul_cost_gold_ctime)[2:5]
cost_dueto_exac_by_gold<-rowSums(t((exac_dcost)*t(op_ex$n_exac_by_gold_severity)))
back_calculated_costs<-(total_cost-cost_dueto_exac_by_gold)/colSums(op_ex$cumul_time_by_ctime_GOLD)[2:5]
#I=615, II=1831, III=2619, IV=3021
out$total_cost <- total_cost
out$cost_dueto_exac_by_gold <- cost_dueto_exac_by_gold
out$back_calculated_costs <- back_calculated_costs
out$cost_target_values <- input$cost$bg_cost_by_stage
out$cost_difference_percentage <- (out$back_calculated_costs - out$cost_target_values[2:5]) / out$cost_target_values[2:5] * 100
terminate_session()
return(out)
}
#' Returns results of validation tests for mortality rate
#'
#' This function returns a table and a plot of the difference between simulated and expected
#' (life table) mortality, by sex and age.
#' @param n_sim number of simulated agents
#' @param bgd a number
#' @param bgd_h a number
#' @param manual a number
#' @param exacerbation a number
#' @param jurisdiction character string specifying the jurisdiction ("canada" or "us"). Default is "canada"
#' @return validation test results
#' @export
validate_mortality <- function(n_sim = 5e+05, bgd = 1, bgd_h = 1, manual = 1, exacerbation = 1, jurisdiction = "canada") {
message("Hello from EPIC! I am going to test mortality rate and how it is affected by input parameters\n")
# Check jurisdiction
if (tolower(jurisdiction) != "canada") {
stop("Validation for jurisdiction '", jurisdiction, "' has not been implemented yet. Currently only 'canada' is supported.")
}
petoc()
settings <- default_settings
settings$record_mode <- record_mode["record_mode_none"]
settings$agent_stack_size <- 0
settings$n_base_agents <- n_sim
settings$event_stack_size <- 0
init_session(settings = settings)
input <- model_input$values
input$global_parameters$time_horizon <- 1
input$agent$p_bgd_by_sex <- input$agent$p_bgd_by_sex * bgd
input$agent$ln_h_bgd_betas <- input$agent$ln_h_bgd_betas * bgd_h
input$manual$explicit_mortality_by_age_sex <- input$manual$explicit_mortality_by_age_sex * manual
input$exacerbation$logit_p_death_by_sex <- input$exacerbation$logit_p_death_by_sex * exacerbation
message("working...\n")
res <- run(input = input)
message("Mortality rate was", get_output()$n_death/get_output()$cumul_time, "\n")
if (get_output()$n_death > 0) {
ratio<-(get_output_ex()$n_death_by_age_sex[41:111,]/get_output_ex()$sum_time_by_age_sex[41:111,])/model_input$values$agent$p_bgd_by_sex[41:111,]
plot(40:110,ratio[,1],type='l',col='blue',xlab="age",ylab="Ratio", ylim = c(0, 4))
legend("topright",c("male","female"),lty=c(1,1),col=c("blue","red"))
lines(40:110,ratio[,2],type='l',col='red')
title(cex.main=0.5,"Ratio of simulated to expected (life table) mortality, by sex and age")
difference <- (get_output_ex()$n_death_by_age_sex[41:91, ]/get_output_ex()$sum_time_by_age_sex[41:91, ]) - model_input$values$agent$p_bgd_by_sex[41:91,
]
plot(40:90, difference[, 1], type = "l", col = "blue", xlab = "age", ylab = "Difference", ylim = c(-.1, .1))
legend("topright", c("male", "female"), lty = c(1, 1), col = c("blue", "red"))
lines(40:90, difference[, 2], type = "l", col = "red")
title(cex.main = 0.5, "Difference between simulated and expected (life table) mortality, by sex and age")
return(list(difference = difference))
} else message("No death occured.\n")
}
# validate_comorbidity removed - MI/stroke/HF deprecated
#' Returns results of validation tests for lung function
#'
#' This function evaluates FEV1 (Forced Expiratory Volume in 1 second) values
#' and GOLD stage distributions to assess lung function in simulated patients.
#' @param jurisdiction character string specifying the jurisdiction ("canada" or "us"). Default is "canada"
#' @return validation test results
#' @export
validate_lung_function <- function(jurisdiction = "canada") {
message("This function examines FEV1 values\n")
# Check jurisdiction
if (tolower(jurisdiction) != "canada") {
stop("Validation for jurisdiction '", jurisdiction, "' has not been implemented yet. Currently only 'canada' is supported.")
}
petoc()
settings <- default_settings
settings$record_mode <- record_mode["record_mode_some_event"]
settings$events_to_record <- events[c("event_start", "event_COPD", "event_fixed")]
settings$agent_stack_size <- 0
settings$n_base_agents <- 1e+05
settings$event_stack_size <- settings$n_base_agents * 100
init_session(settings = settings)
input <- model_input$values
input$global_parameters$discount_qaly <- 0
run(input = input)
all_events <- as.data.frame(get_all_events_matrix())
COPD_events <- which(all_events[, "event"] == events["event_COPD"])
start_events <- which(all_events[, "event"] == events["event_start"])
out_FEV1_prev <- sqldf::sqldf(paste("SELECT gold, AVG(FEV1) AS 'Mean', STDEV(FEV1) AS 'SD' FROM all_events WHERE event=", events["event_start"],
" GROUP BY gold"))
out_FEV1_inc <- sqldf::sqldf(paste("SELECT gold, AVG(FEV1) AS 'Mean', STDEV(FEV1) AS 'SD' FROM all_events WHERE event=", events["event_COPD"],
" GROUP BY gold"))
out_gold_prev <- sqldf::sqldf(paste("SELECT gold, COUNT(*) AS N FROM all_events WHERE event=", events["event_start"], " GROUP BY gold"))
out_gold_prev[, "Percent"] <- round(out_gold_prev[, "N"]/sum(out_gold_prev[, "N"]), 3)
out_gold_inc <- sqldf::sqldf(paste("SELECT gold, COUNT(*) AS N FROM all_events WHERE event=", events["event_COPD"], " GROUP BY gold"))
out_gold_inc[, "Percent"] <- round(out_gold_inc[, "N"]/sum(out_gold_inc[, "N"]), 3)
COPD_events_patients <- subset(all_events, event == 4)
start_events_patients <- subset(all_events, event == 0 & gold > 0)
table(COPD_events_patients[, "gold"])/sum(table(COPD_events_patients[, "gold"]))
table(start_events_patients[, "gold"])/sum(table(start_events_patients[, "gold"]))
out_gold_inc_patients <- table(COPD_events_patients[, "gold"])/sum(table(COPD_events_patients[, "gold"]))
out_gold_prev_patients <- table(start_events_patients[, "gold"])/sum(table(start_events_patients[, "gold"]))
COPD_ids <- all_events[COPD_events, "id"]
for (i in 1:100) {
y <- which(all_events[, "id"] == COPD_ids[i] & all_events[, "gold"] > 0)
if (i == 1)
plot(all_events[y, "local_time"], all_events[y, "FEV1"], type = "l", xlim = c(0, 20), ylim = c(0, 5), xlab = "local time",
ylab = "FEV1") else lines(all_events[y, "local_time"], all_events[y, "FEV1"], type = "l")
}
title(cex.main = 0.5, "Trajectories of FEV1 in 100 individuals")
return(list(FEV1_prev = out_FEV1_prev, FEV1_inc = out_FEV1_inc, gold_prev = out_gold_prev, gold_inc = out_gold_inc, gold_prev_patients = out_gold_prev_patients,
gold_inc_patients = out_gold_inc_patients))
}
#' Returns results of validation tests for exacerbation rates
#'
#' This function runs validation tests for COPD exacerbation rates by GOLD stage
#' and compares them with reference values from studies such as CanCOLD, CIHI,
#' and Hoogendoorn. It simulates exacerbation events, and returns key metrics,
#' including overall exacerbation rates, rates by GOLD stage, and rates in
#' diagnosed vs. undiagnosed patients.
#' @param base_agents Number of agents in the simulation. Default is 1e4.
#' @param input EPIC inputs
#' @param jurisdiction character string specifying the jurisdiction for validation ("canada" or "us"). Default is "canada".
#' @return For Canada: validation test results (invisible). For US: invisible NULL (results displayed via messages and plots).
#' @export
validate_exacerbation <- function(base_agents=1e4, input=NULL, jurisdiction = "canada") {
# Handle US jurisdiction
if (tolower(jurisdiction) == "us") {
settings <- get_default_settings()
settings$record_mode <- record_mode["record_mode_event"]
settings$n_base_agents <- base_agents
results <- simulate(settings = settings, jurisdiction = "us", extended_results = TRUE, return_events = TRUE)
op <- results$basic
output_ex <- results$extended
all_events <- results$events
exac_events <- subset(all_events, event == 5)
exit_events <- subset(all_events, event == 14)
Follow_up_GOLD <- c(0, 0, 0, 0)
last_GOLD_transition_time <- 0
for (i in 2:dim(all_events)[1]) {
if (all_events[i, "id"] != all_events[i - 1, "id"])
last_GOLD_transition_time <- 0
if ((all_events[i, "id"] == all_events[i - 1, "id"]) & (all_events[i, "gold"] != all_events[i - 1, "gold"])) {
Follow_up_GOLD[all_events[i - 1, "gold"]] = Follow_up_GOLD[all_events[i - 1, "gold"]] + all_events[i - 1, "followup_after_COPD"] -
last_GOLD_transition_time
last_GOLD_transition_time <- all_events[i - 1, "followup_after_COPD"]
}
if (all_events[i, "event"] == 14)
Follow_up_GOLD[all_events[i, "gold"]] = Follow_up_GOLD[all_events[i, "gold"]] + all_events[i, "followup_after_COPD"] -
last_GOLD_transition_time
}
#----------------------------DIAGNOSED ------------------------------------
#-------------------------------------------------------------------------
all_events_diagnosed <- subset(all_events, diagnosis > 0 & gold > 0 )
exac_events_diagnosed <- subset(all_events_diagnosed, event == 5 )
sev_exac_events_diagnosed <- subset(all_events_diagnosed, event == 5 & (exac_status == 3 | exac_status == 4) )
mod_sev_exac_events_diagnosed <- subset(all_events_diagnosed, event == 5 & (exac_status == 3 | exac_status == 4 | exac_status == 2) )
exit_events_diagnosed <- subset(all_events_diagnosed, event == 14)
Follow_up_GOLD_diagnosed <- c(0, 0, 0, 0)
last_GOLD_transition_time_diagnosed <- 0
for (i in 2:dim(all_events_diagnosed)[1]) {
if ((all_events_diagnosed[i, "id"] != all_events_diagnosed[i - 1, "id"]))
last_GOLD_transition_time_diagnosed <- 0
if ((all_events_diagnosed[i, "id"] == all_events_diagnosed[i - 1, "id"]) & (all_events_diagnosed[i, "gold"] != all_events_diagnosed[i - 1, "gold"])) {
Follow_up_GOLD_diagnosed[all_events_diagnosed[i - 1, "gold"]] = Follow_up_GOLD_diagnosed[all_events_diagnosed[i - 1, "gold"]] + (all_events_diagnosed[i - 1, "local_time"]-all_events_diagnosed[i - 1, "time_at_diagnosis"]) -
last_GOLD_transition_time_diagnosed
last_GOLD_transition_time_diagnosed <- (all_events_diagnosed[i - 1, "local_time"]-all_events_diagnosed[i - 1, "time_at_diagnosis"])
}
if (all_events_diagnosed[i, "event"] == 14)
Follow_up_GOLD_diagnosed[all_events_diagnosed[i, "gold"]] = Follow_up_GOLD_diagnosed[all_events_diagnosed[i, "gold"]] + (all_events_diagnosed[i, "local_time"]-all_events_diagnosed[i, "time_at_diagnosis"]) -
last_GOLD_transition_time_diagnosed
}
#----------------------------UNDIAGNOSED ------------------------------------
#-------------------------------------------------------------------------
all_events_undiagnosed <- subset(all_events, diagnosis == 0 & gold > 0 & gold < 3) #CanCOLD is only GOLD 1 and 2
exac_events_undiagnosed <- subset(all_events_undiagnosed, event == 5 )
sev_exac_events_undiagnosed <- subset(all_events_undiagnosed, event == 5 & (exac_status == 3 | exac_status == 4) )
mod_sev_exac_events_undiagnosed <- subset(all_events_undiagnosed, event == 5 & (exac_status == 3 | exac_status == 4 | exac_status == 2) )
exit_events_undiagnosed <- subset(all_events_undiagnosed, event == 14)
Follow_up_GOLD_undiagnosed <- c(0, 0, 0, 0)
last_GOLD_transition_time_undiagnosed <- 0
for (i in 2:dim(all_events_undiagnosed)[1]) {
if ((all_events_undiagnosed[i, "id"] != all_events_undiagnosed[i - 1, "id"]))
last_GOLD_transition_time_undiagnosed <- 0
if ((all_events_undiagnosed[i, "id"] == all_events_undiagnosed[i - 1, "id"]) & (all_events_undiagnosed[i, "gold"] != all_events_undiagnosed[i - 1, "gold"])) {
Follow_up_GOLD_undiagnosed[all_events_undiagnosed[i - 1, "gold"]] = Follow_up_GOLD_undiagnosed[all_events_undiagnosed[i - 1, "gold"]] + (all_events_undiagnosed[i - 1, "local_time"]-all_events_undiagnosed[i - 1, "time_at_diagnosis"]) -
last_GOLD_transition_time_undiagnosed
last_GOLD_transition_time_undiagnosed <- (all_events_undiagnosed[i - 1, "local_time"]-all_events_undiagnosed[i - 1, "time_at_diagnosis"])
}
if (all_events_undiagnosed[i, "event"] == 14)
Follow_up_GOLD_undiagnosed[all_events_undiagnosed[i, "gold"]] = Follow_up_GOLD_undiagnosed[all_events_undiagnosed[i, "gold"]] + (all_events_undiagnosed[i, "local_time"]-all_events_undiagnosed[i, "time_at_diagnosis"]) -
last_GOLD_transition_time_undiagnosed
}
terminate_session()
#----------------------------All ------------------------------------
#-------------------------------------------------------------------------
message("Exacerbation Rates per GOLD stages for all patients:")
GOLD_I <- (as.data.frame(table(exac_events[, "gold"]))[1, 2]/Follow_up_GOLD[1])
GOLD_II <- (as.data.frame(table(exac_events[, "gold"]))[2, 2]/Follow_up_GOLD[2])
GOLD_III <- (as.data.frame(table(exac_events[, "gold"]))[3, 2]/Follow_up_GOLD[3])
GOLD_IV <- (as.data.frame(table(exac_events[, "gold"]))[4, 2]/Follow_up_GOLD[4])
message(paste0("exacRateGOLDI = ", round(GOLD_I , 2)))
message(paste0("exacRateGOLDII = ", round(GOLD_II , 2)))
message(paste0("exacRateGOLDIII = ", round(GOLD_III, 2)))
message(paste0("exacRateGOLDIV = ", round(GOLD_IV , 2)))
#----------------------------All ------------------------------------
#-------------------------------------------------------------------------
total_rate <- round(nrow(exac_events)/sum(Follow_up_GOLD), 2)
Exac_per_GOLD <- matrix (NA, nrow = 3, ncol =3)
colnames(Exac_per_GOLD) <- c("GOLD", "EPIC", "CanCOLD")
# CanCOLD only available for GOLD 1 and 2. See doi: 10.1164/rccm.201509-1795OC
Follow_up_GOLD_all_2level <- c(Follow_up_GOLD[1], Follow_up_GOLD[2]) # Because CanCOLD is mostly GOLD2, here we compare EPIC's GOLD2 only instead of GOLD2+
# Follow_up_GOLD_all_2level <- c(Follow_up_GOLD[1], sum(Follow_up_GOLD[2:4]))
GOLD_counts_all <- as.data.frame(table(exac_events[, "gold"]))[, 2]
GOLD_counts_all <- c(GOLD_counts_all[1], sum(GOLD_counts_all[2:4]))
Exac_per_GOLD[1:3, 1] <- c("total", "gold1", "gold2+")
Exac_per_GOLD[1:3, 2] <- c(total_rate,
round(x=GOLD_counts_all/Follow_up_GOLD_all_2level,
digits = 2))
Exac_per_GOLD[1:3, 3] <- c(0.39, 0.28, 0.53)
df <- as.data.frame(Exac_per_GOLD)
dfm <- melt(df[,c("GOLD", "EPIC", "CanCOLD")],id.vars = 1)
plot <-
ggplot(dfm, aes(x = GOLD, y = as.numeric(value))) +
scale_y_continuous(breaks = seq(0, 3, by = 0.5)) +
theme_tufte(base_size=14, ticks=FALSE) +
geom_bar(aes(fill = variable), stat = "identity", position = "dodge") +
ylab ("Rate") +
labs(caption = "Total rate of exacerbations per year for all patients")
print(plot)
message("Total rate of exacerbation in all patients (0.39 per year in CanCOLD): ", total_rate)
#--------------------------- total number of severe exacerbations:
message("Is the rate of severe and very severe exacerbations around 510 per Ford et al. 2015?")
n_exac <- data.frame(year= 1:20,Severe_Exacerbations = (output_ex$n_exac_by_ctime_severity[,3]+output_ex$n_exac_by_ctime_severity[,4])* (100000/rowSums(output_ex$n_alive_by_ctime_sex)))
avgRate_sevExac <- mean(n_exac$Severe_Exacerbations[round(nrow(n_exac)/2,0):nrow(n_exac)])
avgRate_sevExac <- round(avgRate_sevExac, 2)
message(paste0("Average rate during 20 years: ", avgRate_sevExac))
rate2017_sevExac <-(output_ex$n_exac_by_ctime_severity[3,3]+output_ex$n_exac_by_ctime_severity[3,4])*(100000/sum(output_ex$n_alive_by_ctime_sex[3,]))
rate2017_sevExac <- round(rate2017_sevExac, 2)
message(paste0("Rate in 2017: ", rate2017_sevExac))
#----------------------------Diagnosed ------------------------------------
#-------------------------------------------------------------------------
Exac_per_GOLD_diagnosed <- matrix (NA, nrow = 4, ncol = 3)
colnames(Exac_per_GOLD_diagnosed) <- c("GOLD", "EPIC", "Hoogendoorn")
# ACCEPT data is rates from a join of ECLIPSE, MACRO, OPTIMAL and STATCOPE.
Exac_per_GOLD_diagnosed[1:4, 1] <- c("gold1", "gold2", "gold3", "gold4")
Exac_per_GOLD_diagnosed[1:4, 2] <- round(
x=as.data.frame(table(exac_events_diagnosed[, "gold"]))[, 2]/
Follow_up_GOLD_diagnosed, digits = 2)
Exac_per_GOLD_diagnosed[1:4, 3] <- c(0.82, 1.17, 1.61, 2.10)
df <- as.data.frame(Exac_per_GOLD_diagnosed)
dfm <- melt(df[,c("GOLD", "EPIC", "Hoogendoorn")],id.vars = 1)
plot <- ggplot(dfm, aes(x = GOLD, y = as.numeric(value))) +
scale_y_continuous(breaks = seq(0, 3, by = 0.5)) +
theme_tufte(base_size=14, ticks=FALSE) +
geom_bar(aes(fill = variable), stat = "identity", position = "dodge") +
ylab ("Rate") +
labs(caption = "Total rate of exacerbations per year for diagnosed patients")
print(plot)
message("Total rate of exacerbation in diagnosed patients (1.5 per year in Hoogendoorn): ", round(nrow(exac_events_diagnosed)/sum(Follow_up_GOLD_diagnosed), 2))
#----------------------------Diagnosed Moderate and Severe------------------------------------
#-------------------------------------------------------------------------
# Updated October 14, 2025
Exac_per_GOLD_diagnosed <- matrix (NA, nrow = 4, ncol = 3)
colnames(Exac_per_GOLD_diagnosed) <- c("GOLD", "EPIC", "Wallace 2019")
Exac_per_GOLD_diagnosed[1:4, 1] <- c("gold1", "gold2", "gold3", "gold4")
Exac_per_GOLD_diagnosed[1:4, 2] <- round(
x=as.data.frame(table(mod_sev_exac_events_diagnosed[, "gold"]))[, 2]/
Follow_up_GOLD_diagnosed, digits = 2)
Exac_per_GOLD_diagnosed[1:4, 3] <- c(0.404, 0.489, 0.836, 0.891)
df <- as.data.frame(Exac_per_GOLD_diagnosed)
dfm <- melt(df[,c("GOLD", "EPIC", "Wallace 2019")],id.vars = 1)
plot <-
ggplot(dfm, aes(x = GOLD, y = as.numeric(value))) +
scale_y_continuous(breaks = seq(0, 3, by = 0.5)) +
theme_tufte(base_size=14, ticks=FALSE) +
geom_bar(aes(fill = variable), stat = "identity", position = "dodge") +
ylab ("Rate") +
labs(caption = "Total rate of moderate/severe exacerbations per year for diagnosed patients")
print(plot)
#----------------------------Diagnosed Severe------------------------------------
#-------------------------------------------------------------------------
Exac_per_GOLD_diagnosed <- matrix (NA, nrow = 4, ncol = 3)
colnames(Exac_per_GOLD_diagnosed) <- c("GOLD", "EPIC", "Wallace 2019")
Exac_per_GOLD_diagnosed[1:4, 1] <- c("gold1", "gold2", "gold3", "gold4")
Exac_per_GOLD_diagnosed[1:4, 2] <- round(
x=as.data.frame(table(sev_exac_events_diagnosed[, "gold"]))[, 2]/
Follow_up_GOLD_diagnosed, digits = 2)
Exac_per_GOLD_diagnosed[1:4, 3] <- c(0.12, 0.139, 0.254, 0.422)
df <- as.data.frame(Exac_per_GOLD_diagnosed)
dfm <- melt(df[,c("GOLD", "EPIC", "Wallace 2019")],id.vars = 1)
plot <-
ggplot(dfm, aes(x = GOLD, y = as.numeric(value))) +
scale_y_continuous(breaks = seq(0, 3, by = 0.5)) +
theme_tufte(base_size=14, ticks=FALSE) +
geom_bar(aes(fill = variable), stat = "identity", position = "dodge") +
ylab ("Rate") +
labs(caption = "Total rate of severe exacerbations per year for diagnosed patients")
print(plot)
#----------------------------Undiagnosed ------------------------------------
#----------------------------------------------------------------------------
total_rate_undiagnosed <- round(nrow(exac_events_undiagnosed)/sum(Follow_up_GOLD_undiagnosed), 2)
Exac_per_GOLD_undiagnosed <- matrix (NA, nrow = 3, ncol = 3)
colnames(Exac_per_GOLD_undiagnosed) <- c("GOLD", "EPIC", "CanCOLD")
Exac_per_GOLD_undiagnosed[1:3, 1] <- c("total", "gold1", "gold2+")
Follow_up_GOLD_undiagnosed_2level <- c(Follow_up_GOLD_undiagnosed[1],
Follow_up_GOLD_undiagnosed[2]) #Because CanCOLD is mostly GOLD2, we compare to GOLD2 EPIC
#Follow_up_GOLD_undiagnosed_2level <- c(Follow_up_GOLD_undiagnosed[1], sum(Follow_up_GOLD_undiagnosed[2:4]))
GOLD_counts_undiagnosed <- as.data.frame(table(exac_events_undiagnosed[, "gold"]))[, 2]
GOLD_counts_undiagnosed <- c(GOLD_counts_undiagnosed[1],
GOLD_counts_undiagnosed[2])
Exac_per_GOLD_undiagnosed[1:3, 2] <- c(total_rate_undiagnosed,
round(x=GOLD_counts_undiagnosed/Follow_up_GOLD_undiagnosed_2level, digits = 2))
Exac_per_GOLD_undiagnosed[1:3, 3] <- c(0.30, 0.24, 0.40)
df <- as.data.frame(Exac_per_GOLD_undiagnosed)
dfm <- melt(df[,c("GOLD", "EPIC", "CanCOLD")],id.vars = 1)
plot <-
ggplot(dfm, aes(x = GOLD, y = as.numeric(value))) +
scale_y_continuous(breaks = seq(0, 3, by = 0.5)) +
theme_tufte(base_size=14, ticks=FALSE) +
geom_bar(aes(fill = variable), stat = "identity", position = "dodge") +
ylab ("Rate") +
labs(caption = "Total rate of exacerbations per year for undiagnosed patients")
print(plot)
message("Total rate of exacerbation in undiagnosed patients (0.30 per year in CanCOLD): ",
total_rate_undiagnosed)
return(invisible(NULL))
}
# Check for unsupported jurisdictions
if (tolower(jurisdiction) != "canada") {
stop("Validation for jurisdiction '", jurisdiction, "' has not been implemented yet. Currently only 'canada' and 'us' are supported.")
}
# Canada implementation (existing code)
settings <- default_settings
settings$record_mode <- record_mode["record_mode_event"]
#settings$agent_stack_size <- 0
settings$n_base_agents <- base_agents
#settings$event_stack_size <- 1
init_session(settings = settings)
if (is.null(input)) {input <- model_input$values}
run(input = input)
op <- get_output()
output_ex <- get_output_ex()
all_events <- as.data.frame(get_all_events_matrix())
exac_events <- subset(all_events, event == 5)
exit_events <- subset(all_events, event == 14)
Follow_up_GOLD <- c(0, 0, 0, 0)
last_GOLD_transition_time <- 0
for (i in 2:dim(all_events)[1]) {
if (all_events[i, "id"] != all_events[i - 1, "id"])
last_GOLD_transition_time <- 0
if ((all_events[i, "id"] == all_events[i - 1, "id"]) & (all_events[i, "gold"] != all_events[i - 1, "gold"])) {
Follow_up_GOLD[all_events[i - 1, "gold"]] = Follow_up_GOLD[all_events[i - 1, "gold"]] + all_events[i, "followup_after_COPD"] -
last_GOLD_transition_time
last_GOLD_transition_time <- all_events[i, "followup_after_COPD"]
}
if (all_events[i, "event"] == 14)
Follow_up_GOLD[all_events[i, "gold"]] = Follow_up_GOLD[all_events[i, "gold"]] + all_events[i, "followup_after_COPD"] -
last_GOLD_transition_time
}
#----------------------------DIAGNOSED ------------------------------------
#-------------------------------------------------------------------------
all_events_diagnosed <- subset(all_events, diagnosis > 0 & gold > 0 )
exac_events_diagnosed <- subset(all_events_diagnosed, event == 5 )
sev_exac_events_diagnosed <- subset(all_events_diagnosed, event == 5 & (exac_status == 3 | exac_status == 4) )
mod_sev_exac_events_diagnosed <- subset(all_events_diagnosed, event == 5 & (exac_status == 3 | exac_status == 4 | exac_status == 2) )
exit_events_diagnosed <- subset(all_events_diagnosed, event == 14)
Follow_up_GOLD_diagnosed <- c(0, 0, 0, 0)
last_GOLD_transition_time_diagnosed <- 0
for (i in 2:dim(all_events_diagnosed)[1]) {
if ((all_events_diagnosed[i, "id"] != all_events_diagnosed[i - 1, "id"]))
last_GOLD_transition_time_diagnosed <- 0
if ((all_events_diagnosed[i, "id"] == all_events_diagnosed[i - 1, "id"]) & (all_events_diagnosed[i, "gold"] != all_events_diagnosed[i - 1, "gold"])) {
Follow_up_GOLD_diagnosed[all_events_diagnosed[i - 1, "gold"]] = Follow_up_GOLD_diagnosed[all_events_diagnosed[i - 1, "gold"]] + (all_events_diagnosed[i, "local_time"]-all_events_diagnosed[i, "time_at_diagnosis"]) -
last_GOLD_transition_time_diagnosed
last_GOLD_transition_time_diagnosed <- (all_events_diagnosed[i, "local_time"]-all_events_diagnosed[i, "time_at_diagnosis"])
}
if (all_events_diagnosed[i, "event"] == 14)
Follow_up_GOLD_diagnosed[all_events_diagnosed[i, "gold"]] = Follow_up_GOLD_diagnosed[all_events_diagnosed[i, "gold"]] + (all_events_diagnosed[i, "local_time"]-all_events_diagnosed[i, "time_at_diagnosis"]) -
last_GOLD_transition_time_diagnosed
}
#----------------------------UNDIAGNOSED ------------------------------------
#-------------------------------------------------------------------------
all_events_undiagnosed <- subset(all_events, diagnosis == 0 & gold > 0 & gold < 3) #CanCOLD is only GOLD 1 and 2
exac_events_undiagnosed <- subset(all_events_undiagnosed, event == 5 )
sev_exac_events_undiagnosed <- subset(all_events_undiagnosed, event == 5 & (exac_status == 3 | exac_status == 4) )
mod_sev_exac_events_undiagnosed <- subset(all_events_undiagnosed, event == 5 & (exac_status == 3 | exac_status == 4 | exac_status == 2) )
exit_events_undiagnosed <- subset(all_events_undiagnosed, event == 14)
Follow_up_GOLD_undiagnosed <- c(0, 0, 0, 0)
last_GOLD_transition_time_undiagnosed <- 0
for (i in 2:dim(all_events_undiagnosed)[1]) {
if ((all_events_undiagnosed[i, "id"] != all_events_undiagnosed[i - 1, "id"]))
last_GOLD_transition_time_undiagnosed <- 0
if ((all_events_undiagnosed[i, "id"] == all_events_undiagnosed[i - 1, "id"]) & (all_events_undiagnosed[i, "gold"] != all_events_undiagnosed[i - 1, "gold"])) {
Follow_up_GOLD_undiagnosed[all_events_undiagnosed[i - 1, "gold"]] = Follow_up_GOLD_undiagnosed[all_events_undiagnosed[i - 1, "gold"]] + (all_events_undiagnosed[i, "local_time"]-all_events_undiagnosed[i, "time_at_diagnosis"]) -
last_GOLD_transition_time_undiagnosed
last_GOLD_transition_time_undiagnosed <- (all_events_undiagnosed[i, "local_time"]-all_events_undiagnosed[i, "time_at_diagnosis"])
}
if (all_events_undiagnosed[i, "event"] == 14)
Follow_up_GOLD_undiagnosed[all_events_undiagnosed[i, "gold"]] = Follow_up_GOLD_undiagnosed[all_events_undiagnosed[i, "gold"]] + (all_events_undiagnosed[i, "local_time"]-all_events_undiagnosed[i, "time_at_diagnosis"]) -
last_GOLD_transition_time_undiagnosed
}
terminate_session()
#----------------------------All ------------------------------------
#-------------------------------------------------------------------------
message("Exacerbation Rates per GOLD stages for all patients:")
# Count exacerbations by GOLD stage - use named vector to avoid indexing errors
exac_counts_by_gold <- table(exac_events[, "gold"])
GOLD_I <- ifelse(Follow_up_GOLD[1] > 0,
as.numeric(exac_counts_by_gold["1"]) / Follow_up_GOLD[1],
0)
GOLD_II <- ifelse(Follow_up_GOLD[2] > 0,
as.numeric(exac_counts_by_gold["2"]) / Follow_up_GOLD[2],
0)
GOLD_III <- ifelse(Follow_up_GOLD[3] > 0,
as.numeric(exac_counts_by_gold["3"]) / Follow_up_GOLD[3],
0)
GOLD_IV <- ifelse(Follow_up_GOLD[4] > 0,
as.numeric(exac_counts_by_gold["4"]) / Follow_up_GOLD[4],
0)
# Replace NA with 0 (happens when there are no exacerbations for that GOLD stage)
GOLD_I <- ifelse(is.na(GOLD_I), 0, GOLD_I)
GOLD_II <- ifelse(is.na(GOLD_II), 0, GOLD_II)
GOLD_III <- ifelse(is.na(GOLD_III), 0, GOLD_III)
GOLD_IV <- ifelse(is.na(GOLD_IV), 0, GOLD_IV)
message(paste0("exacRateGOLDI = ", round(GOLD_I , 2)))
message(paste0("exacRateGOLDII = ", round(GOLD_II , 2)))
message(paste0("exacRateGOLDIII = ", round(GOLD_III, 2)))
message(paste0("exacRateGOLDIV = ", round(GOLD_IV , 2)))
#----------------------------All ------------------------------------
#-------------------------------------------------------------------------
total_rate <- round(nrow(exac_events)/sum(Follow_up_GOLD), 2)
Exac_per_GOLD <- matrix (NA, nrow = 3, ncol =3)
colnames(Exac_per_GOLD) <- c("GOLD", "EPIC", "CanCOLD")
# CanCOLD only available for GOLD 1 and 2. See doi: 10.1164/rccm.201509-1795OC
Follow_up_GOLD_all_2level <- c(Follow_up_GOLD[1], Follow_up_GOLD[2]) # Because CanCOLD is mostly GOLD2, here we compare EPIC's GOLD2 only instead of GOLD2+
# Follow_up_GOLD_all_2level <- c(Follow_up_GOLD[1], sum(Follow_up_GOLD[2:4]))
# Use named access to avoid indexing errors
exac_table <- table(exac_events[, "gold"])
GOLD_counts_all <- c(
as.numeric(exac_table["1"]),
sum(as.numeric(exac_table[c("2", "3", "4")]), na.rm = TRUE)
)
# Replace NA with 0
GOLD_counts_all[is.na(GOLD_counts_all)] <- 0
Exac_per_GOLD[1:3, 1] <- c("total", "gold1", "gold2+")
Exac_per_GOLD[1:3, 2] <- c(total_rate,
round(x=GOLD_counts_all/Follow_up_GOLD_all_2level,
digits = 2))
Exac_per_GOLD[1:3, 3] <- c(0.39, 0.28, 0.53)
df <- as.data.frame(Exac_per_GOLD)
dfm <- melt(df[,c("GOLD", "EPIC", "CanCOLD")],id.vars = 1)
plot <-
ggplot(dfm, aes(x = GOLD, y = as.numeric(value))) +
scale_y_continuous(breaks = seq(0, 3, by = 0.5)) +
theme_tufte(base_size=14, ticks=FALSE) +
geom_bar(aes(fill = variable), stat = "identity", position = "dodge") +
ylab ("Rate") +
labs(caption = "Total rate of exacerbations per year for all patients")
print(plot)
message("Total rate of exacerbation in all patients (0.39 per year in CanCOLD): ", total_rate)
#--------------------------- total number of severe exacerbations:
message("Is the rate of severe and very severe exacerbations around 477 per CIHI?")
n_exac <- data.frame(year= 1:20,Severe_Exacerbations = (output_ex$n_exac_by_ctime_severity[,3]+output_ex$n_exac_by_ctime_severity[,4])* (100000/rowSums(output_ex$n_alive_by_ctime_sex)))
avgRate_sevExac <- mean(n_exac$Severe_Exacerbations[round(nrow(n_exac)/2,0):nrow(n_exac)])
avgRate_sevExac <- round(avgRate_sevExac, 2)
message(paste0("Average rate during 20 years: ", avgRate_sevExac))
rate2017_sevExac <-(output_ex$n_exac_by_ctime_severity[3,3]+output_ex$n_exac_by_ctime_severity[3,4])*(100000/sum(output_ex$n_alive_by_ctime_sex[3,]))
rate2017_sevExac <- round(rate2017_sevExac, 2)
message(paste0("Rate in 2017: ", rate2017_sevExac))
#----------------------------Diagnosed ------------------------------------
#-------------------------------------------------------------------------
Exac_per_GOLD_diagnosed <- matrix (NA, nrow = 4, ncol = 3)
colnames(Exac_per_GOLD_diagnosed) <- c("GOLD", "EPIC", "Hoogendoorn")
# ACCEPT data is rates from a join of ECLIPSE, MACRO, OPTIMAL and STATCOPE.
Exac_per_GOLD_diagnosed[1:4, 1] <- c("gold1", "gold2", "gold3", "gold4")
# Use named access to avoid indexing errors
exac_diag_table <- table(exac_events_diagnosed[, "gold"])
exac_diag_rates <- sapply(1:4, function(i) {
count <- as.numeric(exac_diag_table[as.character(i)])
if (is.na(count)) count <- 0
if (Follow_up_GOLD_diagnosed[i] > 0) {
return(count / Follow_up_GOLD_diagnosed[i])
} else {
return(0)
}
})
Exac_per_GOLD_diagnosed[1:4, 2] <- round(exac_diag_rates, digits = 2)
Exac_per_GOLD_diagnosed[1:4, 3] <- c(0.82, 1.17, 1.61, 2.10)
df <- as.data.frame(Exac_per_GOLD_diagnosed)
dfm <- melt(df[,c("GOLD", "EPIC", "Hoogendoorn")],id.vars = 1)
plot <- ggplot(dfm, aes(x = GOLD, y = as.numeric(value))) +
scale_y_continuous(breaks = seq(0, 3, by = 0.5)) +
theme_tufte(base_size=14, ticks=FALSE) +
geom_bar(aes(fill = variable), stat = "identity", position = "dodge") +
ylab ("Rate") +
labs(caption = "Total rate of exacerbations per year for diagnosed patients")
print(plot)
message("Total rate of exacerbation in diagnosed patients (1.5 per year in Hoogendoorn): ", round(nrow(exac_events_diagnosed)/sum(Follow_up_GOLD_diagnosed), 2))
#----------------------------Diagnosed Moderate and Severe------------------------------------
#-------------------------------------------------------------------------
Exac_per_GOLD_diagnosed <- matrix (NA, nrow = 4, ncol = 3)
colnames(Exac_per_GOLD_diagnosed) <- c("GOLD", "EPIC", "ACCEPT")
# ACCEPT data is rates from a join of ECLIPSE, MACRO, OPTIMAL and STATCOPE.
Exac_per_GOLD_diagnosed[1:4, 1] <- c("gold1", "gold2", "gold3", "gold4")
# Use named access to avoid indexing errors
mod_sev_diag_table <- table(mod_sev_exac_events_diagnosed[, "gold"])
mod_sev_diag_rates <- sapply(1:4, function(i) {
count <- as.numeric(mod_sev_diag_table[as.character(i)])
if (is.na(count)) count <- 0
if (Follow_up_GOLD_diagnosed[i] > 0) {
return(count / Follow_up_GOLD_diagnosed[i])
} else {
return(0)
}
})
Exac_per_GOLD_diagnosed[1:4, 2] <- round(mod_sev_diag_rates, digits = 2)
Exac_per_GOLD_diagnosed[1:4, 3] <- c(0.58, 0.91, 1.41, 1.69)
df <- as.data.frame(Exac_per_GOLD_diagnosed)
dfm <- melt(df[,c("GOLD", "EPIC", "ACCEPT")],id.vars = 1)
plot <-
ggplot(dfm, aes(x = GOLD, y = as.numeric(value))) +
scale_y_continuous(breaks = seq(0, 3, by = 0.5)) +
theme_tufte(base_size=14, ticks=FALSE) +
geom_bar(aes(fill = variable), stat = "identity", position = "dodge") +
ylab ("Rate") +
labs(caption = "Total rate of moderate/severe exacerbations per year for diagnosed patients")
print(plot)
#----------------------------Diagnosed Severe------------------------------------
#-------------------------------------------------------------------------
Exac_per_GOLD_diagnosed <- matrix (NA, nrow = 4, ncol = 4)
colnames(Exac_per_GOLD_diagnosed) <- c("GOLD", "EPIC", "Hoogendoorn", "ACCEPT")
# ACCEPT data is rates from a join of ECLIPSE, MACRO, OPTIMAL and STATCOPE.
Exac_per_GOLD_diagnosed[1:4, 1] <- c("gold1", "gold2", "gold3", "gold4")
# Use named access to avoid indexing errors
sev_diag_table <- table(sev_exac_events_diagnosed[, "gold"])
sev_diag_rates <- sapply(1:4, function(i) {
count <- as.numeric(sev_diag_table[as.character(i)])
if (is.na(count)) count <- 0
if (Follow_up_GOLD_diagnosed[i] > 0) {
return(count / Follow_up_GOLD_diagnosed[i])
} else {
return(0)
}
})
Exac_per_GOLD_diagnosed[1:4, 2] <- round(sev_diag_rates, digits = 2)
Exac_per_GOLD_diagnosed[1:4, 3] <- c(0.11, 0.16, 0.22, 0.28)
Exac_per_GOLD_diagnosed[1:4, 4] <- c(0.10, 0.14, 0.32, 0.42)
df <- as.data.frame(Exac_per_GOLD_diagnosed)
dfm <- melt(df[,c("GOLD", "EPIC", "Hoogendoorn", "ACCEPT")],id.vars = 1)
plot <-
ggplot(dfm, aes(x = GOLD, y = as.numeric(value))) +
scale_y_continuous(breaks = seq(0, 3, by = 0.5)) +
theme_tufte(base_size=14, ticks=FALSE) +
geom_bar(aes(fill = variable), stat = "identity", position = "dodge") +
ylab ("Rate") +
labs(caption = "Total rate of severe exacerbations per year for diagnosed patients")
print(plot)
#----------------------------Undiagnosed ------------------------------------
#----------------------------------------------------------------------------
total_rate_undiagnosed <- round(nrow(exac_events_undiagnosed)/sum(Follow_up_GOLD_undiagnosed), 2)
Exac_per_GOLD_undiagnosed <- matrix (NA, nrow = 3, ncol = 3)
colnames(Exac_per_GOLD_undiagnosed) <- c("GOLD", "EPIC", "CanCOLD")
Exac_per_GOLD_undiagnosed[1:3, 1] <- c("total", "gold1", "gold2+")
Follow_up_GOLD_undiagnosed_2level <- c(Follow_up_GOLD_undiagnosed[1],
Follow_up_GOLD_undiagnosed[2]) #Because CANCold is mostly GOLD2, we comprare to GOLD2 EPIC
#Follow_up_GOLD_undiagnosed_2level <- c(Follow_up_GOLD_undiagnosed[1], sum(Follow_up_GOLD_undiagnosed[2:4]))
# Use named access to avoid indexing errors
exac_undiag_table <- table(exac_events_undiagnosed[, "gold"])
GOLD_counts_undiagnosed <- c(
as.numeric(exac_undiag_table["1"]),
as.numeric(exac_undiag_table["2"])
)
# Replace NA with 0
GOLD_counts_undiagnosed[is.na(GOLD_counts_undiagnosed)] <- 0
Exac_per_GOLD_undiagnosed[1:3, 2] <- c(total_rate_undiagnosed,
round(x=GOLD_counts_undiagnosed/Follow_up_GOLD_undiagnosed_2level, digits = 2))
Exac_per_GOLD_undiagnosed[1:3, 3] <- c(0.30, 0.24, 0.40)
df <- as.data.frame(Exac_per_GOLD_undiagnosed)
dfm <- melt(df[,c("GOLD", "EPIC", "CanCOLD")],id.vars = 1)
plot <-
ggplot(dfm, aes(x = GOLD, y = as.numeric(value))) +
scale_y_continuous(breaks = seq(0, 3, by = 0.5)) +
theme_tufte(base_size=14, ticks=FALSE) +
geom_bar(aes(fill = variable), stat = "identity", position = "dodge") +
ylab ("Rate") +
labs(caption = "Total rate of exacerbations per year for undiagnosed patients")
print(plot)
message("Total rate of exacerbation in undiagnosed patients (0.30 per year in CanCOLD): ",
total_rate_undiagnosed)
#----------------------------By Sex ------------------------------------
#-----------------------------------------------------------------------
message("\n")
message("Exacerbation rates (per COPD patient-year) by sex over time:\n")
# Extract n_exac_by_ctime_sex data (number of exacerbations)
# This is a matrix with dimensions [time_horizon][2] where columns are sex (0=male, 1=female)
n_exac <- as.data.frame(output_ex$n_exac_by_ctime_sex)
# Extract n_COPD_by_ctime_sex data (COPD patient-years at risk)
n_COPD <- as.data.frame(output_ex$n_COPD_by_ctime_sex)
# Calculate rate per COPD patient-year
exac_rate <- n_exac / n_COPD
colnames(exac_rate) <- c("Male", "Female")
exac_rate$Year <- 1:nrow(exac_rate)
# Reshape data for ggplot
exac_rate_long <- reshape2::melt(exac_rate, id.vars = "Year",
variable.name = "Sex",
value.name = "Exacerbation_Rate")
# Create the plot
plot <- ggplot2::ggplot(exac_rate_long, ggplot2::aes(x = .data$Year, y = .data$Exacerbation_Rate, color = .data$Sex)) +
ggplot2::geom_line(linewidth = 1) +
ggplot2::geom_point(size = 2) +
theme_bw(base_size = 14) +
labs(title = "Exacerbation Rate by Sex Over Time",
x = "Year",
y = "Exacerbation Rate (per COPD patient-year)",
color = "Sex") +
theme(legend.position = "bottom",
plot.title = element_text(hjust = 0.5))
print(plot)
# Print summary statistics
message("\nSummary statistics:")
message("Total exacerbations (Male): ", sum(n_exac[, 1]))
message("Total exacerbations (Female): ", sum(n_exac[, 2]))
message("Total COPD patient-years (Male): ", round(sum(n_COPD[, 1]), 2))
message("Total COPD patient-years (Female): ", round(sum(n_COPD[, 2]), 2))
message("Mean exacerbation rate per COPD patient-year (Male): ", round(mean(exac_rate$Male, na.rm = TRUE), 4))
message("Mean exacerbation rate per COPD patient-year (Female): ", round(mean(exac_rate$Female, na.rm = TRUE), 4))
message("Overall exacerbation rate per COPD patient-year (Male): ", round(sum(n_exac[, 1]) / sum(n_COPD[, 1]), 4))
message("Overall exacerbation rate per COPD patient-year (Female): ", round(sum(n_exac[, 2]) / sum(n_COPD[, 2]), 4))
}
#' Returns the Kaplan Meier curve comparing COPD and non-COPD
#' @param savePlots TRUE or FALSE (default), exports 300 DPI population growth and pyramid plots comparing simulated vs. predicted population
#' @param base_agents Number of agents in the simulation. Default is 1e4.
#' @param jurisdiction character string specifying the jurisdiction ("canada" or "us"). Default is "canada"
#' @return validation test results
#' @export
validate_survival <- function(savePlots = FALSE, base_agents=1e4, jurisdiction = "canada") {
# Check jurisdiction
if (tolower(jurisdiction) != "canada") {
stop("Validation for jurisdiction '", jurisdiction, "' has not been implemented yet. Currently only 'canada' is supported.")
}
if (!requireNamespace("survival", quietly = TRUE)) {
stop("Package \"survival\" needed for this function to work. Please install it.",
call. = FALSE)
}
if (!requireNamespace("survminer", quietly = TRUE)) {
stop("Package \"survminer\" needed for this function to work. Please install it.",
call. = FALSE)
}
settings <- default_settings
settings$record_mode <- record_mode["record_mode_event"]
#settings$agent_stack_size <- 0
settings$n_base_agents <- base_agents
#settings$event_stack_size <- 1
init_session(settings = settings)
input <- model_input$values #We can work with local copy more conveniently and submit it to the Run function
run(input = input)
events <- as.data.frame(get_all_events_matrix())
terminate_session()
cohort <- subset(events, ((event==7) | (event==13) | (event==14)))
cohort <- cohort %>% filter((id==lead(id) | ((event == 14) & id!=lag(id))))
cohort$copd <- (cohort$gold>0)
cohort$death <- (cohort$event!=14)
cohort$age <- (cohort$age_at_creation+cohort$local_time)
#fit <- survfit(Surv(age, death) ~ copd, data=cohort)
fit <- survival::survfit(Surv(age, death) ~ copd, data=cohort)
# Customized survival curves
surv_plot <- survminer::ggsurvplot(fit, data = cohort, censor.shape="", censor.size = 1,
surv.median.line = "hv", # Add medians survival
# Change legends: title & labels
legend.title = "Disease Status",
legend.labs = c("Non-COPD", "COPD"),
# Add p-value and tervals
pval = TRUE,
conf.int = TRUE,
xlim = c(40,110), # present narrower X axis, but not affect
# survival estimates.
xlab = "Age", # customize X axis label.
break.time.by = 20, # break X axis in time intervals by 500.
# Add risk table
#risk.table = TRUE,
tables.height = 0.2,
tables.theme = theme_cleantable(),
# Color palettes. Use custom color: c("#E7B800", "#2E9FDF"),
# or brewer color (e.g.: "Dark2"), or ggsci color (e.g.: "jco")
#palette = c("gray0", "gray1"),
ggtheme = theme_tufte() +
theme(axis.line = element_line(colour = "black"),
panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
panel.border = element_blank(),
panel.background = element_blank()) # Change ggplot2 theme
)
plot (surv_plot)
if (savePlots) ggsave(file.path(tempdir(), paste0("survival-diagnosed", ".tiff")), plot = plot(surv_plot), device = "tiff", dpi = 300)
fitcox <- coxph(Surv(age, death) ~ copd, data = cohort)
ftest <- cox.zph(fitcox)
message(paste(capture.output(summary(fitcox)), collapse = "\n"))
return(surv_plot)
}
#' Returns results of validation tests for diagnosis
#'
#' This function returns a table showing the proportion of COPD patients diagnosed
#' over the model's runtime. It also provides a second table that breaks down the proportion
#' of diagnosed patients by COPD severity. Additionally, the function generates a plot
#' to visualize the distribution of diagnoses over time.
#'
#' @param n_sim number of agents
#' @param jurisdiction character string specifying the jurisdiction ("canada" or "us"). Default is "canada"
#' @return validation test results
#' @export
validate_diagnosis <- function(n_sim = 1e+04, jurisdiction = "canada") {
message("Let's take a look at diagnosis\n")
# Check jurisdiction
if (tolower(jurisdiction) != "canada") {
stop("Validation for jurisdiction '", jurisdiction, "' has not been implemented yet. Currently only 'canada' is supported.")
}
petoc()
settings <- default_settings
settings$record_mode <- record_mode["record_mode_none"]
settings$agent_stack_size <- 0
settings$n_base_agents <- n_sim
settings$event_stack_size <- 0
init_session(settings = settings)
input <- model_input$values
res <- run(input = input)
if (res < 0)
stop("Execution stopped.\n")
inputs <- get_inputs()
output_ex <- get_output_ex()
message("Here are the proportion of COPD patients diagnosed over model time: \n")
diag <- data.frame(Year=1:inputs$global_parameters$time_horizon,
COPD=rowSums(output_ex$n_COPD_by_ctime_sex),
Diagnosed=rowSums(output_ex$n_Diagnosed_by_ctime_sex))
diag$Proportion <- round(diag$Diagnosed/diag$COPD,2)
message(paste(capture.output(diag), collapse = "\n"))
message("The average proportion diagnosed from year", round(length(diag$Proportion)/2,0), "to", length(diag$Proportion), "is",
mean(diag$Proportion[(round(length(diag$Proportion)/2,0)):(length(diag$Proportion))]),"\n")
diag.plot <- tidyr::gather(data=diag, key="Variable", value="Number", c(COPD,Diagnosed))
diag.plotted <- ggplot2::ggplot(diag.plot, aes(x=Year, y=Number, col=Variable)) +
geom_line() + geom_point() + expand_limits(y = 0) +
theme_bw() + ylab("Number of COPD patients") + xlab("Years")
plot(diag.plotted)
message("\n")
message("Now let's look at the proportion diagnosed by COPD severity.\n")
prop <- data.frame(Year=1:inputs$global_parameters$time_horizon,
output_ex$n_Diagnosed_by_ctime_severity/output_ex$n_COPD_by_ctime_severity)[,c(1,3,4,5,6)]
names(prop) <- c("Year","GOLD1","GOLD2","GOLD3","GOLD4")
prop <- prop[-1,]
message(paste(capture.output(prop), collapse = "\n"))
message("The average proportion of GOLD 1 and 2 that are diagnosed from year", round(nrow(prop)/2,0), "to", max(prop$Year), "is",
(mean(prop$GOLD1[round((nrow(prop)/2),0):nrow(prop)]) + mean(prop$GOLD2[round((nrow(prop)/2),0):nrow(prop)]))/2,"\n")
prop.plot <- tidyr::gather(data=prop, key="GOLD", value="Proportion", c(GOLD1:GOLD4))
prop.plotted <- ggplot2::ggplot(prop.plot, aes(x=Year, y=Proportion, col=GOLD)) +
geom_line() + geom_point() + expand_limits(y = 0) +
theme_bw() + ylab("Proportion diagnosed") + xlab("Years")
plot(prop.plotted)
terminate_session()
}
#' Returns results of validation tests for GP visits
#'
#' This function returns Average number of GP visits by sex, COPD severity and
#' COPD diagnosis status along with their plots.
#' @param n_sim number of agents
#' @param jurisdiction character string specifying the jurisdiction ("canada" or "us"). Default is "canada"
#' @return validation test results
#' @export
validate_gpvisits <- function(n_sim = 1e+04, jurisdiction = "canada") {
message("Let's take a look at GP visits\n")
# Check jurisdiction
if (tolower(jurisdiction) != "canada") {
stop("Validation for jurisdiction '", jurisdiction, "' has not been implemented yet. Currently only 'canada' is supported.")
}
petoc()
settings <- default_settings
settings$record_mode <- record_mode["record_mode_none"]
settings$agent_stack_size <- 0
settings$n_base_agents <- n_sim
settings$event_stack_size <- 0
init_session(settings = settings)
input <- model_input$values
res <- run(input = input)
if (res < 0)
stop("Execution stopped.\n")
inputs <- get_inputs()
output_ex <- get_output_ex()
message("\n")
message("Here is the Average number of GP visits by sex:\n")
GPSex <- data.frame(1:inputs$global_parameters$time_horizon,
output_ex$n_GPvisits_by_ctime_sex/output_ex$n_alive_by_ctime_sex)
names(GPSex) <- c("Year","Male","Female")
message(paste(capture.output(GPSex), collapse = "\n"))
GPSex.plot <- tidyr::gather(data=GPSex, key="Sex", value="Visits", c(Male,Female))
GPSex.plot <- subset(GPSex.plot, Year!=1)
GPSex.plotted <- ggplot2::ggplot(GPSex.plot, aes(x=Year, y=Visits, col=Sex)) +
geom_line() + geom_point() + expand_limits(y = 0) +
theme_bw() + ylab("Average GP visits/year") + xlab("Years")
plot(GPSex.plotted)
message("\n")
message("Here is the Average number of GP visits by COPD severity:\n")
GPCOPD <- data.frame(1:inputs$global_parameters$time_horizon,
output_ex$n_GPvisits_by_ctime_severity/output_ex$cumul_time_by_ctime_GOLD)
names(GPCOPD) <- c("Year","NoCOPD","GOLD1","GOLD2","GOLD3","GOLD4")
message(paste(capture.output(GPCOPD[-1,]), collapse = "\n"))
GPCOPD.plot <- tidyr::gather(data=GPCOPD, key="COPD", value="Visits", c(NoCOPD:GOLD4))
GPCOPD.plot <- subset(GPCOPD.plot, Year!=1)
GPCOPD.plotted <- ggplot2::ggplot(GPCOPD.plot, aes(x=Year, y=Visits, col=COPD)) +
geom_line() + geom_point() + expand_limits(y = 0) +
theme_bw() + ylab("Average GP visits/year") + xlab("Years")
plot(GPCOPD.plotted)
message("\n")
message("Here is the Average number of GP visits by COPD diagnosis status:\n")
Diagnosed <- rowSums(output_ex$n_Diagnosed_by_ctime_sex)
Undiagnosed <- rowSums(output_ex$cumul_time_by_ctime_GOLD[,2:5]) - Diagnosed
data <- cbind(Undiagnosed, Diagnosed)
GPDiag<- data.frame(Year=1:inputs$global_parameters$time_horizon,
output_ex$n_GPvisits_by_ctime_diagnosis/data)
message(paste(capture.output(GPDiag[-1,]), collapse = "\n"))
GPDiag.plot <- tidyr::gather(data=GPDiag, key="Diagnosis", value="Visits", c(Undiagnosed,Diagnosed))
GPDiag.plot <- subset(GPDiag.plot, Year!=1)
GPDiag.plotted <- ggplot2::ggplot(GPDiag.plot, aes(x=Year, y=Visits, col=Diagnosis)) +
geom_line() + geom_point() + expand_limits(y = 0) +
theme_bw() + ylab("Average GP visits/year") + xlab("Years")
plot(GPDiag.plotted)
message("\n")
terminate_session()
}
#' Returns results of validation tests for Symptoms
#'
#' This function plots the prevalence of cough, dyspnea, phlegm and wheeze
#' over time and by GOLD stage.
#' @param n_sim number of agents
#' @param jurisdiction character string specifying the jurisdiction ("canada" or "us"). Default is "canada"
#' @return validation test results
#' @export
validate_symptoms <- function(n_sim = 1e+04, jurisdiction = "canada") {
message("Let's take a look at symptoms\n")
# Check jurisdiction
if (tolower(jurisdiction) != "canada") {
stop("Validation for jurisdiction '", jurisdiction, "' has not been implemented yet. Currently only 'canada' is supported.")
}
petoc()
settings <- default_settings
settings$record_mode <- record_mode["record_mode_none"]
settings$agent_stack_size <- 0
settings$n_base_agents <- n_sim
settings$event_stack_size <- 0
init_session(settings = settings)
input <- model_input$values
res <- run(input = input)
if (res < 0)
stop("Execution stopped.\n")
inputs <- get_inputs()
output_ex <- get_output_ex()
# COUGH
message("\n")
message("I'm going to plot the prevalence of each symptom over time and by GOLD stage\n")
message("\n")
message("Cough:\n")
message("\n")
cough <- data.frame(1:inputs$global_parameters$time_horizon,
output_ex$n_cough_by_ctime_severity/output_ex$n_COPD_by_ctime_severity)
names(cough) <- c("Year","NoCOPD","GOLD1","GOLD2","GOLD3","GOLD4")
message(paste(capture.output(cough), collapse = "\n"))
# plot
cough.plot <- tidyr::gather(data=cough, key="GOLD", value="Prevalence", NoCOPD:GOLD4)
cough.plot$Symptom <- "cough"
cough.plotted <- ggplot2::ggplot(cough.plot, aes(x=Year, y=Prevalence, col=GOLD)) +
geom_smooth(method=lm, formula = y~x, level=0) + geom_point() + expand_limits(y = 0) +
theme_bw() + ylab("Proportion with cough") + xlab("Model Year")
#plot(cough.plotted)
message("\n")
# PHLEGM
message("Phlegm:\n")
message("\n")
phlegm <- data.frame(1:inputs$global_parameters$time_horizon,
output_ex$n_phlegm_by_ctime_severity/output_ex$n_COPD_by_ctime_severity)
names(phlegm) <- c("Year","NoCOPD","GOLD1","GOLD2","GOLD3","GOLD4")
message(paste(capture.output(phlegm), collapse = "\n"))
# plot
phlegm.plot <- tidyr::gather(data=phlegm, key="GOLD", value="Prevalence", NoCOPD:GOLD4)
phlegm.plot$Symptom <- "phlegm"
phlegm.plotted <- ggplot2::ggplot(phlegm.plot, aes(x=Year, y=Prevalence, col=GOLD)) +
geom_smooth(method=lm, formula = y~x, level=0) + geom_point() + expand_limits(y = 0) +
theme_bw() + ylab("Proportion with phlegm") + xlab("Model Year")
#plot(phlegm.plotted)
message("\n")
# WHEEZE
message("Wheeze:\n")
message("\n")
wheeze <- data.frame(1:inputs$global_parameters$time_horizon,
output_ex$n_wheeze_by_ctime_severity/output_ex$n_COPD_by_ctime_severity)
names(wheeze) <- c("Year","NoCOPD","GOLD1","GOLD2","GOLD3","GOLD4")
message(paste(capture.output(wheeze), collapse = "\n"))
# plot
wheeze.plot <- tidyr::gather(data=wheeze, key="GOLD", value="Prevalence", NoCOPD:GOLD4)
wheeze.plot$Symptom <- "wheeze"
wheeze.plotted <- ggplot2::ggplot(wheeze.plot, aes(x=Year, y=Prevalence, col=GOLD)) +
geom_smooth(method=lm, formula = y~x, level=0) + geom_point() + expand_limits(y = 0) +
theme_bw() + ylab("Proportion with wheeze") + xlab("Model Year")
#plot(wheeze.plotted)
message("\n")
# DYSPNEA
message("Dyspnea:\n")
message("\n")
dyspnea <- data.frame(1:inputs$global_parameters$time_horizon,
output_ex$n_dyspnea_by_ctime_severity/output_ex$n_COPD_by_ctime_severity)
names(dyspnea) <- c("Year","NoCOPD","GOLD1","GOLD2","GOLD3","GOLD4")
message(paste(capture.output(dyspnea), collapse = "\n"))
# plot
dyspnea.plot <- tidyr::gather(data=dyspnea, key="GOLD", value="Prevalence", NoCOPD:GOLD4)
dyspnea.plot$Symptom <- "dyspnea"
dyspnea.plotted <- ggplot2::ggplot(dyspnea.plot, aes(x=Year, y=Prevalence, col=GOLD)) +
geom_smooth(method=lm, formula = y~x, level=0) + geom_point() + expand_limits(y = 0) +
theme_bw() + ylab("Proportion with dyspnea") + xlab("Model Year")
#plot(dyspnea.plotted)
message("\n")
message("All symptoms plotted together:\n")
all.plot <- rbind(cough.plot, phlegm.plot, wheeze.plot, dyspnea.plot)
all.plotted <- ggplot2::ggplot(all.plot, aes(x=Year, y=Prevalence, col=GOLD)) +
geom_smooth(method=lm, formula = y~x, level=0) + geom_point() + facet_wrap(~Symptom) +
expand_limits(y = 0) + theme_bw() + ylab("Proportion with symptom") + xlab("Model Year")
plot(all.plotted)
terminate_session()
}
#' Returns results of validation tests for Treatment
#'
#' This function runs validation tests to examine how treatment initiated at diagnosis
#' influences exacerbation rates in COPD patients. It compares exacerbation rates between
#' diagnosed and undiagnosed patients and assesses the impact of treatment.
#' @param n_sim number of agents
#' @param jurisdiction character string specifying the jurisdiction ("canada" or "us"). Default is "canada"
#' @return validation test results
#' @export
validate_treatment<- function(n_sim = 1e+04, jurisdiction = "canada") {
message("Let's make sure that treatment (which is initiated at diagnosis) is affecting the exacerbation rate.\n")
# Check jurisdiction
if (tolower(jurisdiction) != "canada") {
stop("Validation for jurisdiction '", jurisdiction, "' has not been implemented yet. Currently only 'canada' is supported.")
}
petoc()
settings <- default_settings
settings$record_mode <- record_mode["record_mode_none"]
settings$agent_stack_size <- 0
settings$n_base_agents <- n_sim
settings$event_stack_size <- 0
init_session(settings = settings)
input <- model_input$values
res <- run(input = input)
if (res < 0)
stop("Execution stopped.\n")
inputs <- get_inputs()
output_ex <- get_output_ex()
message("\n")
message("Exacerbation rate for undiagnosed COPD patients.\n")
message("\n")
undiagnosed <- data.frame(cbind(1:inputs$global_parameters$time_horizon, output_ex$n_exac_by_ctime_severity_undiagnosed/
(rowSums(output_ex$n_COPD_by_ctime_severity[,-1]) - rowSums(output_ex$n_Diagnosed_by_ctime_sex))))
names(undiagnosed) <- c("Year","Mild","Moderate","Severe","VerySevere")
message(paste(capture.output(undiagnosed), collapse = "\n"))
undiagnosed$Diagnosis <- "undiagnosed"
message("\n")
message("Exacerbation rate for diagnosed COPD patients.\n")
message("\n")
diagnosed <- data.frame(cbind(1:inputs$global_parameters$time_horizon,
output_ex$n_exac_by_ctime_severity_diagnosed/rowSums(output_ex$n_Diagnosed_by_ctime_sex)))
diagnosed[1,2:5] <- c(0,0,0,0)
names(diagnosed) <- c("Year","Mild","Moderate","Severe","VerySevere")
message(paste(capture.output(diagnosed), collapse = "\n"))
diagnosed$Diagnosis <- "diagnosed"
# plot
exac.plot <- tidyr::gather(data=rbind(undiagnosed, diagnosed), key="Exacerbation", value="Rate", Mild:VerySevere)
exac.plotted <- ggplot2::ggplot(exac.plot, aes(x=Year, y=Rate, fill=Diagnosis)) +
geom_bar(stat="identity", position="dodge") + facet_wrap(~Exacerbation, labeller=label_both) +
scale_y_continuous(expand = c(0, 0)) +
xlab("Model Year") + ylab("Annual rate of exacerbations") + theme_bw()
plot(exac.plotted)
message("\n")
terminate_session()
###
message("\n")
message("Now, set the treatment effects to 0 and make sure the number of exacerbations increased among diagnosed patients.\n")
message("\n")
init_session(settings = settings)
input_nt <- model_input$values
input_nt$medication$medication_ln_hr_exac <- rep(0, length(inputs$medication$medication_ln_hr_exac))
res <- run(input = input_nt)
if (res < 0)
stop("Execution stopped.\n")
inputs_nt <- get_inputs()
output_ex_nt <- get_output_ex()
exac.diff <- data.frame(cbind(1:inputs_nt$global_parameters$time_horizon,
output_ex_nt$n_exac_by_ctime_severity_diagnosed - output_ex$n_exac_by_ctime_severity_diagnosed))
names(exac.diff) <- c("Year","Mild","Moderate","Severe","VerySevere")
message("Without treatment, there was an average of:\n")
message(mean(exac.diff$Mild),"more mild exacerbations,\n")
message(mean(exac.diff$Moderate),"more moderate exacerbations,\n")
message(mean(exac.diff$Severe),"more severe exacerbations, and\n")
message(mean(exac.diff$VerySevere),"more very severe exacerbations per year.\n")
###
message("\n")
message("Now, set all COPD patients to diagnosed, then undiagnosed, and compare the exacerbation rates.\n")
message("\n")
init_session(settings = settings)
input_nd <- model_input$values
input_nd$diagnosis$logit_p_prevalent_diagnosis_by_sex <- cbind(male=c(intercept=-100, age=-0.0152, smoking=0.1068, fev1=-0.6146,
cough=0.075, phlegm=0.283, wheeze=-0.0275, dyspnea=0.5414,
case_detection=0),
female=c(intercept=-100-0.1638, age=-0.0152, smoking=0.1068, fev1=-0.6146,
cough=0.075, phlegm=0.283, wheeze=-0.0275, dyspnea=0.5414,
case_detection=0))
input_nd$diagnosis$p_hosp_diagnosis <- 0
input_nd$diagnosis$logit_p_diagnosis_by_sex <- cbind(male=c(intercept=-100, age=-0.0324, smoking=0.3711, fev1=-0.8032,
gpvisits=0.0087, cough=0.208, phlegm=0.4088, wheeze=0.0321, dyspnea=0.722,
case_detection=0),
female=c(intercept=-100-0.4873, age=-0.0324, smoking=0.3711, fev1=-0.8032,
gpvisits=0.0087, cough=0.208, phlegm=0.4088, wheeze=0.0321, dyspnea=0.722,
case_detection=0))
input_nd$diagnosis$logit_p_overdiagnosis_by_sex <- cbind(male=c(intercept=-100, age=0.0025, smoking=0.6911, gpvisits=0.0075,
cough=0.7264, phlegm=0.7956, wheeze=0.66, dyspnea=0.8798,
case_detection=0),
female=c(intercept=-100+0.2597, age=0.0025, smoking=0.6911, gpvisits=0.0075,
cough=0.7264, phlegm=0.7956, wheeze=0.66, dyspnea=0.8798,
case_detection=0))
res <- run(input = input_nd)
if (res < 0)
stop("Execution stopped.\n")
output_ex_nd <- get_output_ex()
exac_rate_nodiag <- rowSums(output_ex_nd$n_exac_by_ctime_severity)/rowSums(output_ex_nd$n_COPD_by_ctime_sex)
terminate_session()
###
init_session(settings = settings)
input_d <- model_input$values
input_d$diagnosis$logit_p_prevalent_diagnosis_by_sex <- cbind(male=c(intercept=100, age=-0.0152, smoking=0.1068, fev1=-0.6146,
cough=0.075, phlegm=0.283, wheeze=-0.0275, dyspnea=0.5414,
case_detection=0),
female=c(intercept=100-0.1638, age=-0.0152, smoking=0.1068, fev1=-0.6146,
cough=0.075, phlegm=0.283, wheeze=-0.0275, dyspnea=0.5414,
case_detection=0))
input_d$diagnosis$p_hosp_diagnosis <- 1
input_d$diagnosis$logit_p_diagnosis_by_sex <- cbind(male=c(intercept=100, age=-0.0324, smoking=0.3711, fev1=-0.8032,
gpvisits=0.0087, cough=0.208, phlegm=0.4088, wheeze=0.0321, dyspnea=0.722,
case_detection=0),
female=c(intercept=100-0.4873, age=-0.0324, smoking=0.3711, fev1=-0.8032,
gpvisits=0.0087, cough=0.208, phlegm=0.4088, wheeze=0.0321, dyspnea=0.722,
case_detection=0))
res <- run(input = input_d)
if (res < 0)
stop("Execution stopped.\n")
inputs_d <- get_inputs()
output_ex_d <- get_output_ex()
exac_rate_diag <- rowSums(output_ex_d$n_exac_by_ctime_severity)/rowSums(output_ex_d$n_COPD_by_ctime_sex)
##
message("Annual exacerbation rate (this is also plotted):\n")
message("\n")
trt_effect<- data.frame(Year=1:inputs_d$global_parameters$time_horizon,
Diagnosed = exac_rate_diag,
Undiagnosed = exac_rate_nodiag)
trt_effect$Delta <- (trt_effect$Undiagnosed - trt_effect$Diagnosed)/trt_effect$Undiagnosed
message(paste(capture.output(trt_effect), collapse = "\n"))
message("\n")
message("Treatment reduces the rate of exacerbations by a mean of:", mean(trt_effect$Delta),"\n")
# plot
trt.plot <- tidyr::gather(data=trt_effect, key="Diagnosis", value="Rate", Diagnosed:Undiagnosed)
trt.plotted <- ggplot2::ggplot(trt.plot, aes(x=Year, y=Rate, col=Diagnosis)) +
geom_line() + geom_point() + expand_limits(y = 0) +
theme_bw() + ylab("Annual exacerbation rate") + xlab("Years")
plot(trt.plotted)
terminate_session()
}
#' Returns results of Case Detection strategies
#' @param n_sim number of agents
#' @param p_of_CD probability of recieving case detection given that an agent meets the selection criteria
#' @param min_age minimum age that can recieve case detection
#' @param min_pack_years minimum pack years that can recieve case detection
#' @param only_smokers set to 1 if only smokers should recieve case detection
#' @param CD_method Choose one case detection method: CDQ195", "CDQ165", "FlowMeter", "FlowMeter_CDQ"
#' @return results of case detection strategy compared to no case detection
#' @export
test_case_detection <- function(n_sim = 1e+04, p_of_CD=0.1, min_age=40, min_pack_years=0, only_smokers=0, CD_method="CDQ195") {
message("Comparing a case detection strategy to no case detection.\n")
petoc()
settings <- default_settings
settings$record_mode <- record_mode["record_mode_none"]
# settings$agent_stack_size <- 0
settings$n_base_agents <- n_sim
settings$event_stack_size <- 0
init_session(settings = settings)
input <- model_input$values
input$diagnosis$p_case_detection <- p_of_CD
input$diagnosis$min_cd_age <- min_age
input$diagnosis$min_cd_pack_years <- min_pack_years
input$diagnosis$min_cd_smokers <-only_smokers
input$diagnosis$logit_p_prevalent_diagnosis_by_sex <- cbind(male=c(intercept=1.0543, age=-0.0152, smoking=0.1068, fev1=-0.6146,
cough=0.075, phlegm=0.283, wheeze=-0.0275, dyspnea=0.5414,
case_detection=input$diagnosis$case_detection_methods[1,CD_method]),
female=c(intercept=1.0543-0.1638, age=-0.0152, smoking=0.1068, fev1=-0.6146,
cough=0.075, phlegm=0.283, wheeze=-0.0275, dyspnea=0.5414,
case_detection=input$diagnosis$case_detection_methods[1,CD_method]))
input$diagnosis$logit_p_diagnosis_by_sex <- cbind(male=c(intercept=-2, age=-0.0324, smoking=0.3711, fev1=-0.8032,
gpvisits=0.0087, cough=0.208, phlegm=0.4088, wheeze=0.0321, dyspnea=0.722,
case_detection=input$diagnosis$case_detection_methods[1,CD_method]),
female=c(intercept=-2-0.4873, age=-0.0324, smoking=0.3711, fev1=-0.8032,
gpvisits=0.0087, cough=0.208, phlegm=0.4088, wheeze=0.0321, dyspnea=0.722,
case_detection=input$diagnosis$case_detection_methods[1,CD_method]))
input$diagnosis$logit_p_overdiagnosis_by_sex <- cbind(male=c(intercept=-5.2169, age=0.0025, smoking=0.6911, gpvisits=0.0075,
cough=0.7264, phlegm=0.7956, wheeze=0.66, dyspnea=0.8798,
case_detection=input$diagnosis$case_detection_methods[2,CD_method]),
female=c(intercept=-5.2169+0.2597, age=0.0025, smoking=0.6911, gpvisits=0.0075,
cough=0.7264, phlegm=0.7956, wheeze=0.66, dyspnea=0.8798,
case_detection=input$diagnosis$case_detection_methods[2,CD_method]))
message("\n")
message("Here are your inputs for the case detection strategy:\n")
message("\n")
message(paste(capture.output(input$diagnosis), collapse = "\n"))
res <- run(input = input)
if (res < 0)
stop("Execution stopped.\n")
inputs <- get_inputs()
output <- get_output()
output_ex <- get_output_ex()
# Exacerbations
exac <- output$total_exac
names(exac) <- c("Mild","Moderate","Severe","VerySevere")
# rate
total.gold <- colSums(output_ex$n_COPD_by_ctime_severity[,2:5])
names(total.gold) <- c("GOLD1","GOLD2","GOLD3","GOLD4")
exac.gs <- data.frame(output_ex$n_exac_by_gold_severity)
colnames(exac.gs) <- c("Mild","Moderate","Severe","VerySevere")
exac_rate <- rbind(GOLD1=exac.gs[1,]/total.gold[1],
GOLD2=exac.gs[2,]/total.gold[2],
GOLD3=exac.gs[3,]/total.gold[3],
GOLD4=exac.gs[4,]/total.gold[4])
exac_rate$CD <- "Case detection"
exac_rate$GOLD <- rownames(exac_rate)
# GOLD
gold <- data.frame(CD="Case detection",
Proportion=colMeans(output_ex$n_COPD_by_ctime_severity/rowSums(output_ex$n_alive_by_ctime_sex)))
gold$GOLD <- c("NoCOPD","GOLD1","GOLD2","GOLD3","GOLD4")
terminate_session()
## Rerunning with no case detection
init_session(settings = settings)
input_nocd <- model_input$values
input_nocd$diagnosis$p_case_detection <- 0
message("\n")
message("Now setting the probability of case detection to", input_nocd$diagnosis$p_case_detection, "and re-running the model\n")
message("\n")
res <- run(input = input_nocd)
if (res < 0)
stop("Execution stopped.\n")
inputs_nocd <- get_inputs()
output_nocd <- get_output()
output_ex_nocd <- get_output_ex()
# Exacerbations
exac_nocd <- output_nocd$total_exac
names(exac_nocd) <- c("Mild","Moderate","Severe","VerySevere")
# rate
total.gold_nocd <- colSums(output_ex_nocd$n_COPD_by_ctime_severity[,2:5])
names(total.gold_nocd) <- c("GOLD1","GOLD2","GOLD3","GOLD4")
exac.gs_nocd <- data.frame(output_ex_nocd$n_exac_by_gold_severity)
colnames(exac.gs_nocd) <- c("Mild","Moderate","Severe","VerySevere")
exac_rate_nocd <- rbind(GOLD1=exac.gs_nocd[1,]/total.gold_nocd[1],
GOLD2=exac.gs_nocd[2,]/total.gold_nocd[2],
GOLD3=exac.gs_nocd[3,]/total.gold_nocd[3],
GOLD4=exac.gs_nocd[4,]/total.gold_nocd[4])
exac_rate_nocd$CD <- "No Case detection"
exac_rate_nocd$GOLD <- rownames(exac_rate_nocd)
# GOLD
gold_nocd<- data.frame(CD="No case detection",
Proportion=colMeans(output_ex_nocd$n_COPD_by_ctime_severity/rowSums(output_ex_nocd$n_alive_by_ctime_sex)))
gold_nocd$GOLD <- c("NoCOPD","GOLD1","GOLD2","GOLD3","GOLD4")
## Difference between CD and No CD
# Exacerbations
exac.diff <- data.frame(cbind(CD=exac, NOCD=exac_nocd))
exac.diff$Delta <- exac.diff$CD - exac.diff$NOCD
message("Here are total number of exacerbations by severity:\n")
message("\n")
message(paste(capture.output(exac.diff), collapse = "\n"))
message("\n")
message("The annual rate of exacerbations with case detection is:\n")
message(paste(capture.output(exac_rate[,1:4]), collapse = "\n"))
message("\n")
message("The annual rate of exacerbations without case detection is:\n")
message(paste(capture.output(exac_rate_nocd[,1:4]), collapse = "\n"))
message("\n")
message("This data is also plotted.\n")
#plot
exac.plot <- tidyr::gather(rbind(exac_rate, exac_rate_nocd), key="Exacerbation", value="Rate", Mild:VerySevere)
exac.plotted <-ggplot2::ggplot(exac.plot, aes(x=Exacerbation, y=Rate, fill=CD)) +
geom_bar(stat="identity", position="dodge") + facet_wrap(~GOLD, scales="free_y") +
scale_y_continuous(expand = expand_scale(mult=c(0, 0.1))) +
xlab("Exacerbation") + ylab("Annual rate of exacerbations") + theme_bw()
exac.plotted <- exac.plotted + theme(axis.text.x=element_text(angle=45, hjust=1)) +
theme(legend.title = element_blank())
plot(exac.plotted)
# GOLD
# plot
message("\n")
message("The average proportion of agents in each gold stage is also plotted.\n")
gold.plot <- rbind(gold, gold_nocd)
gold.plot$GOLD <- factor(gold.plot$GOLD, levels=c("NoCOPD","GOLD1","GOLD2","GOLD3","GOLD4"))
gold.plotted <- ggplot2::ggplot(gold.plot, aes(x=GOLD, y=Proportion, fill=CD)) +
geom_bar(stat="identity", position="dodge") +
scale_y_continuous(expand = c(0,0), limits=c(0,1)) +
xlab("GOLD stage") + ylab("Average proportion") + theme_bw()
gold.plotted <- gold.plotted + theme(legend.title = element_blank())
plot(gold.plotted)
message("\n")
terminate_session()
}
#' Returns results of validation tests for overdiagnosis
#'
#' This function returns the proportion of non-COPD subjects overdiagnosed
#' over model time.
#' @param n_sim number of agents
#' @param jurisdiction character string specifying the jurisdiction ("canada" or "us"). Default is "canada"
#' @return validation test results
#' @export
validate_overdiagnosis <- function(n_sim = 1e+04, jurisdiction = "canada") {
message("Let's take a look at overdiagnosis\n")
# Check jurisdiction
if (tolower(jurisdiction) != "canada") {
stop("Validation for jurisdiction '", jurisdiction, "' has not been implemented yet. Currently only 'canada' is supported.")
}
petoc()
settings <- default_settings
settings$record_mode <- record_mode["record_mode_none"]
settings$agent_stack_size <- 0
settings$n_base_agents <- n_sim
settings$event_stack_size <- 0
init_session(settings = settings)
input <- model_input$values
res <- run(input = input)
if (res < 0)
stop("Execution stopped.\n")
inputs <- get_inputs()
output_ex <- get_output_ex()
message("Here are the proportion of non-COPD subjects overdiagnosed over model time: \n")
overdiag <- data.frame(Year=1:inputs$global_parameters$time_horizon,
NonCOPD=output_ex$n_COPD_by_ctime_severity[,1],
Overdiagnosed=rowSums(output_ex$n_Overdiagnosed_by_ctime_sex))
overdiag$Proportion <- overdiag$Overdiagnosed/overdiag$NonCOPD
message(paste(capture.output(overdiag), collapse = "\n"))
message("The average proportion overdiagnosed from year", round(length(overdiag$Proportion)/2,0), "to", length(overdiag$Proportion), "is",
mean(overdiag$Proportion[(round(length(overdiag$Proportion)/2,0)):(length(overdiag$Proportion))]),"\n")
overdiag.plot <- tidyr::gather(data=overdiag, key="Variable", value="Number", c(NonCOPD, Overdiagnosed))
overdiag.plotted <- ggplot2::ggplot(overdiag.plot, aes(x=Year, y=Number, col=Variable)) +
geom_line() + geom_point() + expand_limits(y = 0) +
theme_bw() + ylab("Number of non-COPD subjects") + xlab("Years")
plot(overdiag.plotted)
message("\n")
terminate_session()
}
#' Returns results of validation tests for medication module.
#'
#' This function returns plots showing medication usage over time
#' @param n_sim number of agents
#' @param jurisdiction character string specifying the jurisdiction ("canada" or "us"). Default is "canada"
#' @return validation test results for medication
#' @export
validate_medication <- function(n_sim = 5e+04, jurisdiction = "canada") {
message("\n")
message("Plotting medication usage over time:")
message("\n")
# Check jurisdiction
if (tolower(jurisdiction) != "canada") {
stop("Validation for jurisdiction '", jurisdiction, "' has not been implemented yet. Currently only 'canada' is supported.")
}
petoc()
settings <- default_settings
settings$record_mode <- record_mode["record_mode_event"]
settings$agent_stack_size <- 0
settings$n_base_agents <- n_sim
settings$event_stack_size <- settings$n_base_agents * 1.7 * 30
init_session(settings = settings)
input <- model_input$values
res <- run(input = input)
if (res < 0)
stop("Execution stopped.\n")
all_events <- as.data.frame(get_all_events_matrix())
all_annual_events <- all_events[all_events$event==1,] # only annual event
# Prop on each med class over time and by gold
all_annual_events$time <- floor(all_annual_events$local_time + all_annual_events$time_at_creation)
med.plot <- all_annual_events %>%
group_by(time, gold) %>%
count(medication_status) %>%
mutate(prop=n/sum(n))
med.plot$gold <- as.character(med.plot$gold )
# overall among COPD patients
copd <- med.plot %>%
filter(gold>0) %>%
group_by(time, medication_status) %>%
summarise(n=sum(n)) %>%
mutate(prop=n/sum(n), gold="all copd") %>%
select(time, gold, everything())
med.plot <- rbind(med.plot, copd)
med.plot$medication_status <- ifelse(med.plot$medication_status==0,"none",
ifelse(med.plot$medication_status==1,"SABA",
ifelse(med.plot$medication_status==4,"LAMA",
ifelse(med.plot$medication_status==6,"LAMA/LABA",
ifelse(med.plot$medication_status==14,"ICS/LAMA/LABA",9)))))
med.plotted <- ggplot2::ggplot(data=med.plot, aes(x=time, y=prop, col=medication_status)) +
geom_line() + facet_wrap(~gold, labeller=label_both) +
expand_limits(y = 0) + theme_bw() + ylab("Proportion per medication class") + xlab("Years") +
theme(legend.title=element_blank())
plot(med.plotted)
terminate_session()
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.