library(ggplot2)
library(McMasterPandemic)
library(lubridate)
library(tidyr)
library(dplyr)
if (dir.exists(lisa_dir <- "C:/Users/lkanary/OneDrive - Yukon College/Research/NSERC CANMOD/McMaster SEIR Model")) {
setwd(lisa_dir)
}
load_model_from_file = TRUE
options(MP_force_symm_vcov = TRUE)
#"in acute care" means "in hospital but not in the intensive care unit (ICU)"
params1 <- read_params("PHAC.csv")
# update default parameters for the Yukon case
# beta0 -- baseline transmission rate
# N -- population of Yukon
# mu -- proportion of cases that are mild
# phi1 -- proportion of hospitalization cases that are not in the ICU
#
# note: some more of these values will be updated below
# by running exploratory simulations and trajectory
# matching 'by eye'
params1[c("N", "phi1")] <- c(42507, 0.98)
# initial state of the simulation
state1 <- make_state(params=params1)
# start and end dates
sdate <- as.Date("2021-11-01")
edate <- as.Date("2022-01-19")
initial_date = as.Date("2021-08-03")
start_date_offset = as.integer(sdate - initial_date)
# read and process data
covid_data <- ("report_data_yukon_h_and_i.csv"
%>% read.csv
%>% mutate(date = as.Date(date))
%>% filter(date >= ymd(20210803))
%>% filter(between(as.Date(date), sdate, edate))
# report -- new reported cases on that day
# hosp -- new hospital admissions on that day
%>% select(date, report, hosp)
%>% pivot_longer(names_to = "var", -date)
%>% mutate(value=round(value))
)
head(covid_data, n=12)
# establish schedule of time variation of parameters
params_timevar = data.frame(
Date = ymd(
# estimate a new transmission rate on
# these dates (i'm no expert but these
# seemed to "work")
20211115, # nov 15 beta0
20211215, # dec 15 beta0
20211215, # dec 15 mu
20220101 # jan 01 beta0
),
Symbol = c("beta0", "beta0", 'mu', 'beta0'),
Value = NA,
Type = "abs"
)
# NOTE: manual exploration of parameter space
# - this is where i figured out good starting values for
# mu and beta0
# - two types of parameters: params and time_params
# - params is a named vector with names referring to
# the parameter to be optimized
# - optionally change the scale on which optimization occur
# by prefixing these names with log_ or logit_
# - values of this vector are initial values fed to the
# optimizer
# - time_params is a vector of values in the order of
# of the params_timevar schedule above
# if you change the scale )
explore_pars = list(
params = c(
# baseline transmission rate
beta0 = 0.235,
# proportion of cases that are severe
mu = 0.9
),
time_params = c(
## NOTE: this corresponds to the schedule
## set in params_timevar above. any
## parameter can vary but you need
## to declare it in params_timevar.
0.07, 0.63, 0.99, 0.17
)
)
# after simulations, plot the graph and check how good
# the fit is -- go back and adjust the parameter values
# if the fit is 'way off' -- repeat until you have good
# starting values to give to the opt_pars object below
(forecast_sim(
unlist(explore_pars), explore_pars, base_params = params1,
start_date = sdate - start_date_offset,
end_date = edate,
sim_args = list(condense = TRUE),
time_args = list(params_timevar = params_timevar)
)
%>% list(covid_data)
%>% setNames(c("expected", "observed"))
%>% bind_rows(.id = 'source')
%>% filter(var %in% c("report", "hosp"))
%>% ggplot
+ facet_wrap(~var, scale = 'free')
+ geom_line(aes(date, value, colour = source))
)
# declare parameters to be fitted
opt_pars <- explore_pars
# make use of c++ performance gains if you can
if (!is.null(getOption("MP_flex_spec_version"))) {
mm = (make_base_model(
params = params1,
start_date = sdate - start_date_offset,
end_date = edate,
params_timevar = params_timevar,
data = covid_data
)
#%>% update_condense_map(c(
# conv_Incidence = "report",
# lag_1_diff_X = 'hosp'
#))
%>% update_tmb_indices
)
sim_args = list(flexmodel = mm)
} else {
sim_args = list()
}
if (load_model_from_file) {
fitted.mod = readRDS(file = "fitted.mod.rda")
} else {
# fit the model
fitted.mod <- calibrate(
data = covid_data,
time_args = list(params_timevar = params_timevar),
start_date_offset = start_date_offset,
base_params = params1,
opt_pars = opt_pars,
debug = TRUE,
debug_plot = FALSE,
sim_args = sim_args,
#mle2_args = list(use.ginv = FALSE)
)
saveRDS(fitted.mod, file = "fitted.mod.rda")
}
#fitted.mod.orig = fitted.mod
vc = fitted.mod$mle@vcov
#fitted.mod$mle@vcov = (vc + t(vc)) / 2
# fitted parameters, including dispersion parameters (nb_disp)
# that measure the variability around the fitted curves
coef(fitted.mod, 'fitted')
fitted.mod
plot(fitted.mod, data = covid_data)
# set up the forecasting scenario
# (this is a very basic status quo scenario for 30 days
# beyond the final date in the fitting data)
# (if you want to do more than status quo you will
# probably want to modify the schedule of parameter changes
# in forecast_args$time_args$params_timevar)
n_days_to_forecast = 30
forecast_args = fitted.mod$forecast_args
fit_end_date = as.Date(forecast_args$end_date)
forecast_args$end_date = fit_end_date + days(n_days_to_forecast)
# speed up forecasts by making use of c++ if it is available
if (!is.null(getOption("MP_flex_spec_version"))) {
mm = make_base_model(
params = forecast_args$base_params,
start_date = mm$start_date,
end_date = forecast_args$end_date,
params_timevar = forecast_args$time_args$params_timevar,
do_make_state = TRUE,
do_hazard = TRUE
)
forecast_args$sim_args = list(flexmodel = mm)
}
# run the forecasts multiple times to get confidence intervals
# - the result of this function has the following columns
# - date
# - var: variable being simulated
# - lwr: 90% confidence interval lower bound
# - value: median of the prediction distribution
# - upr: 90% confidence interval upper bound
mod.forecasts = forecast_ensemble(
fitted.mod,
# forecast scenario is defined by forecast_args
forecast_args = forecast_args
)
# here is a function for plotting the forecasts for a
# chosen variable (modified from work by Irena Papst).
# dashed horizontal line is where the data end.
plot_forecast = function(forecast, variable, data) {
(forecast
%>% filter(var == variable)
%>% ggplot(aes(x = date))
+ geom_vline(aes(xintercept = fit_end_date),
colour = "grey60", linetype = "dashed")
+ geom_ribbon(aes(ymin = lwr,
ymax = upr),
alpha = 0.3)
+ geom_line(aes(y = value))
+ geom_point(data = filter(data, var == variable),
aes(x = date, y = value),
shape = 1)
+ labs(y = variable)
+ theme(axis.title.x = element_blank())
)
}
# example plots
plot_forecast(mod.forecasts, "report", covid_data)
plot_forecast(mod.forecasts, "ICU", covid_data)
plot_forecast(mod.forecasts, "death", covid_data)
plot_forecast(mod.forecasts, "hosp", covid_data)
# if the variable isn't in the data set, then only the
# forecasts are plotted (no training data)
plot_forecast(mod.forecasts, "E", covid_data)
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.