## MOSTLY OBSOLETE
## depends on hidden/secret data; still here as an example
## OBSOLETE?
##' run simulation with specified parameters; extract results
##' matching dates and variable order in data
## FIXME: can this be made into a predict method??
## with newdata, newparams, newinit
## not sure where to put these ...
##' @param beta0 baseline transmission
##' @param E0 starting value
##' @param data data (for subsetting/matching)
##' @param params parameters
##' @param start_date start date
##' @param values_only return a vector rather than a data frame
##' @importFrom dplyr mutate mutate_at %>% as_tibble right_join
##' @export
predfun <- function(beta0,E0,data,
params,
start_date="10-Feb-2020",
values_only=TRUE) {
## global variables
beta0 <- E0 <- data <- params <- start_date <- values_only <- NULL
var <- value <- NULL
## substitute values into base parameter vector
params[["beta0"]] <- beta0
params[["E0"]] <- E0 ## unnecessary?
state <- make_state(N=params[["N"]],E0=E0) ## assume type==ICU1 for now
res <- run_sim(params,state,start_date=start_date,
end_date=max(data$date)) ## FIXME: pass args?
## browser()
dcomp <- select(data,var,date) %>% mutate_at("var",as.character)
res2 <- (aggregate(res)
%>% as_tibble()
%>% tidyr::pivot_longer(-date,names_to="var")
%>% right_join(dcomp,by=c("date","var"))
)
if (values_only) return(pull(res2,value))
return(res2)
}
library(McMasterPandemic)
library(bbmle)
library(plyr)
library(dplyr) ## second!
library(ggplot2); theme_set(theme_bw())
p <- read_params(system.file("params","ICU1.csv",
package="McMasterPandemic"))
## n.b. extra factor levels cause problems below
## FIXME: drop levels upstream?
clean_data <- dplyr::as_tibble(
droplevels(readRDS("../data/01042020_clean.rds")))
## fit only after March 18 (shortcut to estimating effects of control measures)
fit_data <- dplyr::filter(clean_data,date>=as.Date("2020-03-18"))
do_profile <- TRUE
## assumes equal dispersion for all three data types
## (H, ICU, deaths); is this plausible?
t_fit1 <- system.time(m1 <- mle2(value~dnbinom(
mu=predfun(exp(log_beta0),exp(log_E0),
data=clean_data, params=pp),
size=exp(logk)),
data=clean_data,
start=list(log_beta0=0,log_E0=log(300),logk=1),
method="Nelder-Mead")
)
print(t_fit1)
## ~10 seconds (158 function evaluations)
## would like to generalize. How can I pass in new parameters?? Global??
## mle2 is doing ugly things with evaluation environment ...
fitfun <- function(dist="nbinom",
parameters=NULL,
start=list(log_beta0=0,log_E0=log(300),logk=1),
method="Nelder-Mead",
xdata=fit_data,
control=list(),
params=pp
) {
form <- switch(dist,
nbinom=value~dnbinom(mu=predfun(exp(log_beta0),exp(log_E0),
data=data, params=params),
size=exp(logk)),
stop("unknown distribution")
)
tt <- system.time(
m <- mle2(form,parameters=parameters,
start=start,method=method,control=control,
data=data))
attr(m,"time") <- tt
return(m)
}
## m1 <- fitfun()
## would like to use update() but doesn't work
## only change is parameters=list(logk ~ var -1 ),
## i.e. variable specific dispersion params
## FIXME: data is entered in two different places
t_fit1B <- system.time(m1B <- mle2(value~dnbinom(
mu=predfun(exp(log_beta0),exp(log_E0),
data=fit_data, params=pp),
size=exp(logk)),
data=fit_data,
parameters=list(logk ~ var -1 ),
start=list(log_beta0=0,log_E0=log(300),logk=1),
method="Nelder-Mead",
control=list(maxit=2000)))
print(t_fit1B) ## 40 seconds
t_fit1B <- system.time(m1B <- mle2(value~dnbinom(
mu=predfun(exp(log_beta0),exp(log_E0),
data=fit_data, params=pp),
size=exp(logk)),
data=fit_data,
parameters=list(logk ~ var -1 ),
start=list(log_beta0=0,log_E0=log(300),logk=1),
method="Nelder-Mead",
control=list(maxit=2000)))
t_fit2 <- system.time(m2 <- mle2(value~dlnorm(
meanlog=predfun(exp(log_beta0),exp(log_E0),
data=clean_data, params=pp),
sdlog=exp(log_sdlog)),
data=clean_data,
## var-specific log std devs
parameters=list(log_sdlog~var-1),
start=list(log_beta0=0,log_E0=log(300),
log_sdlog=0),
method="Nelder-Mead")
)
print(t_fit2) ## 10 seconds
sqrt(diag(vcov(m1))) ## sd, on log scale (proportional sd)
nb0prof <- NULL
if (do_profile) {
t_prof1 <- system.time(nb0prof <- profile(m1,trace=TRUE))
## 99 seconds
confint(nb0prof)
}
## Wald and profile CIs are v. similar (for log-scaled params)
confint(m1,method="quad")
exp(confint(m1,method="quad"))
cc <- coef(m1)
## predict for specific coefs
pf <- function(cc,...) {
predfun(exp(cc[["log_beta0"]]),exp(cc[["log_E0"]]),
clean_data,
pp,
...)
}
pred_full <- pf(coef(m1), values_only=FALSE)
##
## FIXME: allow for longer time vector
get_preds <- function(fit,seed=NULL,level=0.8) {
if (!is.null(seed)) set.seed(seed)
pars <- MASS::mvrnorm(n=200,mu=coef(fit),Sigma=vcov(fit))
all_pred <- plyr::adply(pars,
.margins=1, ## rowwise
.fun=pf,
values_only=FALSE,
.progress="text")
## ~ 10 seconds
preds <- (all_pred
%>% as_tibble()
%>% dplyr::select(-X1)
%>% group_by(date,var)
%>% dplyr::summarise(median=quantile(value,0.5),
lwr=quantile(value,(1-level)/2),
upr=quantile(value,(1+level)/2))
%>% full_join(pf(coef(fit),values_only=FALSE),by=c("date","var"))
)
return(preds)
}
fitList <- list(nb0=m1,nb_vardisp=m1B,ln_vardisp=m2)
## m2 has non-pos-def vcov? ??
predList <- purrr::map(fitList[1:2], get_preds)
save(fitList,predList,file="mlefit.RData")
## straight NB fit is terrible
gg0 <- (ggplot(predList[["nb_vardisp"]],aes(date,colour=var,fill=var))
+ geom_line(aes(y=value))
+ geom_ribbon(colour=NA,alpha=0.2,aes(ymin=lwr,ymax=upr))
+ geom_line(aes(y=median),lty=2)
+ geom_point(data=clean_data,aes(y=value))
)
gg0 + scale_y_log10()
cc <- coef(m1B)
pp2 <- pp
pp2[["beta0"]] <- exp(cc[["log_beta0"]])
pp2[["E0"]] <- exp(cc[["log_E0"]])
write_params(pp2,"PARAMFILE.csv","NBdispvar fit to H/D/ICU >= March 18")
## thrown off by noise in the first few values?
## control measures/nonlinearity?
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.