#' Run AttMOMO base and baseline adjusted models and return estimated means and variances.
#'
#' @param country Country name.
#' @param StartWeek Start week in ISOweek format (YYYY-WXX)
#' @param EndWeek End week in ISOweek format (YYYY-WXX)
#' @param groups list of group names
#' @param pooled list of group names to be pooled (default = NULL)
#' Must be part of groups.
#' @param indicators list of indicator names.
#' For each indicator, a data object named IndicatorName_data must be available in memory.
#' @param death_data Deaths data available in memory, with columns group, ISOweek and deaths.
#' @param ET_data Extreme temperature (ET) data available in memory, with columns ISOweek and ET.
#' @param lags weeks of lagged effect (default = 3, max = 9)
#' @param ptrend significance of trend to be included (default = 0.05)
#' @param p26 significance of the half-year sine to be included (default = 0.05)
#' @param p52 significance of the year sine to be included (default = 0.10)
#' @import data.table
#' @return A data.table with weekly estimated means and variances by group
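#' @examples
#' \dontrun{
#' # Illustrative call only, based on the example values noted in the function
#' # body; death_data, ET_data, GSIPLS_data and GSCLS_data are assumed to be
#' # available in memory with the required columns.
#' AttData <- AttMOMO_estimation(
#'   country = "Denmark",
#'   StartWeek = "2016-W27",
#'   EndWeek = "2021-W26",
#'   groups = c("00to14", "15to44", "45to64", "65to74", "75to84", "85P", "Total"),
#'   pooled = c("00to14", "15to44", "45to64", "65to74", "75to84", "85P"),
#'   indicators = c("GSIPLS", "GSCLS"),
#'   death_data = death_data,
#'   ET_data = ET_data,
#'   lags = 3,
#'   ptrend = 0.05,
#'   p26 = 0.05,
#'   p52 = 0.10
#' )
#' }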
#' @export
AttMOMO_estimation <- function(country, StartWeek, EndWeek, groups, pooled = NULL, indicators, death_data, ET_data,
lags = 3, ptrend = 0.05, p26 = 0.05, p52 = 0.10) {
group <- ET <- summer <- winter <- EB <- EAB <- season <- deaths <- VEB <- EET <- VEET <- VEAB <- wk <- . <- anova <- glm <- median <- residuals <- df.residual <- predict.glm <- quasipoisson <- NULL
# country <- "Denmark"
# StartWeek <- '2016-W27'
# EndWeek <- '2021-W26'
# groups = c('00to14', '15to44', '45to64', '65to74', '75to84', '85P', 'Total')
# pooled <- c('00to14', '15to44', '45to64', '65to74', '75to84', '85P')
# indicators <- c('GSIPLS', 'GSCLS')
# death_data <- death_data
# ET_data <- ET_data
# lags <- 3
# ptrend <- 0.05
# p26 <- 0.05
# p52 <- 0.10
# Read and merge data -----------------------------------------------------
# death data
AttData <- data.table::data.table(death_data)
if (sum(colnames(AttData) %in% c('group', 'ISOweek', 'deaths')) != 3) {
stop('Columns group, ISOweek, deaths not in death_data')
}
for (g in groups) {
if (!(g %in% unique(AttData$group))) {
stop(paste("group", g, "not in death_data"))
}
if ((min(AttData[g == group,]$ISOweek) > StartWeek) | (max(AttData[g == group,]$ISOweek) < EndWeek)) {
stop(paste("death_data", g, "do not cover", StartWeek, "to", EndWeek))
}
}
AttData <- AttData[(StartWeek <= ISOweek) & (ISOweek <= EndWeek) & (group %in% groups),]
AttData <- AttData[order(group, ISOweek),]
# Extreme temperature data
ET_data <- data.table::data.table(ET_data)
if (sum(colnames(ET_data) %in% c('ISOweek', 'ET')) != 2) {
stop('Columns ISOweek, ET not in ET_data')
}
if ((min(ET_data$ISOweek) > StartWeek) | (max(ET_data$ISOweek) < EndWeek)) {
stop(paste("ET_data do not cover", StartWeek, "to", EndWeek))
}
AttData <- merge(AttData, ET_data[order(ISOweek), ], by = "ISOweek")
# Indicator data
for (i in indicators) {
X <- try(data.table::data.table(eval(parse(text = paste0(i, '_data')))))
if (inherits(X, "try-error")) {
stop(paste0("Could not read ", i, "_data"))
}
if (sum(colnames(X) %in% c('group', 'ISOweek', i)) != 3) {
stop(paste0('Columns group, ISOweek, ', i, ' not in ', i, '_data'))
}
for (g in groups) {
if (!(g %in% unique(X$group))) {
stop(paste0("group ", g, " not in ", i, "_data"))
}
if ((min(X[g == group,]$ISOweek) > StartWeek) | (max(X[g == group,]$ISOweek) < EndWeek)) {
stop(paste0(i, "_data ", g, " do not cover ", StartWeek, " to ", EndWeek))
}
}
AttData <- merge(AttData, X[order(group, ISOweek),], by = c("group", "ISOweek"))
rm(X)
}
# Prepare data ------------------------------------------------------------
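# A season runs from week 27 to week 26 of the following year and is labelled
# by its starting year; summer covers weeks 21-39, winter weeks 40-20.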
AttData[, `:=`(season = as.numeric(substr(ISOweek,1,4)) - (as.numeric(substr(ISOweek,7,8)) < 27),
summer = as.numeric((21 <= as.numeric(substr(ISOweek,7,8))) & (as.numeric(substr(ISOweek,7,8)) <= 39)),
winter = as.numeric((20 >= as.numeric(substr(ISOweek,7,8))) | (as.numeric(substr(ISOweek,7,8)) >= 40)))]
# Warm/cold summer/winter
AttData[, `:=`(cold_summer = -((ET < 0) * ET) * summer,
warm_summer = ((ET > 0) * ET) * summer,
cold_winter = -((ET < 0) * ET) * winter,
warm_winter = ((ET > 0) * ET) * winter)]
AttData[, `:=`(summer = NULL, winter = NULL)]
# Lagged effects: create 0 to `lags` week lags of each indicator and ET variable, within group
for (i in c(indicators, c('cold_summer', 'warm_summer', 'cold_winter', 'warm_winter'))) {
for (l in 0:lags) {
expr <- parse(text = paste0(i, "_d", l, " := shift(", i, ", ", l, ", type = 'lag')"))
AttData[, eval(expr), by = group]
}
}
# Split each lagged indicator into season-specific variables (zero outside the season)
for (i in indicators) {
for (l in 0:lags) {
for (s in unique(AttData$season)) {
expr <- parse(text = paste0(i, "_d", l, "_", s, " := ifelse(", s, " == season, ", i, "_d", l, ", 0)"))
AttData[, eval(expr), by = group]
}
expr <- parse(text = paste0(i, "_d", l, " := NULL"))
AttData[, eval(expr)]
}
}
AttData[, `:=`(cold_summer= NULL, warm_summer = NULL, cold_winter = NULL, warm_winter = NULL)]
AttData[is.na(AttData)] <- 0
# Baseline covariates: intercept (const), trend (wk) and annual/half-year sine and cosine terms
AttData[, wk := as.numeric(as.factor(ISOweek))]
AttData[, `:=`(const = 1,
sin52 = sin((2*pi/(365.25/7)) * wk),
cos52 = cos((2*pi/(365.25/7)) * wk),
sin26 = sin((4*pi/(365.25/7)) * wk),
cos26 = cos((4*pi/(365.25/7)) * wk))]
# Prediction data ---------------------------------------------------------
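# Each prediction dataset zeroes out all covariates except those belonging to
# one component, so that predictions from the fitted model decompose the
# expected deaths into baseline, ET and indicator contributions.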
# baseline
AttData.B <- copy(AttData)
for (l in 0:lags) {
for (i in c('cold_summer', 'warm_summer', 'cold_winter', 'warm_winter')) {
expr <- parse(text = paste0(i, "_d", l, ":= 0"))
AttData.B[, eval(expr)]
}
for (i in indicators) {
for (s in unique(AttData$season)) {
expr <- parse(text = paste0(i, "_d", l, "_", s, " := 0"))
AttData.B[, eval(expr)]
}
}
}
# ET
AttData.ET <- copy(AttData)
AttData.ET[, `:=`(const = 0, wk = 0, sin52 = 0, cos52 = 0, sin26 = 0, cos26 = 0)]
for (i in indicators) {
for (l in 0:lags) {
for (s in unique(AttData$season)) {
expr <- parse(text = paste0(i, "_d", l, "_", s, " := 0"))
AttData.ET[, eval(expr)]
}
}
}
# indicators
for (i in indicators) {
X <- copy(AttData)
X[, `:=`(const = 0, wk = 0, sin52 = 0, cos52 = 0, sin26 = 0, cos26 = 0)]
for (l in 0:lags) {
for (ir in c('cold_summer', 'warm_summer', 'cold_winter', 'warm_winter')) {
expr <- parse(text = paste0(ir, "_d", l, ":= 0"))
X[, eval(expr)]
}
for (ir in indicators[indicators != i]) {
for (s in unique(AttData$season)) {
expr <- parse(text = paste0(ir, "_d", l, "_", s, " := 0"))
X[, eval(expr)]
}
}
}
assign(paste0("AttData.", i), X)
}
rm(X)
# Estimation --------------------------------------------------------------
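# For each group a quasi-Poisson model with identity link is fitted: intercept
# and trend (const + wk), annual and half-year sine/cosine terms, and the
# lagged ET and indicator terms. The sine/cosine terms are tested with LRT
# comparisons against the p52 and p26 thresholds and dropped when not
# significant. If the model does not converge the seasonal terms (and, if
# needed, the indicator terms) are dropped; zero-inflated groups (median
# deaths of zero) fall back to a trend-only model.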
# V <- vcov(m) # covariance matrix
# non-baseline parameters
parm <- paste(grep("_d[0-9]", names(AttData), value=TRUE), collapse = " + ")
for (g in groups) {
print(paste("### Group", g, "###"))
f <- paste(c("deaths ~ -1 + const + wk", "sin52 + cos52", "sin26 + cos26", parm), collapse = " + ")
m <- try(glm(f, quasipoisson(identity), data = AttData[group == g,]), silent = TRUE)
if ((!inherits(m, "try-error")) & (median(AttData[group == g,]$deaths) > 0)) {
if (m$converged) {
fa <- paste(c("deaths ~ -1 + const + wk", parm), collapse = " + ")
ma <- glm(fa, quasipoisson(identity), data = AttData[group == g,])
if (anova(m, ma, dispersion = max(1, sum(residuals(m, type = "deviance")^2)/df.residual(m)), test="LRT")$`Pr(>Chi)`[2] > max(p52, p26)) {
m <- ma
} else {
fa <- paste(c("deaths ~ -1 + const + wk", "sin52 + cos52", parm), collapse = " + ")
ma <- glm(fa, quasipoisson(identity), data = AttData[group == g,])
if (anova(m, ma, dispersion = max(1, sum(residuals(m, type = "deviance")^2)/df.residual(m)), test="LRT")$`Pr(>Chi)`[2] > p26) {
m <- ma
} else {
fa <- paste(c("deaths ~ -1 + const + wk", parm), collapse = " + ")
ma <- glm(fa, quasipoisson(identity), data = AttData[group == g,])
if (anova(m, ma, dispersion = max(1, sum(residuals(m, type = "deviance")^2)/df.residual(m)), test="LRT")$`Pr(>Chi)`[2] > p52) {
m <- ma
}
}
}
} else {
f <- paste(c("deaths ~ -1 + const + wk", parm), collapse = " + ")
m <- try(glm(f, quasipoisson(identity), data = AttData[group == g,]), silent = TRUE)
if ((inherits(m, "try-error")) | (!m$converged)) {
print(paste("The model did not converge. A Simple model with only trend used i.e. no effect of indicators"))
f <- paste(c("deaths ~ -1 + const + wk"))
m <- glm(f, quasipoisson(identity), data = AttData[group == g,])
}
}
} else {
if (median(AttData[group == g,]$deaths) == 0) {msg <- "Zero inflated." } else {msg <- NULL}
if (inherits(m, "try-error")) print(paste("Could not fit model.", msg, "Simple model with only trend used i.e. no effect of indicators"))
f <- paste(c("deaths ~ -1 + const + wk"))
m <- glm(f, quasipoisson(identity), data = AttData[group == g,])
}
# Remove coefficients that are NA due to collinearity
f <- paste("deaths ~ -1 +", paste(names(m$coefficients[!is.na(m$coefficients)]), collapse = ' + '))
m <- glm(f, quasipoisson(identity), data = AttData[group == g,])
print(summary(m, dispersion = max(1, sum(residuals(m, type = "deviance")^2)/df.residual(m))))
# Predictions -------------------------------------------------------------
# Prediction baseline
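# Expected baseline (EB) and its variance (VEB): the over-dispersion estimate
# times the expected value (Poisson-type variance) plus the squared standard
# error of the prediction.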
AttData[group == g, `:=`(EB = predict.glm(m, newdata = AttData.B[group == g,], se.fit=TRUE)$fit)]
AttData[group == g, `:=`(VEB = (max(1, sum(residuals(m)^2)/df.residual(m)))*EB +
predict.glm(m, newdata = AttData.B[group == g,], dispersion = max(1, sum(residuals(m)^2)/df.residual(m)), se.fit=TRUE)$se.fit^2)]
# Prediction ET
AttData[group == g, `:=`(EET = predict.glm(m, newdata = AttData.ET[group == g,], se.fit=TRUE)$fit,
VEET = predict.glm(m, newdata = AttData.ET[group == g,], dispersion = max(1, sum(residuals(m)^2)/df.residual(m)), se.fit=TRUE)$se.fit^2)]
# Predictions indicators
for (i in indicators) {
expr <- parse(text = paste0("`:=`(E", i, " = predict.glm(m, newdata = AttData.", i, "[group == g,], se.fit=TRUE)$fit,
VE", i, " = predict.glm(m, newdata = AttData.", i, "[group == g,], dispersion = max(1, sum(residuals(m)^2)/df.residual(m)), se.fit=TRUE)$se.fit^2)"))
AttData[group == g, eval(expr)]
}
# Adjusted baseline
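# Indicator terms are kept only in weeks where the estimated indicator effect
# is negative, i.e. negative indicator contributions are absorbed into the
# adjusted baseline.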
X <- copy(AttData)
for (i in indicators) {
for (l in 0:lags) {
for (s in unique(AttData$season)) {
expr <- parse(text = paste0(i, "_d", l, "_", s, " := ifelse(E", i," >= 0, 0,", i, "_d", l, "_", s,")"))
X[, eval(expr)]
}
}
}
AttData[group == g, `:=`(EAB = predict.glm(m, newdata = X[group == g,], se.fit=TRUE)$fit)]
AttData[group == g, `:=`(VEAB = (max(1, sum(residuals(m)^2)/df.residual(m)))*EAB +
predict.glm(m, newdata = X[group == g,], dispersion = max(1, sum(residuals(m)^2)/df.residual(m)), se.fit=TRUE)$se.fit^2)]
# Adjusted predictions indicators
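# Adjusted indicator effects: only indicator i's terms are kept, and only in
# weeks where its estimated effect is non-negative; baseline, ET and other
# indicator terms are zeroed.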
for (i in indicators) {
X <- copy(AttData)
X[, `:=`(const = 0, wk = 0, sin52 = 0, cos52 = 0, sin26 = 0, cos26 = 0)]
for (l in 0:lags) {
for (ir in c('cold_summer', 'warm_summer', 'cold_winter', 'warm_winter')) {
expr <- parse(text = paste0(ir, "_d", l, ":= 0"))
X[, eval(expr)]
}
for (s in unique(AttData$season)) {
for (ir in indicators[indicators != i]) {
expr <- parse(text = paste0(ir, "_d", l, "_", s, " := 0"))
X[, eval(expr)]
}
expr <- parse(text = paste0(i, "_d", l, "_", s, " := ifelse(E", i," < 0, 0,", i, "_d", l, "_", s,")"))
X[, eval(expr)]
}
}
expr <- parse(text = paste0("`:=`(EA", i, " = predict.glm(m, newdata = X[group == g,], se.fit=TRUE)$fit,
VEA", i, " = predict.glm(m, newdata = X[group == g,], dispersion = max(1, sum(residuals(m)^2)/df.residual(m)), se.fit=TRUE)$se.fit^2)"))
AttData[group == g, eval(expr)]
}
rm(X)
}
# Clean up
AttData[, `:=`(const = NULL, wk = NULL, sin52 = NULL, cos52 = NULL, sin26 = NULL, cos26 = NULL)]
for (l in 0:lags) {
for (i in c('cold_summer', 'warm_summer', 'cold_winter', 'warm_winter')) {
expr <- parse(text = paste0(i, "_d", l, " := NULL"))
AttData[, eval(expr)]
}
for (i in indicators) {
for (s in unique(AttData$season)) {
expr <- parse(text = paste0(i, "_d", l, "_", s, " := NULL"))
AttData[, eval(expr)]
}
}
}
AttData[, season := NULL]
# rm(f, fa, expr, parm, g, i, l, m, ma, AttData.B, AttData.ET)
rm(fa, expr, parm, g, i, l, ma, AttData.B, AttData.ET)
# Pooled total ------------------------------------------------------------
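# Pooled estimates: deaths, expected values and variances are summed over the
# pooled groups (ET is averaged) and returned as group 'TotalPooled'.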
if (!is.null(pooled)) {
pooledData <- AttData[group %in% pooled,
.(group = 'TotalPooled',
deaths = sum(deaths, na.rm = TRUE),
ET = mean(ET, na.rm = TRUE),
EB = sum(EB, na.rm = TRUE),
VEB = sum(VEB, na.rm = TRUE),
EET = sum(EET, na.rm = TRUE),
VEET = sum(VEET, na.rm = TRUE),
EAB = sum(EAB, na.rm = TRUE),
VEAB = sum(VEAB, na.rm = TRUE)
), keyby = ISOweek]
for (i in indicators) {
expr <- parse(text = paste0(".(", i, " = NA,
E", i, " = sum(E", i, ", na.rm = TRUE),
VE", i, " = sum(VE", i, ", na.rm = TRUE),
EA", i, " = sum(EA", i, ", na.rm = TRUE),
VEA", i, " = sum(VEA", i, ", na.rm = TRUE))"))
pooledData <- merge(pooledData, AttData[group %in% pooled, eval(expr), keyby = ISOweek], by = "ISOweek", all.x = TRUE)
}
AttData <- rbind(AttData, pooledData)
}
AttData <- cbind(country, AttData)
AttData <- AttData[order(country, group, ISOweek)]
return(AttData)
}
# # Residual autocorrelation and heteroskedasticity
# g <- 'Total'
# parm <- paste(grep("_d[0-9]", names(AttData), value=TRUE), collapse = " + ")
# f <- paste(c("deaths ~ 1 + wk", "sin52 + cos52", "sin26 + cos26", parm), collapse = " + ")
# m<- glm(f, quasipoisson(identity), data = AttData[group == g,])
# summary(m)
# res <- data.table(res = m$residuals)
# res$n <- 1:nrow(res)
# X <- acf(res$res)
# mod2 = lm(res ~ n, data = res)
# summary(mod2)
# plot(res$res ~ res$n)
# Get R2 - does not work inside the function
# print(logLik(m, REML = FALSE))
# X <- subset(AttData, group == "Total")
# m <- glm2::glm2(f, quasipoisson(identity), data = AttData[which(AttData$group == "Total"),])
# R2 <- rsq::rsq(m, adj = FALSE, type = 'v')
# Adj.R2 <- rsq::rsq(m, adj = TRUE, type = 'v')
# print(paste("R2:", R2))
# print(paste("Adj.R2:", Adj.R2))