knitr::opts_chunk$set(echo = TRUE)
library(tidyverse) library(flm.NE.WNV) library(MuMIn) # Read in population data pop = flm.NE.WNV::NE_county_pops %>% filter(year >= 2002) %>% as.data.frame() county_lookup <- pop %>% filter(year == "2010") %>% select(County, fips)%>% as.data.frame() #Human cases, simulated, based on predicted values from the fitted model. cases = flm.NE.WNV::sampledat %>% select(County, year, cases)%>% # # drop Arthur back to 0 to test new zero filling methods # mutate(cases = case_when(County == "Arthur" ~ 0, # TRUE ~ cases)) %>% as.data.frame() # Read in environmental data NEdat = flm.NE.WNV::NEdat %>% rename(tmean = temp) %>% mutate(fips = sprintf("31%03d", cofips)) %>% left_join(county_lookup, by = "fips") %>% select(County, fips, year, month, tmean, ppt)%>% as.data.frame() # Read in spi spi = flm.NE.WNV::spi %>% mutate(County = trimws(County), County = gsub(" ", "", County)) %>% left_join(county_lookup, by = "County") %>% rename(spi = spi1) %>% select(County, fips, year, month, spi)%>% as.data.frame() # Read in spei spei = flm.NE.WNV::spei %>% mutate(County = trimws(County), County = gsub(" ", "", County)) %>% left_join(county_lookup, by = "County") %>% rename(spei = spei1) %>% select(County, fips, year, month, spei)%>% as.data.frame() # Specify the last date to use -- package data incomplete for 2019 target.date = "2016-02-01" #target.date = "2019-03-01" -- this should fail with an error # Specify the first year of data to use start.year = 2002 # Specify lag lengths to use lag.lengths = c(12, 18) #c(12, 18, 24, 30, 36) #For consistent results with simulated data in.seed = 4872957 # set = NULL if you have actual case data
Our approach struggles if there are counties with no cases, so we eliminate those counties (two in the simulated data).
data <- assemble.data.lags(pop, cases, NEdat, spi, spei, target.date, start.year, in.seed, lag.lengths) allLagsT <- data[[1]] # 2004 - 2017 allLags0 <- data[[2]] # 2018 # how many counties lack cases? Should be none. allLagsT %>% group_by(County) %>% summarize(total_cases = sum(cases)) %>% filter(total_cases == 0) allLagsT %>% group_by(County) %>% summarize(total_cases = sum(cases)) %>% filter(total_cases > 0) # to identify counties to fill in allunits <- unique(cases$County) missingunits <- !(allunits %in% unique(allLagsT$County)) allunits[missingunits]
What does the state total look like plotted against time?
allLagsT %>% group_by(year) %>% summarize(total_cases = sum(cases)) %>% ggplot() + geom_point(mapping = aes(x = year, y = total_cases))
This chunk assembles data and runs models, using all combinations of lagged data for mean temperature, precipitation, SPI and SPEI.
Note: It takes about 5 minutes to run the models with a lag of 9. It takes 25 minutes to run all the models with the set of default lags.
flm.results = call.flm(pop, cases, NEdat, spi, spei, target.date, start.year, in.seed = in.seed, lag.lengths = 12, fillzeros = TRUE, nsim = 10, predict_from = "best")
top <- flm.results[[3]]$AICbest mod <- summary(flm.results[[3]]$fittedModels[[top]]) mod
flm.results[[1]][[top]]
dist_lag_terms <- extract_functional(flm.results[[3]]$fittedModels[[top]]) %>% mutate(lcl = fit - 1.96*se, ucl = fit + 1.96*se) ggplot(data = dist_lag_terms) + geom_line(mapping = aes(x = x, y = fit)) + geom_ribbon(mapping = aes(x = x, ymin = lcl, ymax = ucl), alpha = 0.2)+ facet_wrap(~label)
aictable <- model.sel(flm.results[[3]]$fittedModel)
model.sims <- map(flm.results[[3]]$fittedModels, simulate, nsim = 100) names(model.sims) <- 1:64 model.crps <- map(model.sims, ~scoringRules::crps_sample(allLagsT$cases, .)) %>% map(~bind_cols(allLagsT[,c("County","year")], crps = .)) crps.table <- model.crps %>% bind_rows(.id = "model") %>% group_by(model) %>% summarize(mean_crps = mean(crps), se_crps = sd(crps)/sqrt(n())) %>% arrange(mean_crps) %>% mutate(order = row_number()) ggplot(crps.table, mapping = aes(x = order, y = mean_crps)) + geom_point()
Are they related to the AIC scores?
crps.table$AICc <- flm.results[[3]]$AICfits[as.numeric(crps.table$model)] ggplot(data = crps.table, mapping = aes(x = mean_crps, y = AICc)) + geom_point()
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.