predictivemodels/ForHealthImpacts.R

# Set up libraries you'll need (you'll need to install with `install.packages`
# before you can use for the first time)
library(devtools)
library(dplyr)
library(tree)
library(randomForest)
library(gbm)
library(readxl)

# Load predictive models
load("predictivemodels/unpr.tree.rose.RData")
load("predictivemodels/bag.tree.rose.RData")
load("predictivemodels/boost.tree.rose.RData")
load("predictivemodels/rf_mod.RData")

# Load data on population projections and land area
proj_pops <- read_excel("predictivemodels/CountyPop_Brooke.xlsx")
colnames(proj_pops) <- gsub(" ", "", colnames(proj_pops))
land_area <- read.csv("predictivemodels/land_area.csv",
                      header = TRUE, as.is = TRUE)
load("predictivemodels/base_deaths.RData")

# Function to pull population data based on a start year
pull_proj_pops <- function(proj_pops, start_year){
        end_year <- start_year + 19
        year_range <- paste(c(start_year, end_year), collapse = "-")
        pop_data <- proj_pops
        colnames(pop_data)[colnames(pop_data) == year_range] <- "pop"

        pop_data <- pop_data %>%
                dplyr::select(city, pop) %>%
                dplyr::group_by(city) %>%
                dplyr::summarize(pop100 = sum(pop)) %>%
                dplyr::left_join(land_area, by = "city") %>%
                dplyr::mutate(pop.density = pop100 / arealand) %>%
                dplyr::select(-arealand) %>%
                dplyr::left_join(base_deaths, by = "city")

        return(pop_data)
}

## ------------------------------------------------------------------------

# Code to run to get results

out <- "~/tmp/results"  ## Replace with the path to where you have heatwave
                        ## dataframes stored

# Predict frequency of very dangerous heatwaves using the bagging model
apply_all_models(out = out, FUN = "tree_frequency", start_year = 2076)
apply_all_models(out = out, FUN = "tree_frequency",
                 city_specific = TRUE, start_year = 1985)

# Predict exposure (person-days) to very dangerous heatwaves using the bagging
# model
apply_all_models(out = out, FUN = "tree_exposure", start_year = 1985)
apply_all_models(out = out, FUN = "bag_exposure",
                 city_specific = TRUE, start_year = 1985)

# Predict exposure (days) to very dangerous heatwaves using the bagging model
apply_all_models(out = out, FUN = "bag_days", start_year = 1985)
apply_all_models(out = out, FUN = "bag_days",
                 city_specific = TRUE, start_year = 1985)

# Predict excess deaths
apply_all_models(out = out, FUN = "rf_excess_deaths", start_year = 1985)
apply_all_models(out = out, FUN = "rf_excess_deaths",
                 city_specific = TRUE, start_year = 1985)

# Predict excess deaths with a simple model (assume all heat waves increase)
# relative risk 1.057763
apply_all_models(out = out, FUN = "simple_excess_deaths", start_year = 1985)
apply_all_models(out = out, FUN = "simple_excess_deaths",
                 city_specific = TRUE, start_year = 1985)

# Example of saving model results to file
to_save <- apply_all_models(out = out, FUN = "bag_frequency",
                            city_specific = TRUE, start_year = 1985)
write.csv(to_save, file = "~/tmp/To_Save.csv", ## Replace with filename you want
          row.names = FALSE)

## ------------------------------------------------------------------------

# Functions you'll apply across all the heatwaves

custom_tree_frequency <- function(hw_datafr){
        predictions <- ifelse(hw_datafr$max.temp.quantile >= 0.9989,
                              "very", "other" )
        adj_very <-adj_for_precision(predictions = predictions,
                                     precision = 0.012,
                                     false_omission = 0.0)
        return(adj_very)
}

custom_tree_exposure <- function(hw_datafr, start_year){
        hw_datafr <- add_pop_area(hw_datafr, start_year = start_year)

        predictions <- ifelse(hw_datafr$max.temp.quantile >= 0.9989,
                              "very", "other" )

        adj_exp <- process_exposure(hw_datafr = hw_datafr,
                                    prediction = predictions,
                                    precision = 0.012,
                                    false_omission = 0.0)
        return(adj_exp)
}

custom_tree_days <- function(hw_datafr, start_year){
        hw_datafr <- add_pop_area(hw_datafr, start_year = start_year)

        predictions <- ifelse(hw_datafr$max.temp.quantile >= 0.9989,
                              "very", "other" )

        adj_days <- process_days(hw_datafr = hw_datafr,
                                 prediction = predictions,
                                 precision = 0.012,
                                 false_omission = 0.0)
        return(adj_days)
}

tree_frequency <- function(hw_datafr, start_year){
        hw_datafr <- add_pop_area(hw_datafr, start_year = start_year)

        predictions <- predict(unpr.tree.rose,
                               newdata = hw_datafr,
                               type = "class")

        adj_very <-adj_for_precision(predictions = predictions,
                                     precision = 0.026,
                                     false_omission = 0.0)
        return(adj_very)
}

tree_exposure <- function(hw_datafr, start_year){
        hw_datafr <- add_pop_area(hw_datafr, start_year = start_year)

        predictions <- predict(unpr.tree.rose,
                               newdata = hw_datafr,
                               type = "class")

        adj_exp <- process_exposure(hw_datafr = hw_datafr,
                                    prediction = predictions,
                                    precision = 0.026,
                                    false_omission = 0.0)
        return(adj_exp)
}

tree_days <- function(hw_datafr, start_year){
        hw_datafr <- add_pop_area(hw_datafr, start_year = start_year)

        predictions <- predict(unpr.tree.rose,
                               newdata = hw_datafr,
                               type = "class")

        adj_days <- process_days(hw_datafr = hw_datafr,
                                 prediction = predictions,
                                 precision = 0.026,
                                 false_omission = 0.0)
        return(adj_days)
}

bag_frequency <- function(hw_datafr, start_year){
        hw_datafr <- add_pop_area(hw_datafr, start_year = start_year)

        predictions <- predict(bag.tree.rose,
                               newdata = hw_datafr)

        adj_very <-adj_for_precision(predictions = predictions,
                                     precision = 0.026,
                                     false_omission = 0.0)
        return(adj_very)
}

bag_exposure <- function(hw_datafr, start_year){
        hw_datafr <- add_pop_area(hw_datafr, start_year = start_year)

        predictions <- predict(bag.tree.rose,
                               newdata = hw_datafr)

        adj_exp <- process_exposure(hw_datafr = hw_datafr,
                                    prediction = predictions,
                                    precision = 0.026,
                                    false_omission = 0.0)
        return(adj_exp)
}

bag_days <- function(hw_datafr, start_year){
        hw_datafr <- add_pop_area(hw_datafr, start_year = start_year)

        predictions <- predict(bag.tree.rose,
                               newdata = hw_datafr)

        adj_days <- process_days(hw_datafr = hw_datafr,
                                 prediction = predictions,
                                 precision = 0.026,
                                 false_omission = 0.0)
        return(adj_days)
}

boost_frequency <- function(hw_datafr, start_year){
        hw_datafr <- add_pop_area(hw_datafr, start_year = start_year)

        predictions <- predict(boost.tree.rose,
                               newdata = hw_datafr,
                               n.trees = 500)
        predictions <- ifelse(predictions > 0, "very", "other")

        adj_very <-adj_for_precision(predictions = predictions,
                                     precision = 0.023,
                                     false_omission = 0.0)
        return(adj_very)
}

boost_exposure <- function(hw_datafr, start_year){
        hw_datafr <- add_pop_area(hw_datafr, start_year = start_year)

        predictions <- predict(boost.tree.rose,
                               newdata = hw_datafr,
                               n.trees = 500)
        predictions <- ifelse(predictions > 0, "very", "other")

        adj_exp <- process_exposure(hw_datafr = hw_datafr,
                                    prediction = predictions,
                                    precision = 0.023,
                                    false_omission = 0.0)
        return(adj_exp)
}

boost_days <- function(hw_datafr, start_year){
        hw_datafr <- add_pop_area(hw_datafr, start_year = start_year)

        predictions <- predict(boost.tree.rose,
                               newdata = hw_datafr,
                               n.trees = 500)
        predictions <- ifelse(predictions > 0, "very", "other")

        adj_days <- process_days(hw_datafr = hw_datafr,
                                 prediction = predictions,
                                 precision = 0.023,
                                 false_omission = 0.0)
        return(adj_days)
}

simple_excess_deaths <- function(hw_datafr, start_year){
        hw_datafr <- add_pop_area(hw_datafr, start_year = start_year) %>%
                dplyr::select(mean.temp, max.temp, min.temp, length,
                              start.doy, start.month, days.above.80,
                              days.above.85, days.above.90,
                              days.above.95, days.above.99th,
                              days.above.99.5th, first.in.season,
                              mean.temp.quantile, max.temp.quantile,
                              min.temp.quantile, mean.temp.1,
                              mean.summer.temp, pop100, pop.density,
                              mort_rate)

        pred_log_rr <- log(1.057763)

        hw_length <- hw_datafr$length
        base_mort <- hw_datafr$pop100 * hw_datafr$mort_rate

        exp_excess <- hw_length * base_mort * (exp(pred_log_rr) - 1)

        return(sum(exp_excess))
}

rf_excess_deaths <- function(hw_datafr, start_year){
        hw_datafr <- add_pop_area(hw_datafr, start_year = start_year) %>%
                dplyr::select(mean.temp, max.temp, min.temp, length,
                              start.doy, start.month, days.above.80,
                              days.above.85, days.above.90,
                              days.above.95, days.above.99th,
                              days.above.99.5th, first.in.season,
                              mean.temp.quantile, max.temp.quantile,
                              min.temp.quantile, mean.temp.1,
                              mean.summer.temp, pop100, pop.density,
                              mort_rate)

        pred_log_rr <- predict(rf_mod, newdata = hw_datafr)

        hw_length <- hw_datafr$length
        base_mort <- hw_datafr$pop100 * hw_datafr$mort_rate

        exp_excess <- hw_length * base_mort * (exp(pred_log_rr) - 1)

        return(sum(exp_excess))
}

# Helper functions for those functions (don't use directory in
# `apply_all_models`)
process_exposure <- function(hw_datafr, predictions, precision, false_omission){

        exp_projs <- data.frame(predictions = predictions,
                                length = hw_datafr$length,
                                pop = hw_datafr$pop100) %>%
                mutate(exposure = length * pop) %>%
                group_by(predictions) %>%
                summarise(exposure = sum(exposure))

        # Add "very" with no exposure if necessary
        if(!("very" %in% exp_projs$predictions)){
                exp_projs <- rbind(exp_projs,
                                   data.frame(predictions = "very",
                                              exposure = 0))
        }

        adj_exp <- exp_projs$exposure[exp_projs$predictions == "other"] *
                               false_omission +
                exp_projs$exposure[exp_projs$predictions == "very"] * precision

        return(adj_exp)
}

process_days <- function(hw_datafr, predictions, precision, false_omission){

        exp_projs <- data.frame(predictions = predictions,
                                length = hw_datafr$length) %>%
                rename(exposure = length) %>%
                group_by(predictions) %>%
                summarise(exposure = sum(exposure))

        # Add "very" with no exposure if necessary
        if(!("very" %in% exp_projs$predictions)){
                exp_projs <- rbind(exp_projs,
                                   data.frame(predictions = "very",
                                              exposure = 0))
        }

        adj_exp <- exp_projs$exposure[exp_projs$predictions == "other"] *
                false_omission +
                exp_projs$exposure[exp_projs$predictions == "very"] * precision

        return(adj_exp)
}

adj_for_precision <- function(predictions, precision, false_omission){
        tot_very <- sum(predictions == "very")
        tot_less <- sum(predictions == "less")
        out <- tot_very * precision + tot_less * false_omission
        return(out)
}

add_pop_area <- function(hw_datafr, start_year){
        hw_datafr$city <- as.character(hw_datafr$city)
        pop_data <- pull_proj_pops(proj_pops, start_year)

        hw_datafr <- left_join(hw_datafr, pop_data, by = "city")
        return(hw_datafr)
}

########################
## Make histogram for Fig. 1
########################
# example_ens <- read.csv("predictivemodels/1-4.csv", as.is = TRUE)
#
# example_ens <- add_pop_area(example_ens, start_year = 1985) %>%
#         dplyr::select(mean.temp, max.temp, min.temp, length,
#                       start.doy, start.month, days.above.80,
#                       days.above.85, days.above.90,
#                       days.above.95, days.above.99th,
#                       days.above.99.5th, first.in.season,
#                       mean.temp.quantile, max.temp.quantile,
#                       min.temp.quantile, mean.temp.1,
#                       mean.summer.temp, pop100, pop.density,
#                       mort_rate)
#
# pred_log_rr <- predict(rf_mod, newdata = example_ens)
# to_plot <- data.frame(rr = exp(pred_log_rr))
#
# library(ggplot2)
# library(ggthemes)
#
# pdf("predictivemodels/Fig1.pdf", width = 6, height = 3.7)
# ggplot(to_plot, aes(x = rr)) +
#         geom_histogram(bins = 30, fill = "gray", color = "black") +
#         xlab("Relative risk compared to non-heatwave day") +
#         ylab("# of heatwaves") +
#         geom_vline(aes(xintercept = 1.057), color = "darkred",
#                    alpha = 0.7, size = 2) +
#         theme_few()
# dev.off()

# load("predictivemodels/ListOfAllHeatwaves.Rdata")
# all.hws <- hw_data
#
# example_obs <- all.hws %>%
#         dplyr::select(mean.temp, max.temp, min.temp, length,
#                       start.doy, start.month, days.above.80,
#                       days.above.85, days.above.90,
#                       days.above.95, days.above.99th,
#                       days.above.99.5th, first.in.season,
#                       mean.temp.quantile, max.temp.quantile,
#                       min.temp.quantile, mean.temp.1,
#                       mean.summer.temp, pop100, pop.density)
#
# pred_log_rr <- predict(rf_mod, newdata = example_obs)
# to_plot <- data.frame(pred_rr = exp(pred_log_rr))
#
# library(ggplot2)
# library(ggthemes)
#
# # pdf("predictivemodels/Fig1_obs.pdf", width = 6, height = 3.7)
# png("predictivemodels/Fig1_obs.png", width = 800, height = 490)
# ggplot(to_plot, aes(x = pred_rr)) +
#         geom_histogram(bins = 30, fill = "black", alpha = 0.7) +
#         xlab("Relative risk compared to non-heatwave day") +
#         ylab("# of heatwaves") +
#         geom_vline(aes(xintercept = exp(mean(pred_log_rr))), color = "darkred",
#                    alpha = 0.7, size = 2) +
#         theme_few(base_size = 20)
# dev.off()
#
# all.hws <- cbind(all.hws, to_plot)
# worst_hws <- all.hws %>%
#         arrange(desc(pred_rr)) %>%
#         slice(1:50) %>%
#         mutate(type = "worst")
# least_hws <- all.hws %>%
#         arrange(pred_rr) %>%
#         slice(1:50) %>%
#         mutate(type = "least")
# extremes <- rbind(worst_hws, least_hws)
#
# ggplot(extremes, aes(x = mean.temp.quantile)) +
#         geom_histogram() +
#         facet_wrap(~ type, ncol = 1)
# ggplot(extremes, aes(x = days.above.80)) +
#         geom_histogram() +
#         facet_wrap(~ type, ncol = 1)
# ggplot(extremes, aes(x = max.temp.quantile)) +
#         geom_histogram() +
#         facet_wrap(~ type, ncol = 1)
# ggplot(extremes, aes(x = pop100)) +
#         geom_histogram() +
#         facet_wrap(~ type, ncol = 1)
# ggplot(extremes, aes(x = pop.density)) +
#         geom_histogram() +
#         facet_wrap(~ type, ncol = 1)
# ggplot(extremes, aes(x = days.above.99th)) +
#         geom_histogram() +
#         facet_wrap(~ type, ncol = 1)
# ggplot(extremes, aes(x = mean.temp.1)) +
#         geom_histogram() +
#         facet_wrap(~ type, ncol = 1)
# ggplot(extremes, aes(x = min.temp.quantile)) +
#         geom_histogram() +
#         facet_wrap(~ type, ncol = 1)
# ggplot(extremes, aes(x = start.doy)) +
#         geom_histogram() +
#         facet_wrap(~ type, ncol = 1)
# ggplot(extremes, aes(x = length)) +
#         geom_histogram() +
#         facet_wrap(~ type, ncol = 1)
geanders/futureheatwaves documentation built on May 17, 2019, 12:14 a.m.