knitr::opts_chunk$set(echo = TRUE)
library(tidyverse)
library(magrittr)
library(janitor)
library(knitr)
library(kableExtra)
#source("R/helpers.R")
data <- read_rds("data/munged.RDS")
expl <- names(data)[1:20]
expl <- set_names(expl,expl)
response <- names(data)[21:34]
response <- set_names(response,response)
plot_summary <- function(x,y){
  ggplot(data, aes_string(x = x, y = y)) + stat_summary(fun.y = "sum", geom = "bar", position = "identity") + scale_y_continuous(labels = scales::comma) + coord_flip()
}

summary_plots <- map(response, ~map(expl, plot_summary, y = .x))
summary_plots_names <- imap(summary_plots, ~paste0(.y, "_", names(.x), ".png")) %>% flatten()
walk2(summary_plots_names, flatten(summary_plots), ~ggsave(filename = .x, plot = .y,path = "plots"))
plot_heatmap <- function(group_x, numerator = number_of_deaths, denominator = expected_death_qx2015vbt_by_policy){
  group_x <- sym(group_x)
  numerator <- enquo(numerator)
  denominator <- enquo(denominator)
  data %>%
  select(!! group_x, issue_age, duration, !! numerator, !! denominator) %>% 
  group_by(!! group_x, issue_age, duration) %>% 
  summarise(ae = sum(!! numerator) / sum(!! denominator)) %>% 
  ungroup() %>% 
  nest(issue_age, duration, ae) %>% 
  mutate(plot = map2(data, !! group_x, ~ggplot(.x, aes(issue_age, duration)) +
                      geom_raster(aes(fill = ae)) + scale_fill_gradient2(limits = c(0,2), midpoint = 1) +
                      ggtitle(paste(group_x, .y), subtitle = denominator)
                    )
         ) %>% 
  select(!! group_x, plot)
}

heatmap_list <- data %>% select(1:20) %>% select(-duration, -issue_age, -duration_group, -issue_age_group) %>% names()
heatmap_list <- set_names(heatmap_list, heatmap_list)

heatmap<- heatmap_list %>% map(~plot_heatmap(.x))

heatmap <- seq(1:length(heatmap_list)) %>% map(~gather(heatmap[[.x]], key = "key", value = "value", 1))
heatmap_names <- map(heatmap, ~map2(.x$key, .x$value, ~paste0(.x, "_", .y))) %>% flatten() %>% make.names()
heatmap_names <- gsub("\\.", "_", heatmap_names)
heatmap <- map(heatmap, ~.x$plot) %>% flatten()
walk2(heatmap_names, heatmap, ~ggsave(filename = paste0(.x, ".png"), plot = .y, path = "plots/heatmaps"))

Heatmaps seem to be showing higher levels of mortality in issue age 25. Need to verify.

data %>% filter(issue_age %in% c(24,25,26)) %>% group_by(issue_age, duration_group) %>% summarise(ae = sum(number_of_deaths) / sum(expected_death_qx2015vbt_by_policy)) %>% ggplot(aes(duration_group, ae)) + geom_col() + facet_wrap(~issue_age)

Issue Age 25 seems to be strange. Why signficantly higher AE compared to 24 or 26?

data %>% filter(issue_age == 25) %>% group_by(issue_age, issue_year) %>% summarise(policies_exposed = sum(policies_exposed)) %>% ggplot(aes(issue_year, policies_exposed)) + geom_col() + coord_flip()

data %>% filter(issue_age == 25) %>% group_by(issue_age, issue_year) %>% summarise(deaths = sum(number_of_deaths)) %>% ggplot(aes(issue_year, deaths)) + geom_col() + coord_flip()

data %>% filter(issue_age == 25) %>% group_by(issue_age, issue_year) %>% summarise(ae = sum(number_of_deaths) / sum(expected_death_qx2015vbt_by_policy)) %>% ggplot(aes(issue_year, ae)) + geom_col() + coord_flip()

data %>% filter(issue_age == 25) %>% group_by(issue_age, issue_year) %>% summarise(ae = sum(number_of_deaths) / sum(expected_death_qx2008vbt_by_policy)) %>% ggplot(aes(issue_year, ae)) + geom_col() + coord_flip()

data %>% filter(issue_age == 25) %>% group_by(issue_age, issue_year) %>% summarise(ae = sum(number_of_deaths) / sum(expected_death_qx2001vbt_by_policy)) %>% ggplot(aes(issue_year, ae)) + geom_col() + coord_flip()

Looks like significant unexpected deaths for issue years in the 2000's? Early durations would have higher ae ratios so that's not necessarily abnormal, but it still seems high given the young age.

data %>% filter(issue_age <30 & preferred_indicator == 1 & smoker_status == "NonSmoker" & preferred_class == 2 & number_of_preferred_classes == 2 & insurance_plan == "Term" & duration < 10) %>% group_by(issue_age, duration_group) %>% summarise(deaths = sum(number_of_deaths), expected = sum(expected_death_qx2015vbt_by_policy), policies = sum(policies_exposed), qx = scales::percent(deaths / policies), ae = sum(number_of_deaths) / sum(expected_death_qx2015vbt_by_policy)) %>% arrange(desc(qx))

Wittled down the dataset to what I assume is standard nonsmoker? Issue age 25 has abnormally high amount of deaths compared to other issue ages less than 30. Reasons? Error? Opiods? It could fit the time frame and general age range of 25 - 35 that would likely use opiods, but doesn't explain why other issue ages aren't showing abnormally high values. Over 1 death per 1000.

data %>% filter(issue_age < 75 & duration < 10) %>% group_by(issue_age, duration_group) %>% summarise(deaths = sum(number_of_deaths), expected = sum(expected_death_qx2015vbt_by_policy), policies = sum(policies_exposed), qx = scales::percent(deaths / policies), ae = sum(number_of_deaths) / sum(expected_death_qx2015vbt_by_policy)) %>% arrange(desc(ae))

Issue age 25 makes up 50% of the top ae's for issue ages less than 75 and durations less than 10. Data error?

data %>% filter(issue_age %in% c(24, 25, 26) & duration < 10) %>% group_by(issue_age, duration_group) %>% summarise(deaths = sum(number_of_deaths), expected = sum(expected_death_qx2015vbt_by_policy), policies = sum(policies_exposed), qx = scales::percent(deaths / policies), ae = sum(number_of_deaths) / sum(expected_death_qx2015vbt_by_policy)) %>% ggplot(aes(duration_group, policies)) + geom_col() + facet_wrap(~issue_age)

Issue age 25 has similar numbers of policies exposed as 24 and 26. So unlikely a data grouping issue. Maybe deaths are grouped into issue age 25?

data %>% filter(issue_age < 75 & duration < 10) %>% group_by(issue_age) %>% summarise(deaths = sum(number_of_deaths), expected = sum(expected_death_qx2015vbt_by_policy), policies = sum(policies_exposed), qx = (deaths / policies), ae = sum(number_of_deaths) / sum(expected_death_qx2015vbt_by_policy)) %>% ggplot(aes(issue_age, ae)) + geom_col()


Houstonwp/soadac_2018 documentation built on May 9, 2019, 3:25 a.m.