R/plotSimulation.R

Defines functions .simulation_age_group plotSimulation

library(dplyr)
library(magrittr)
load("data/education_data.rda")
load("data/mortality_data.rda")

.simulation_age_group <- function(simulation_age) {
    if(simulation_age == 000){
        "< 1 year"
    } else if(between(simulation_age, 001, 004)) {
        "1-4 years"
    } else if(between(simulation_age, 005, 009)) {
        "5-9 years"
    } else if(between(simulation_age, 010, 014)) {
        "10-14 years"
    } else if(between(simulation_age, 015, 019)) {
        "15-19 years"
    } else if(between(simulation_age, 020, 024)) {
        "20-24 years"
    } else if(between(simulation_age, 025, 034)) {
        "25-34 years"
    } else if(between(simulation_age, 035, 044)) {
        "35-44 years"
    } else if(between(simulation_age, 045, 054)) {
        "45-54 years"
    } else if(between(simulation_age, 055, 064)) {
        "55-64 years"
    } else if(between(simulation_age, 065, 074)) {
        "65-74 years"
    } else if(between(simulation_age, 075, 084)) {
        "75-84 years"
    } else if(simulation_age >= 085) {
        "85+ years"
    } else {
        "Not Stated"
    }
}

#' plotSimulation
#'
#' @return
#' @export
#'
#' @examples
#'
#' @import dplyr
plotSimulation <- function() {
    years <- c(1940, 1941)
    races <- c("Black or African American", "Other Race", "White")
    sexes <- c("Female", "Male")

    set.seed(1)

    theta <-
        rnorm(1000) %>%
        pnorm()

    simulation <- data.frame()

    for (i in years) {
        for (j in races) {
            for (k in sexes) {
                DOB <- rep(i, 1000)
                race <- rep(j, 1000)
                sex <- rep(k, 1000)
                simulation <-
                    data.frame(DOB, race, sex, theta) %>%
                    rbind(simulation)
            }
        }
    }

    simulation <- cbind(simulation, education = NA, DOD = NA) %>%
        arrange(DOB, race, sex, theta)

    for (i in years) {
        for (j in races) {
            for (k in sexes) {
                stop("JK")

                i <- as.integer(i)

                message(i, " ", j, " ", k)

                persons <-
                    simulation %>%
                    filter(DOB <= i) %>%
                    filter(race == j) %>%
                    filter(sex == k) %>%
                    filter(is.na(DOD)) %>%
                    mutate(age_group = sapply(i - DOB, .simulation_age_group)) %>%
                    arrange(DOB, race, sex, theta)

                education <-
                    education_data %>%
                    filter(Year == ifelse(i < 1962, round(i * 0.1) * 10, i)) %>%
                    filter(Race == j) %>%
                    filter(Sex == k) %>%
                    mutate(Population = ifelse(!is.na(CPS_Population),
                                               CPS_Population,
                                               Census_Population)) %>%
                    group_by(Age_Group) %>%
                    mutate(Sum = sum(Population)) %>%
                    group_by(Age_Group, Education) %>%
                    summarise(Proportion = Population / Sum) %>%
                    filter(Age_Group >= "25-34 years")

                educate <- integer()

                for (x in unique(persons$age_group)) {
                    status <-
                        education %>%
                        filter(Age_Group == x) %>%
                        arrange(desc(Education)) %>%
                        mutate(Number = round(Proportion * nrow(persons))) %$%
                        rep(Education, Number)

                    if(length(status) > nrow(persons)) {
                        educate <-
                            status[1:nrow(persons)] %>%
                            c(educate, .)
                    } else if(length(status) < nrow(persons)) {
                        educate <-
                            rep(status[1], nrow(persons) - length(status)) %>%
                            c(status) %>%
                            c(educate, .)
                    } else {
                        educate <-c(educate, status)
                    }

                }

                mortality <-
                    mortality_data %>%
                    filter(Year == i) %>%
                    filter(Race == j) %>%
                    filter(Sex == k) %>%
                    filter(Age_Group != "Not Stated") %>%
                    mutate(Proportion = Deaths / Population) %>%
                    select(Age_Group, Proportion)

                kill <- integer()

                for(y in unique(persons$age_group)) {
                    status <-
                        mortality %>%
                        filter(Age_Group == y) %>%
                        mutate(Number = round(Proportion * nrow(persons))) %$%
                        rep(i, Number)

                    if(length(status) > nrow(persons)) {
                        kill <-
                            status[1:nrow(persons)] %>%
                            c(kill, .)
                    } else if(length(status) < nrow(persons)) {
                        kill <-
                            rep(NA, nrow(persons) - length(status)) %>%
                            c(status, .) %>%
                            c(kill, .)
                    } else if(length(status) == nrow(persons)) {
                        kill <-c(kill, status)
                    }
                }

                persons$education <- educate
                persons$DOD <- kill
                persons <- persons[, 1:6]
                simulation <-
                    full_join(simulation, persons, c("DOB", "race", "sex", "theta")) %>%
                    mutate(education = education.y) %>%
                    mutate(DOD = DOD.y) %>%
                    arrange(DOB, race, sex, theta) %>%
                    select(DOB, race, sex, theta, education, DOD)
            }
        }
    }

}
schifferl/LagSelectionBias documentation built on May 29, 2019, 3:38 p.m.