library(magrittr)
library(plyr) # needed for ldply; must be loaded BEFORE dplyr
library(tidyverse)
options(tibble.print_max = 65, tibble.print_min = 65) # if more than 60 rows, print 60 - enough for states
# ggplot2 tibble tidyr readr purrr dplyr stringr forcats
library(scales)
library(hms) # hms, for times
library(lubridate) # lubridate, for date/times
library(vctrs)
library(btools) # has a few useful functions -- devtools::install_github("donboyd5/btools")
library(geoWeight)
dbdir <- "C:/Users/donbo/Dropbox (Personal)/"
puf_2017 <- read_csv(file = paste0(dbdir, 'puf_2017_filter.csv'))
ht(puf_2017)
puf_targ <- read_csv(file = paste0(dbdir, 'puf_2017_targets.csv'))
ht(puf_targ)
names(puf_targ)
puf_targ %>%
select(AGI_STUB, STATE, N1_nnz, MARS1_nnz, MARS2_nnz) %>%
mutate(mars12=MARS1_nnz + MARS2_nnz, mdiff=N1_nnz - mars12,
mdpct=mdiff / N1_nnz * 100,
amdpct=abs(mdpct)) %>%
arrange(mdpct)
possible_target_vars <- c("N1_nnz", "MARS1_nnz", "MARS2_nnz", "A00100_sum", "pos_AGI_nnz", "A00200_sum", "N00200_nnz", "A01000_sum",
"N01000_nnz", "A04470_sum", "N04470_nnz", "A17000_sum", "N17000_nnz", "A04800_sum", "N04800_nnz",
"A05800_sum", "N05800_nnz", "A09600_sum", "N09600_nnz",
"A00700_sum", "N00700_nnz")
feasible_target_vars <- intersect(possible_target_vars, names(puf_targ))
setdiff(possible_target_vars, names(puf_targ))
target_vars <- feasible_target_vars[1:10]
# check the targets
checkwide <- puf_targ %>%
select(AGI_STUB, STATE, all_of(target_vars)) %>%
group_by(AGI_STUB) %>%
mutate(across(all_of(target_vars), ~ . / sum(.) * 100))
checkwide %>% filter(AGI_STUB==2)
check <- puf_targ %>%
select(AGI_STUB, STATE, all_of(target_vars)) %>%
group_by(AGI_STUB) %>%
mutate(across(all_of(target_vars), ~ . / sum(.) * 100)) %>%
pivot_longer(-c(AGI_STUB, STATE), names_to="target", values_to="state_pct")
ht(check)
summary(check)
check %>%
group_by(AGI_STUB, STATE) %>%
mutate(pop_pct=state_pct[target=="N1_nnz"],
pdiff=state_pct / pop_pct * 100 - 100,
apdiff=abs(pdiff)) %>%
ungroup %>%
arrange(desc(apdiff))
# make mypuf ----
puf_targ_djb <- puf_targ %>%
select(AGI_STUB, STATE, all_of(feasible_target_vars)) %>%
select(-c(A04470_sum, N04470_nnz,
A17000_sum, N17000_nnz,
A04800_sum, N04800_nnz))
summary(puf_targ_djb) # no NA's
count(puf_targ_djb, STATE)
# compute new state targets, leaving out the current OA
# we need national sums to redistribute across states
natsums <- puf_targ_djb %>%
select(-STATE) %>%
group_by(AGI_STUB) %>%
summarise(across(everything(), sum), .groups="drop")
puf_targ_revised <- puf_targ_djb %>%
filter(STATE != "OA") %>%
pivot_longer(-c(AGI_STUB, STATE)) %>%
left_join(natsums %>% pivot_longer(-AGI_STUB, values_to="value_US"), by = c("AGI_STUB", "name")) %>%
group_by(AGI_STUB, name) %>%
mutate(target_revised=value / sum(value) * value_US) %>%
select(AGI_STUB, STATE, name, value=target_revised) %>%
pivot_wider() %>%
ungroup
summary(puf_targ_revised)
count(puf_targ_revised, STATE)
# now revise the puf, and then check wtd sums against target values
feasible_target_vars
ivars <- c(1, 4, 5, 6)
djb_target_vars <- feasible_target_vars[ivars]
getbase <- function(suffix) {
var_backend <- str_extract(target_vars, "_.*$") # gets the ending part of each variable name
base_vars <- target_vars[which(var_backend==suffix)] %>% str_remove(suffix)
names(base_vars) <- base_vars
base_vars
}
nnz_vars <-getbase("_nnz") # variables for which we want the weighted number of nonzero values
sum_vars <- getbase("_sum") # variables for which we want the weighted sum
sumneg_vars <- getbase("_sumneg") # variables for which we want the weighted sum
pufdjb <- puf_2017 %>%
mutate_at(nnz_vars,
list(nnz = ~ 1 * (. != 0))) %>%
mutate_at(sum_vars,
list(sum = ~ . * (. != 0))) %>%
mutate_at(sumneg_vars,
list(sumneg = ~ . * (. < 0)))
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.