my_examples/mecklenberg_poststrat_example.R

##
## Mecklenberg example used in the methods paper to illustrate post
## stratificaition
##
## This script generates all plots in the post stratification section.
##
## 


library( tidyverse )
library( simITS )

R = 1000
R = 10

# Make plots look right to me.
library( ggthemes )
my_t = theme_tufte() + theme( legend.position="bottom",
                              legend.direction="horizontal", legend.key.width=unit(1,"cm"),
                              panel.border = element_blank() )
theme_set( my_t )


data( "meck_subgroup")
meck = mutate( meck_subgroup, pbail = 100 * pbail )
head( meck )

meck = rename( meck, N = n.cases )

t0 = 0
tmax = max( meck$month )

pis = calculate_group_weights( "category", meck, t0, tmax )
pis


##
#### The different subgroups changing over time
##

# Looking at changing shares across time
ggplot( meck, aes( month, N, col=category ) ) +
  geom_point() + geom_line() +
  geom_smooth( se=FALSE, lty=2 )
ggsave( "my_examples/plots/meck_changing_count.pdf", width=5, height=3.75 )


##
## The proportion as outcome
##

adjdat = adjust_data( meck, "pbail", "category", pis, include_aggregate=TRUE )
head( adjdat )

# Looking at changing shares across time
aa = adjdat %>% dplyr::select( month, starts_with( "pi" )) %>%
  gather( pi_felony, pi_misdem, pi_traffic, key="category", value="proportion" )
head( aa )
ggplot( aa, aes( month, proportion, col=category ) ) +
  geom_point() + geom_line()
ggsave( "my_examples/plots/meck_changing_prop.pdf", width=5, height=3.75 )

# Modeling adjusted and not
envelope.adj = process_outcome_model( "pbail.adj", adjdat, t0=t0, R = R, summarize = TRUE, smooth=FALSE )

envelope = process_outcome_model( "pbail", adjdat, t0=t0, R = R, summarize = TRUE, smooth=FALSE )

head( adjdat )
envelope.felony = process_outcome_model( "pbail_felony", adjdat, t0=t0, R = R, summarize = TRUE, smooth=FALSE )
envelope.misdem = process_outcome_model( "pbail_misdem", adjdat, t0=t0, R = R, summarize = TRUE, smooth=FALSE )
envelope.traffic = process_outcome_model( "pbail_traffic", adjdat, t0=t0, R = R, summarize = TRUE, smooth=FALSE )

env = bind_rows( raw=envelope, 
                 adjusted=envelope.adj, 
                 felony=envelope.felony, 
                 misdem=envelope.misdem, 
                 traffic = envelope.traffic, .id="model")
head( env )
plt <- ggplot( env, aes( month, col=model ) ) +
  geom_line( aes(y=Ystar), lty=2 ) +
  geom_line( aes(y=Y)) + geom_point( aes( y=Y ), size=0.5 ) +
  #geom_line( aes(y=Ysmooth1), lty=2 ) +
  geom_vline( xintercept=t0 )

plt

plt +         facet_wrap( ~model )

# Just look at adjusted vs raw
env2 = bind_rows( raw=envelope, adjusted=envelope.adj, .id="model")

plt <- ggplot( env2, aes( month, col=model ) ) +
  geom_line( aes(y=Ystar), lty=2 ) +
  geom_line( aes(y=Y)) + geom_point( aes( y=Y ), size=0.5 ) +
  #geom_line( aes(y=Ysmooth1), lty=2 ) +
  geom_vline( xintercept=t0 )

plt
ggsave( "my_examples/plots/meck_adjusted.pdf", width=5, height=3.5 )



##
## And with counts
##

head( meck )
adjdat = adjust_data( meck, "n.bail", "category", pis, include_aggregate = TRUE, is_count = TRUE )
head( adjdat )


m2 = gather( meck, N, n.bail, key="var", value="count" )
ggplot( m2, aes( month, count, col=category ) ) +
  facet_wrap( ~ var ) +
  geom_line() + geom_point() +
  geom_smooth( data=filter( m2, month <= t0 ), method="lm", fullrange= TRUE, se = FALSE,  )

# Sanity check
#mm = meck %>% group_by( month ) %>%
#  summarise( N = sum( N ),
#             n.bail = sum( n.bail ) )
#plot( mm$n.cases, mm$N )
#summary( mm$n.cases - mm$N )

# Did the counts shift much?
qplot( n.bail, n.bail.adj, data=adjdat )
ggplot( adjdat, aes( x=month) ) +
  geom_line( aes( y=n.bail ) ) +
  geom_line( aes( y=n.bail.adj ), col="green" )


aa = adjdat %>% dplyr::select( month, starts_with("n.bail" ) ) %>%
  gather( starts_with( "n.bail" ), key="category", value="n.bail" )
head( aa )
ggplot( aa, aes( x=month, y=n.bail, col=category ) ) +
  geom_line() + geom_point()

# Modeling adjusted and not
envelope.adj = process_outcome_model( "n.bail.adj", adjdat, t0=t0, R = 100, summarize = TRUE, smooth=FALSE )

envelope = process_outcome_model( "n.bail", adjdat, t0=t0, R = 100, summarize = TRUE, smooth=FALSE )

head( adjdat )
envelope.misdem = process_outcome_model( "n.bail_misdem", adjdat, t0=t0, R = 100, summarize = TRUE, smooth=FALSE )
envelope.felony = process_outcome_model( "n.bail_felony", adjdat, t0=t0, R = 100, summarize = TRUE, smooth=FALSE )
envelope.traffic = process_outcome_model( "n.bail_traffic", adjdat, t0=t0, R = 100, summarize = TRUE, smooth=FALSE )

env = bind_rows( raw=envelope, adjusted=envelope.adj, felony=envelope.felony, 
                 misdem=envelope.misdem, traffic=envelope.traffic, .id="model")
head( env )

plt <- ggplot( env, aes( month, col=model ) ) +
  geom_line( aes(y=Ystar), lty=2 ) +
  geom_line( aes(y=Y)) + geom_point( aes( y=Y ), size=0.5 ) +
  #geom_line( aes(y=Ysmooth1), lty=2 ) +
  geom_vline( xintercept=t0 )

plt

plt +         facet_wrap( ~model ) +
  geom_line( aes(y=Ybar ), lty=1 )


head( env )
env = mutate( env, impact = Y - Ybar,
              agg = model %in% c("adjusted", "raw"))


plt <- ggplot( env, aes( month,y=impact, col=model, lty=agg ) ) +
  geom_line( lwd=1) + geom_point() +
  geom_vline( xintercept=t0 ) +
  geom_hline( yintercept= 0 )

plt
lmiratrix/simITS documentation built on Sept. 1, 2023, 9:02 p.m.