require(idealstan)
require(ggplot2)
require(dplyr)
require(tidyr)
require(rstan)

knitr::opts_chunk$set(fig.align = 'center',warning = F,message = F)

Introduction

This Rmarkdown file has a series of simulations run on the models offered in idealstan. It simulates a large amount of data and then checks to see if the residuals relative to the true parameters sum to zero, as well as for credible interval coverage. In general, the credible intervals aren't going to be exactly where they should be because the constraints used in estimating the models and identifying the signs aren't themselves included in the simulated data. I.e., one of the ideal points actually follows a positive continuous distribution instead of a random normal distribution once it is constrained, which introduces a slight bias in the recovery of the true parameters. This is an artifact of virtually every latent variable model, and is not of great concern as long as the credible intervals reach close to 90 percent coverage.

First, the standard IRT 2-PL (for this one, the absence discriminations are not a part of the model):

bin_irt_2pl_sim <- id_sim_gen(num_person=20,num_bills=200,diff_sd=1,
                              reg_discrim_sd = 1,
                              absence_discrim_sd = 1)
bin_irt_2pl_est <- id_estimate(idealdata=bin_irt_2pl_sim,
                               model_type=1,
                               restrict_ind_high = as.character(sort(bin_irt_2pl_sim@simul_data$true_person,
                                                                     decreasing=T,
                                                        index=T)$ix[1]),
                               restrict_ind_low = as.character(sort(bin_irt_2pl_sim@simul_data$true_person,
                                                                    decreasing=F, 
                                                        index=T)$ix[1]),
                               fixtype='constrained',
                           person_sd = 3,
<<<<<<< HEAD
                           discrim_miss_sd = 1,
                           discrim_reg_sd = 1,grainsize=1,
                           diff_reg_sd = 1,
                           diff_miss_sd = 1,
                           niter=500,warmup=500,
                           nchains=1,ncores=1,id_refresh=100)
=======
                           discrim_miss_sd = 1,grainsize=1,
                           discrim_reg_sd = 1,
                           diff_reg_sd = 1,
                           diff_miss_sd = 1,
                           niter=500,warmup=500,
                           nchains=1,ncores=4,id_refresh=100)
>>>>>>> develop
id_plot_rhats(bin_irt_2pl_est)
id_sim_coverage(bin_irt_2pl_est) %>% 
  bind_rows(.id='Parameter') %>% 
  ggplot(aes(y=avg,x=Parameter)) +
  stat_summary(fun.args=list(mult=1.96)) + 
  theme_minimal() 

Plot true versus observed values:

id_plot_legis(bin_irt_2pl_est,show_true = T)
id_plot_sims(bin_irt_2pl_est,type='Residuals')
id_plot_sims(bin_irt_2pl_est)
post_pred <- bin_irt_2pl_est %>% 
  id_post_pred(output="missing")
post_pred %>% 
  id_plot_ppc(bin_irt_2pl_est,ppc_pred=.)

Next, inflated 2-PL IRT (binary):

bin_irt_2pl_abs_sim <- id_sim_gen(num_person=20,num_bills=100,ordinal=F,inflate=T,
                               absence_diff_mean=0)
bin_irt_2pl_abs_est <- id_estimate(idealdata=bin_irt_2pl_abs_sim,
                               model_type=2,
                               restrict_ind_high = as.character(sort(bin_irt_2pl_abs_sim@simul_data$true_person,
                                                                     decreasing=T,
                                                        index=T)$ix[1]),
                              restrict_ind_low = as.character(sort(bin_irt_2pl_abs_sim@simul_data$true_person,
                                                                   decreasing=F,
                                                        index=T)$ix[1]),
                               fixtype='constrained')

Plot true versus observed values:

id_plot_legis(bin_irt_2pl_abs_est,show_true = T)
id_sim_coverage(bin_irt_2pl_abs_est) %>% 
  bind_rows(.id='Parameter') %>% 
  ggplot(aes(y=avg,x=Parameter)) +
  stat_summary(fun.args=list(mult=1.96)) + 
  theme_minimal() 
id_plot_sims(bin_irt_2pl_abs_est,type='Residuals')
id_plot_sims(bin_irt_2pl_abs_est)
bin_irt_2pl_abs_est %>% 
  id_post_pred %>% 
  id_plot_ppc(bin_irt_2pl_abs_est,ppc_pred=.)

Now we'll start with the ordinal models. First the uninflated ordinal model:

ord_irt_sim <- id_sim_gen(num_person=20,num_bills=100,inflate=F,
                               absence_diff_mean=0,model_type='ordinal_ratingscale')

ord_irt_est <- id_estimate(idealdata=ord_irt_sim,
                               model_type=3,
                               restrict_ind_high = as.character(sort(ord_irt_sim@simul_data$true_person,decreasing=T,
                                                        index=T)$ix[1]),
                              restrict_ind_low = as.character(sort(ord_irt_sim@simul_data$true_person,decreasing=F,
                                                        index=T)$ix[1]),
                               fixtype='constrained',id_refresh=100)
id_plot_rhats(ord_irt_est)

Plot true versus observed values:

id_plot_legis(ord_irt_est,show_true = T)
id_sim_coverage(ord_irt_est) %>% 
  bind_rows(.id='Parameter') %>% 
  ggplot(aes(y=avg,x=Parameter)) +
  stat_summary(fun.args=list(mult=1.96)) + 
  theme_minimal() 
id_plot_sims(ord_irt_est,type='Residuals')
id_plot_sims(ord_irt_est)
ord_irt_est %>% 
  id_post_pred %>% 
  id_plot_ppc(ord_irt_est,ppc_pred=.)

And we will finish with the inflated ordinal model:

ord_irt_abs_sim <- id_sim_gen(num_person=20,num_bills=100,inflate=T,
                               absence_diff_mean=0,model_type='ordinal_ratingscale')
ord_irt_abs_est <- id_estimate(idealdata=ord_irt_abs_sim,
                               model_type=4,
                               restrict_ind_high = as.character(sort(ord_irt_abs_sim@simul_data$true_person,
                                                                     decreasing=T,
                                                        index=T)$ix[1]),
                              restrict_ind_low = as.character(sort(ord_irt_abs_sim@simul_data$true_person,
                                                                   decreasing=F,
                                                        index=T)$ix[1]),
                               fixtype='constrained')
id_plot_rhats(ord_irt_abs_est)
id_plot_legis(ord_irt_abs_est,show_true = T)
id_sim_coverage(ord_irt_abs_est) %>% 
  bind_rows(.id='Parameter') %>% 
  ggplot(aes(y=avg,x=Parameter)) +
  stat_summary(fun.args=list(mult=1.96)) + 
  theme_minimal() 
id_plot_sims(ord_irt_abs_est,type='Residuals')
id_plot_sims(ord_irt_abs_est)
ord_irt_abs_est %>% 
  id_post_pred %>% 
  id_plot_ppc(ord_irt_abs_est,ppc_pred=.)

We can now try the ordinal graded response version (non-inflated):

ord_irt_grm_sim <- id_sim_gen(num_person=20,num_bills=100,inflate=F,
                               absence_diff_mean=0,model_type='ordinal_grm')
ord_irt_grm_est <- id_estimate(idealdata=ord_irt_grm_sim,
                               model_type=5,
                               restrict_ind_high = as.character(sort(ord_irt_grm_sim@simul_data$true_person,
                                                                     decreasing=T,
                                                        index=T)$ix[1]),
                              restrict_ind_low = as.character(sort(ord_irt_grm_sim@simul_data$true_person,
                                                                   decreasing=F,
                                                        index=T)$ix[1]),
                               fixtype='constrained')
id_plot_rhats(ord_irt_grm_est)
id_sim_coverage(ord_irt_grm_est) %>% 
  bind_rows(.id='Parameter') %>% 
  ggplot(aes(y=avg,x=Parameter)) +
  stat_summary(fun.args=list(mult=1.96)) + 
  theme_minimal() 
id_plot_legis(ord_irt_grm_est,show_true = T)
id_plot_sims(ord_irt_grm_est,type='Residuals')
id_plot_sims(ord_irt_grm_est)
ord_irt_grm_est %>% 
  id_post_pred %>% 
  id_plot_ppc(ord_irt_grm_est,ppc_pred=.)

And now the inflated GRM:

ord_irt_grm_abs_sim <- id_sim_gen(num_person=20,num_bills=100,inflate=T,
                               absence_diff_mean=0,model_type='ordinal_grm')
ord_irt_grm_abs_est <- id_estimate(idealdata=ord_irt_grm_abs_sim,
                               model_type=6,
                               restrict_ind_high = as.character(sort(ord_irt_grm_abs_sim@simul_data$true_person,
                                                                     decreasing=T,
                                                        index=T)$ix[1]),
                              restrict_ind_low = as.character(sort(ord_irt_grm_abs_sim@simul_data$true_person,
                                                                   decreasing=F,
                                                        index=T)$ix[1]),
                               fixtype='constrained')
id_plot_rhats(ord_irt_grm_abs_est)
id_plot_legis(ord_irt_grm_abs_est,show_true = T)
id_sim_coverage(ord_irt_grm_abs_est) %>% 
  bind_rows(.id='Parameter') %>% 
  ggplot(aes(y=avg,x=Parameter)) +
  stat_summary(fun.args=list(mult=1.96)) + 
  theme_minimal() 
id_plot_sims(ord_irt_grm_abs_est,type='Residuals')
id_plot_sims(ord_irt_grm_abs_est)
ord_irt_grm_abs_est %>% 
  id_post_pred %>% 
  id_plot_ppc(ord_irt_grm_abs_est,ppc_pred=.)

Additional Models V0.3

Models I am adding to this release include the Poisson model:

ord_irt_poisson_sim <- id_sim_gen(num_person=20,num_bills=100,inflate=F,
                               absence_diff_mean=0,model_type='poisson')
ord_irt_poisson_est <- id_estimate(idealdata=ord_irt_poisson_sim,
                               model_type=7,
                               restrict_ind_high = as.character(sort(ord_irt_poisson_sim@simul_data$true_person,
                                                                     decreasing=T,
                                                        index=T)$ix[1]),
                              restrict_ind_low = as.character(sort(ord_irt_poisson_sim@simul_data$true_person,
                                                                   decreasing=F,
                                                        index=T)$ix[1]),
                               fixtype='constrained')
id_plot_rhats(ord_irt_poisson_est)
id_plot_legis(ord_irt_poisson_est,show_true = T)
id_sim_coverage(ord_irt_poisson_est) %>% 
  bind_rows(.id='Parameter') %>% 
  ggplot(aes(y=avg,x=Parameter)) +
  stat_summary(fun.args=list(mult=1.96)) + 
  theme_minimal() 
id_plot_sims(ord_irt_poisson_est,type='Residuals')
id_plot_sims(ord_irt_poisson_est)
ord_irt_poisson_est %>% 
  id_post_pred %>% 
  id_plot_ppc(ord_irt_poisson_est,ppc_pred=.)

Now Poisson-inflated:

ord_irt_poisson_abs_sim <- id_sim_gen(num_person=20,num_bills=100,inflate=T,
                               absence_diff_mean=0,model_type='poisson')
ord_irt_poisson_abs_est <- id_estimate(idealdata=ord_irt_poisson_abs_sim,
                               model_type=8,
                               restrict_ind_high = as.character(sort(ord_irt_poisson_abs_sim@simul_data$true_person,
                                                                     decreasing=T,
                                                        index=T)$ix[1]),
                              restrict_ind_low = as.character(sort(ord_irt_poisson_abs_sim@simul_data$true_person,
                                                                   decreasing=F,
                                                        index=T)$ix[1]),
                               fixtype='constrained')
id_plot_rhats(ord_irt_poisson_abs_est)
id_plot_legis(ord_irt_poisson_abs_est,show_true = T)
id_sim_coverage(ord_irt_poisson_abs_est) %>% 
  bind_rows(.id='Parameter') %>% 
  ggplot(aes(y=avg,x=Parameter)) +
  stat_summary(fun.args=list(mult=1.96)) + 
  theme_minimal() 
id_plot_sims(ord_irt_poisson_abs_est,type='Residuals')
id_plot_sims(ord_irt_poisson_abs_est)
ord_irt_poisson_abs_est %>% 
  id_post_pred %>% 
  id_plot_ppc(ord_irt_poisson_abs_est,ppc_pred=.)
ord_irt_poisson_abs_est %>% 
  id_post_pred(output='missing') %>% 
  id_plot_ppc(ord_irt_poisson_abs_est,ppc_pred=.)

A Normally-distributed outcome with no inflation:

ord_irt_normal_sim <- id_sim_gen(num_person=20,num_bills=100,ordinal=F,inflate=F,
                               absence_diff_mean=0,model_type='normal')
ord_irt_normal_est <- id_estimate(idealdata=ord_irt_normal_sim,
                               model_type=9,
                               restrict_ind_high = as.character(sort(ord_irt_normal_sim@simul_data$true_person,
                                                                     decreasing=T,
                                                        index=T)$ix[1]),
                              restrict_ind_low = as.character(sort(ord_irt_normal_sim@simul_data$true_person,
                                                                   decreasing=F,
                                                        index=T)$ix[1]),
                               fixtype='constrained')
id_plot_rhats(ord_irt_normal_est)
id_plot_legis(ord_irt_normal_est,show_true = T)
id_sim_coverage(ord_irt_normal_est) %>% 
  bind_rows(.id='Parameter') %>% 
  ggplot(aes(y=avg,x=Parameter)) +
  stat_summary(fun.args=list(mult=1.96)) + 
  theme_minimal() 
id_plot_sims(ord_irt_normal_est,type='Residuals')
id_plot_sims(ord_irt_normal_est)
ord_irt_normal_est %>% 
  id_post_pred %>% 
  id_plot_ppc(ord_irt_normal_est,ppc_pred=.)

A Normally-distributed outcome with inflation:

ord_irt_normal_abs_sim <- id_sim_gen(num_person=20,num_bills=100,inflate=T,
                               absence_diff_mean=0,model_type='normal')
ord_irt_normal_abs_est <- id_estimate(idealdata=ord_irt_normal_abs_sim,
                               model_type=10,
                               restrict_ind_high = as.character(sort(ord_irt_normal_abs_sim@simul_data$true_person,
                                                                     decreasing=T,
                                                        index=T)$ix[1]),
                              restrict_ind_low = as.character(sort(ord_irt_normal_abs_sim@simul_data$true_person,
                                                                   decreasing=F,
                                                        index=T)$ix[1]),
                               fixtype='constrained')
id_plot_rhats(ord_irt_normal_abs_est)
id_plot_legis(ord_irt_normal_abs_est,show_true = T)
id_sim_coverage(ord_irt_normal_abs_est) %>% 
  bind_rows(.id='Parameter') %>% 
  ggplot(aes(y=avg,x=Parameter)) +
  stat_summary(fun.args=list(mult=1.96)) + 
  theme_minimal() 
id_plot_sims(ord_irt_normal_abs_est,type='Residuals')
id_plot_sims(ord_irt_normal_abs_est)
ord_irt_normal_abs_est %>% 
  id_post_pred %>% 
  id_plot_ppc(ord_irt_normal_abs_est,ppc_pred=.)
ord_irt_normal_abs_est %>% 
  id_post_pred(output='missing') %>% 
  id_plot_ppc(ord_irt_normal_abs_est,ppc_pred=.)

A Lognormal model with no inflation:

ord_irt_lognormal_sim <- id_sim_gen(num_person=20,num_bills=100,ordinal=F,inflate=F,
                               absence_diff_mean=0,model_type='lognormal')
ord_irt_lognormal_est <- id_estimate(idealdata=ord_irt_lognormal_sim,
                               model_type=11,
                               restrict_ind_high = as.character(sort(ord_irt_lognormal_sim@simul_data$true_person,
                                                                     decreasing=T,
                                                        index=T)$ix[1]),
                              restrict_ind_low = as.character(sort(ord_irt_lognormal_sim@simul_data$true_person,
                                                                   decreasing=F,
                                                        index=T)$ix[1]),
                               fixtype='constrained')
id_plot_rhats(ord_irt_lognormal_est)
id_plot_legis(ord_irt_lognormal_est,show_true = T)
id_sim_coverage(ord_irt_lognormal_est) %>% 
  bind_rows(.id='Parameter') %>% 
  ggplot(aes(y=avg,x=Parameter)) +
  stat_summary(fun.args=list(mult=1.96)) + 
  theme_minimal() 
id_plot_sims(ord_irt_lognormal_est,type='Residuals')
id_plot_sims(ord_irt_lognormal_est)

And last but not least, a lognormal model with inflation:

ord_irt_lognormal_abs_sim <- id_sim_gen(num_person=20,num_bills=100,ordinal=F,inflate=F,
                               absence_diff_mean=0,model_type='lognormal')
ord_irt_lognormal_abs_est <- id_estimate(idealdata=ord_irt_lognormal_abs_sim,
                               model_type=12,
                               restrict_ind_high = as.character(sort(ord_irt_lognormal_abs_sim@simul_data$true_person,
                                                                     decreasing=T,
                                                        index=T)$ix[1]),
                              restrict_ind_low = as.character(sort(ord_irt_lognormal_abs_sim@simul_data$true_person,
                                                                   decreasing=F,
                                                        index=T)$ix[1]),
                               fixtype='constrained')
id_plot_rhats(ord_irt_lognormal_abs_est)
id_plot_legis(ord_irt_lognormal_abs_est,show_true = T)
id_sim_coverage(ord_irt_lognormal_abs_est) %>% 
  bind_rows(.id='Parameter') %>% 
  ggplot(aes(y=avg,x=Parameter)) +
  stat_summary(fun.args=list(mult=1.96)) + 
  theme_minimal() 
id_plot_sims(ord_irt_lognormal_abs_est,type='Residuals')
id_plot_sims(ord_irt_lognormal_abs_est)

Time: Random Walk

We won't test all the possible time auto-correlation settings, only a subset of them. I use an inflated binary model as it is a very common one.

While these models are identified, it appears that sometimes given the amount of data, they are only weakly identified in the posterior and Rhat values higher than 1.1 are possible (but are not evidence of multi-modality).

First random walks:

bin_time_irt_2pl_abs_sim <- id_sim_gen(num_person=20,num_bills=125,ordinal=F,inflate=T,
                               absence_diff_mean=0,time_process = 'random',time_points=25,ideal_pts_sd = 1,
diff_sd = 1,
                               absence_discrim_sd = 1,
                               reg_discrim_sd = 1)
bin_time_irt_2pl_abs_est <- id_estimate(idealdata=bin_time_irt_2pl_abs_sim,
                               model_type=2,nchains = 1,ncores = 3,
                               vary_ideal_pts = 'random_walk',
                               save_files = "~/corona_private/data/indices/",
                               restrict_ind_high = as.character(sort(bin_time_irt_2pl_abs_sim@simul_data$drift,
                                                                     decreasing=TRUE,
                                                        index=T)$ix[1]),
                              restrict_ind_low = as.character(sort(bin_time_irt_2pl_abs_sim@simul_data$drift,
                                                                   decreasing=F,
                                                        index=T)$ix[1]),
                               fixtype='constrained',id_refresh = 100)
id_plot_rhats(bin_time_irt_2pl_abs_est)
id_sim_coverage(bin_time_irt_2pl_abs_est) %>% 
  bind_rows(.id='Parameter') %>% 
  ggplot(aes(y=avg,x=Parameter)) +
  stat_summary(fun.args=list(mult=1.96)) + 
  theme_minimal() 

Let's do a time-varying ideal point plot with true point overlay:

id_plot_legis_dyn(bin_time_irt_2pl_abs_est,plot_sim = T) + facet_wrap(~person_id,scales="free_y")
id_plot_sims(bin_time_irt_2pl_abs_est,type='Residuals')
id_plot_sims(bin_time_irt_2pl_abs_est)

Time: AR(1)

And autocorrelation:

bin_time_ar_irt_2pl_abs_sim <- id_sim_gen(num_person=10,num_bills=100,inflate=T,
                               absence_diff_mean=0,time_process = 'AR',time_points=20,time_sd = .2,diff_sd = 1,
                               absence_discrim_sd = 1,
                               reg_discrim_sd = 1)
bin_time_ar_irt_2pl_abs_est <- id_estimate(idealdata=bin_time_ar_irt_2pl_abs_sim,
                               model_type=2,
                               vary_ideal_pts = 'AR1',
                               nchains=3,ncores = 3,
                               restrict_ind_high = as.character(sort(bin_time_ar_irt_2pl_abs_sim@simul_data$drift,
                                                                     decreasing=TRUE,
                                                        index=TRUE)$ix[1]),
                              restrict_ind_low = as.character(sort(bin_time_ar_irt_2pl_abs_sim@simul_data$drift,
                                                                   decreasing=F,
                                                        index=T)$ix[1]),restrict_sd_high = 1,
                               fixtype='constrained',id_refresh = 100)
id_plot_rhats(bin_time_ar_irt_2pl_abs_est)
id_sim_coverage(bin_time_ar_irt_2pl_abs_est) %>% 
  bind_rows(.id='Parameter') %>% 
  ggplot(aes(y=avg,x=Parameter)) +
  stat_summary(fun.args=list(mult=1.96)) + 
  theme_minimal() 

Let's do a time-varying ideal point plot with true point overlay:

id_plot_legis_dyn(bin_time_ar_irt_2pl_abs_est,plot_sim = T) +facet_wrap(~person_id,scales="free_y")
id_plot_sims(bin_time_ar_irt_2pl_abs_est,type='Residuals')
id_plot_sims(bin_time_ar_irt_2pl_abs_est)

Time: Gaussian Process

The Gaussian process differs in that is a non-parametric approach to time-series modeling. It can fit very flexible curves and also works on irregularly-spaced time series.

time_gp_sim <- id_sim_gen(num_person=10,num_bills=250,inflate=T,
                               absence_diff_mean=0,
                          time_process = 'GP',
                          time_points=25,
                          time_sd = .01,
                          diff_sd = 1,
                               absence_discrim_sd = 1,
                               reg_discrim_sd = 1)

time_gp_est <- id_estimate(idealdata=time_gp_sim,
                               model_type=2,
                           use_vb=F,id_refresh=100,
                           nchains=3,niters=500,
                               vary_ideal_pts = 'GP',
                          fix_high = sort(time_gp_sim@simul_data$true_person_mean,
                                                                     decreasing=TRUE)[1],
                          fix_low=sort(time_gp_sim@simul_data$true_person_mean)[1],
restrict_ind_high = as.character(sort(time_gp_sim@simul_data$true_person_mean,
                                                                     decreasing=TRUE,
                                                        index=TRUE)$ix[1]),
                              restrict_ind_low = as.character(sort(time_gp_sim@simul_data$true_person_mean,
                                                                   decreasing=F,
                                                        index=T)$ix[1]),
              gp_sd_par=1e-05,
              gp_m_sd_par = time_gp_sim@simul_data$drift,

                               fixtype='constrained')
id_plot_rhats(time_gp_est)
id_sim_coverage(time_gp_est) %>% 
  bind_rows(.id='Parameter') %>% 
  ggplot(aes(y=avg,x=Parameter)) +
  stat_summary(fun.args=list(mult=1.96)) + 
  theme_minimal() 

Let's do a time-varying ideal point plot with true point overlay:

id_plot_legis_dyn(time_gp_est,plot_sim = T) +facet_wrap(~person_id,scales="free")
id_plot_sims(time_gp_est,type='Residuals')
id_plot_sims(time_gp_est)

Latent Space Model

Another new addition is the latent space model, which is only a slight modification on the IRT model.

Non-inflated:

latent_noninfl_sim <- id_sim_gen(num_person=20,num_bills=125,ordinal=F,
                                       inflate=F,
                                       model_type='binary',
                                       latent_space=T,
                               absence_diff_mean=0,
                               ideal_pts_sd = 1,
                               diff_sd = 1,
                               absence_discrim_sd = 1,
                               reg_discrim_sd = 1)
latent_noninfl_est <- id_estimate(idealdata=latent_noninfl_sim,
                               model_type=13,
                               restrict_ind_high = as.character(sort(latent_noninfl_sim@simul_data$true_person,
                                                                     decreasing=TRUE,
                                                        index=T)$ix[1]),
                              restrict_ind_low = as.character(sort(latent_noninfl_sim@simul_data$true_person,
                                                                   decreasing=F,
                                                        index=T)$ix[1]),
                               fixtype='constrained')
id_plot_rhats(latent_noninfl_est)
id_plot_legis(latent_noninfl_est,show_true = T)
id_sim_coverage(latent_noninfl_est) %>% 
  bind_rows(.id='Parameter') %>% 
  ggplot(aes(y=avg,x=Parameter)) +
  stat_summary(fun.args=list(mult=1.96)) + 
  theme_minimal() 

Inflated:

# you can't have too few people relative to bills
latent_infl_sim <- id_sim_gen(num_person=20,num_bills=125,ordinal=F,
                                       inflate=T,
                                       model_type='binary',
                                       latent_space=T,
                               absence_diff_mean=0,
                               ideal_pts_sd = 1,
                               diff_sd = 1,
                               absence_discrim_sd = 1,
                               reg_discrim_sd = 1)
latent_infl_est <- id_estimate(idealdata=latent_infl_sim,
                               model_type=14,
                               restrict_ind_high = as.character(sort(latent_infl_sim@simul_data$true_person,
                                                                     decreasing=TRUE,
                                                        index=T)$ix[1]),
                              restrict_ind_low = as.character(sort(latent_infl_sim@simul_data$true_person,
                                                                   decreasing=F,
                                                        index=T)$ix[1]),
                               fixtype='constrained',
                              diff_reg_sd = 2,
                             diff_miss_sd=2)
id_plot_rhats(latent_infl_est)
id_plot_legis(latent_infl_est,show_true = T)
id_sim_coverage(latent_infl_est) %>% 
  bind_rows(.id='Parameter') %>% 
  ggplot(aes(y=avg,x=Parameter)) +
  stat_summary(fun.args=list(mult=1.96)) + 
  theme_minimal() 

Test Combined Distributions

New in v1.0 are combined distribution modeling. This section simulates these using separate code due to the complexity of combining distributions together. I simulate here data from Normal, ordinal-rating scale, Poisson and Bernoulli distributions, 10 items from each distribution.

num_bills <- 200
time <- 20
num_per_dist <- (200*20)/4
num_person <- 20

  absence_diff <- rnorm(n=num_bills)
  absence_discrim <- rnorm(n=num_bills)
  # person ideal points common to both types of models (absence and regular)


  ar_adj <- rep(1,num_person)

  # need to pull out some functions to generate data

  .gen_ts_data <- function(t,adj_in,alpha_int,sigma,init_sides) {
  current_val <- new.env()
  current_val$t1 <- 0

  out_vec <- lapply(1:t,function(t_1) {

    if(t_1==1) {
      t_11 <- alpha_int
      current_val$t1 <- t_11
      return(data_frame(t_11))
    } else {
      if(adj_in==1) {
        t_11 <- adj_in*current_val$t1 + rnorm(n=1,sd=sigma)
      } else {
        t_11 <- alpha_int + adj_in*current_val$t1 + rnorm(n=1,sd=sigma)
      }

    }
    current_val$t1 <- t_11
    return(data_frame(t_11))
  })  %>% bind_rows
  return(out_vec)
}

  drift <- rnorm(n=num_person,mean=0,sd=1)

  time_points <- rep(1:time,each=num_bills/time)

  ideal_pts <- lapply(1:num_person, function(i) {
        this_person <- .gen_ts_data(t=time,
                                    adj_in=ar_adj[i],
                                    alpha_int=drift[i],
                                    sigma=.1,
                                    init_sides=ideal_t1[i])
        return(this_person)
      }) %>% bind_cols %>% as.matrix
      ideal_pts <- t(ideal_pts)


  # First generate prob of absences

  person_points <- rep(1:num_person,times=num_bills)
  bill_points <- rep(1:num_bills,each=num_person)
  time_points <- rep(1:time,each=num_bills/time)
  time_points <- rep(time_points,each=num_person)

    pr_absence <- sapply(1:length(person_points),function(n) {
      ideal_pts[person_points[n],time_points[n]]*absence_discrim[bill_points[n]] - absence_diff[bill_points[n]]
    }) %>% plogis()

  reg_diff <- rnorm(n=num_bills,mean=0,sd=1)
  reg_discrim <- rnorm(n=num_bills,mean=0,sd=1)

    pr_vote <- sapply(1:length(person_points),function(n) {
      ideal_pts[person_points[n],time_points[n]]*reg_discrim[bill_points[n]] - reg_diff[bill_points[n]]
    }) %>% plogis()


 bill_points_all <- bill_points
 person_points_all <- person_points

 # do each model in turn

 bill_points <- bill_points_all[1:num_per_dist]
 person_points <- person_points_all[1:num_per_dist]

 bernoulli <- as.numeric(pr_vote[1:num_per_dist]>runif(length(person_points)))

 bernoulli <- ifelse(pr_absence[1:num_per_dist]<(runif(length(person_points))),bernoulli,NA)

  bill_points <- bill_points_all[(num_per_dist+1):(num_per_dist*2)]
 person_points <- person_points_all[(num_per_dist+1):(num_per_dist*2)]

 ordinal_outcomes <- 4

 cutpoints <- quantile(pr_vote[(num_per_dist+1):(num_per_dist*2)],probs=seq(0,1,length.out = ordinal_outcomes+1))
    cutpoints <- cutpoints[2:(length(cutpoints)-1)]

    #Generate outcomes by personlator

    cuts <- sapply(cutpoints,function(y) {
      qlogis(pr_vote[(num_per_dist+1):(num_per_dist*2)]) - y
    },simplify='array')

 ord_rat <- sapply(1:nrow(cuts), function(i) {
      this_cut <- cuts[i,]

      pr_bottom <- 1 - plogis(this_cut[1])

      mid_prs <- sapply(1:(length(this_cut)-1), function(c) {
        plogis(this_cut[c]) - plogis(this_cut[c+1])
      })

      pr_top <- plogis(this_cut[length(this_cut)])

      return(sample(1:(length(this_cut)+1),size=1,prob=c(pr_bottom,mid_prs,pr_top)))
    })

 ord_rat <- ifelse(pr_absence[(num_per_dist+1):(num_per_dist*2)]<(runif(length(person_points))),ord_rat,NA)

 # now poisson

   bill_points <- bill_points_all[(2*num_per_dist+1):(num_per_dist*3)]
 person_points <- person_points_all[(2*num_per_dist+1):(num_per_dist*3)]

  poisson <- rpois(n = length(pr_vote[(2*num_per_dist+1):(num_per_dist*3)]),lambda = exp(pr_vote[(2*num_per_dist+1):(num_per_dist*3)]))

  poisson <- ifelse(pr_absence[(2*num_per_dist+1):(num_per_dist*3)]<(runif(num_per_dist)),poisson,NA)

  # Normal distribution

     bill_points <- bill_points_all[(3*num_per_dist+1):(num_per_dist*4)]
  person_points <- person_points_all[(3*num_per_dist+1):(num_per_dist*4)]

  normal <- rnorm(n = length(pr_vote[(3*num_per_dist+1):(num_per_dist*4)]),mean = pr_vote[(3*num_per_dist+1):(num_per_dist*4)],sd = .5)
  normal <- ifelse(pr_absence[(3*num_per_dist+1):(num_per_dist*4)]<(runif(num_per_dist)),normal,NA)

  # now make data and feed it to id_make

  to_id_make <- tibble(outcome_disc=c(bernoulli,ord_rat,poisson,1:num_per_dist),
                       outcome_cont=c(1:(3*num_per_dist),normal),
                       person_id=person_points_all,
                       item_id=bill_points_all,
                       model_id=c(rep(2,num_per_dist),
                                  rep(4,num_per_dist),
                                  rep(8,num_per_dist),
                                  rep(10,num_per_dist)),
                       time_id=time_points) %>% 
    mutate(ordered_id=4)

  combine_data <- id_make(to_id_make)

  combine_data_est <- id_estimate(combine_data,nchains=4,ncores = 4,fixtype = "prefix",restrict_ind_high = as.character(sort(ideal_pts[,1],
                                                                     decreasing=T,
                                                        index=T)$ix[1]),
                               restrict_ind_low = as.character(sort(ideal_pts[,1],
                                                                    decreasing=F, 
                                                        index=T)$ix[1]),
                               vary_ideal_pts = "random_walk",
                               fix_high = sort(ideal_pts[,1],decreasing=T)[1],
                               fix_low = sort(ideal_pts[,1],decreasing=F)[1],
                               id_refresh = 100)

  # need to add back in true values

  combine_data_est@score_data@simulation <- T
  combine_data_est@score_data@simul_data <- list(num_person=num_person,
                                                 num_bills=num_bills,
                                                 ideal_pts_sd=1,
                                                 true_person=ideal_pts)

  id_plot_legis_dyn(combine_data_est,plot_sim = T) + facet_wrap(~person_id)


saudiwin/idealstan documentation built on Sept. 2, 2023, 1:29 a.m.