require(idealstan) require(ggplot2) require(dplyr) require(tidyr) require(rstan) knitr::opts_chunk$set(fig.align = 'center',warning = F,message = F)
This Rmarkdown file has a series of simulations run on the models offered in idealstan
. It simulates a large amount of data and then checks to see if the residuals relative to the true parameters sum to zero, as well as for credible interval coverage. In general, the credible intervals aren't going to be exactly where they should be because the constraints used in estimating the models and identifying the signs aren't themselves included in the simulated data. I.e., one of the ideal points actually follows a positive continuous distribution instead of a random normal distribution once it is prefix, which introduces a slight bias in the recovery of the true parameters. This is an artifact of virtually every latent variable model, and is not of great concern as long as the credible intervals reach close to 90 percent coverage.
First, the standard IRT 2-PL (for this one, the absence discriminations are not a part of the model):
bin_irt_2pl_sim <- id_sim_gen(num_person=100,num_bills=5,ordinal=F,inflate=F, diff_sd=1, reg_discrim_sd = 1, absence_discrim_sd = 1) bin_irt_2pl_est <- id_estimate(idealdata=bin_irt_2pl_sim, model_type=1, restrict_ind_high = as.character(sort(bin_irt_2pl_sim@simul_data$true_person, decreasing=T, index=T)$ix[1]), restrict_ind_low = as.character(sort(bin_irt_2pl_sim@simul_data$true_person, decreasing=F, index=T)$ix[1]), fixtype='prefix', person_sd = 3, nchains=4,ncores=4) id_plot_rhats(bin_irt_2pl_est)
id_sim_coverage(bin_irt_2pl_est) %>% bind_rows(.id='Parameter') %>% ggplot(aes(y=avg,x=Parameter)) + stat_summary(fun.args=list(mult=1.96)) + theme_minimal()
Plot true versus observed values:
id_plot_legis(bin_irt_2pl_est,show_true = T)
id_plot_sims(bin_irt_2pl_est,type='Residuals')
id_plot_sims(bin_irt_2pl_est)
bin_irt_2pl_est %>% id_post_pred %>% id_plot_ppc(bin_irt_2pl_est,ppc_pred=.)
Next, inflated 2-PL IRT (binary):
bin_irt_2pl_abs_sim <- id_sim_gen(num_person=20,num_bills=100,ordinal=F,inflate=T, absence_diff_mean=0) bin_irt_2pl_abs_est <- id_estimate(idealdata=bin_irt_2pl_abs_sim, model_type=2, restrict_ind_high = as.character(sort(bin_irt_2pl_abs_sim@simul_data$true_person, decreasing=T, index=T)$ix[1]), restrict_ind_low = as.character(sort(bin_irt_2pl_abs_sim@simul_data$true_person, decreasing=F, index=T)$ix[1]), fixtype='prefix') id_plot_rhats(bin_irt_2pl_abs_est)
Plot true versus observed values:
id_plot_legis(bin_irt_2pl_abs_est,show_true = T)
id_sim_coverage(bin_irt_2pl_abs_est) %>% bind_rows(.id='Parameter') %>% ggplot(aes(y=avg,x=Parameter)) + stat_summary(fun.args=list(mult=1.96)) + theme_minimal()
id_plot_sims(bin_irt_2pl_abs_est,type='Residuals')
id_plot_sims(bin_irt_2pl_abs_est)
bin_irt_2pl_abs_est %>% id_post_pred %>% id_plot_ppc(bin_irt_2pl_abs_est,ppc_pred=.)
Now we'll start with the ordinal models. First the uninflated ordinal model:
ord_irt_sim <- id_sim_gen(num_person=20,num_bills=100,inflate=F, absence_diff_mean=0,model_type='ordinal_ratingscale') ord_irt_est <- id_estimate(idealdata=ord_irt_sim, model_type=3, restrict_ind_high = as.character(sort(ord_irt_sim@simul_data$true_person,decreasing=T, index=T)$ix[1]), restrict_ind_low = as.character(sort(ord_irt_sim@simul_data$true_person,decreasing=F, index=T)$ix[1]), fixtype='prefix') id_plot_rhats(ord_irt_est)
Plot true versus observed values:
id_plot_legis(ord_irt_est,show_true = T)
id_sim_coverage(ord_irt_est) %>% bind_rows(.id='Parameter') %>% ggplot(aes(y=avg,x=Parameter)) + stat_summary(fun.args=list(mult=1.96)) + theme_minimal()
id_plot_sims(ord_irt_est,type='Residuals')
id_plot_sims(ord_irt_est)
ord_irt_est %>% id_post_pred %>% id_plot_ppc(ord_irt_est,ppc_pred=.)
And we will finish with the inflated ordinal model:
ord_irt_abs_sim <- id_sim_gen(num_person=20,num_bills=100,inflate=T, absence_diff_mean=0,model_type='ordinal_ratingscale') ord_irt_abs_est <- id_estimate(idealdata=ord_irt_abs_sim, model_type=4, restrict_ind_high = as.character(sort(ord_irt_abs_sim@simul_data$true_person, decreasing=T, index=T)$ix[1]), restrict_ind_low = as.character(sort(ord_irt_abs_sim@simul_data$true_person, decreasing=F, index=T)$ix[1]), fixtype='prefix') id_plot_rhats(ord_irt_abs_est)
id_plot_legis(ord_irt_abs_est,show_true = T)
id_sim_coverage(ord_irt_abs_est) %>% bind_rows(.id='Parameter') %>% ggplot(aes(y=avg,x=Parameter)) + stat_summary(fun.args=list(mult=1.96)) + theme_minimal()
id_plot_sims(ord_irt_abs_est,type='Residuals')
id_plot_sims(ord_irt_abs_est)
ord_irt_abs_est %>% id_post_pred %>% id_plot_ppc(ord_irt_abs_est,ppc_pred=.)
We can now try the ordinal graded response version (non-inflated):
ord_irt_grm_sim <- id_sim_gen(num_person=20,num_bills=100,inflate=F, absence_diff_mean=0,model_type='ordinal_grm') ord_irt_grm_est <- id_estimate(idealdata=ord_irt_grm_sim, model_type=5, restrict_ind_high = as.character(sort(ord_irt_grm_sim@simul_data$true_person, decreasing=T, index=T)$ix[1]), restrict_ind_low = as.character(sort(ord_irt_grm_sim@simul_data$true_person, decreasing=F, index=T)$ix[1]), fixtype='prefix') id_plot_rhats(ord_irt_grm_est)
id_sim_coverage(ord_irt_grm_est) %>% bind_rows(.id='Parameter') %>% ggplot(aes(y=avg,x=Parameter)) + stat_summary(fun.args=list(mult=1.96)) + theme_minimal()
id_plot_legis(ord_irt_grm_est,show_true = T)
id_plot_sims(ord_irt_grm_est,type='Residuals')
id_plot_sims(ord_irt_grm_est)
ord_irt_grm_est %>% id_post_pred %>% id_plot_ppc(ord_irt_grm_est,ppc_pred=.)
And now the inflated GRM:
ord_irt_grm_abs_sim <- id_sim_gen(num_person=20,num_bills=100,inflate=T, absence_diff_mean=0,model_type='ordinal_grm') ord_irt_grm_abs_est <- id_estimate(idealdata=ord_irt_grm_abs_sim, model_type=6, restrict_ind_high = as.character(sort(ord_irt_grm_abs_sim@simul_data$true_person, decreasing=T, index=T)$ix[1]), restrict_ind_low = as.character(sort(ord_irt_grm_abs_sim@simul_data$true_person, decreasing=F, index=T)$ix[1]), fixtype='prefix') id_plot_rhats(ord_irt_grm_abs_est)
id_plot_legis(ord_irt_grm_abs_est,show_true = T)
id_sim_coverage(ord_irt_grm_abs_est) %>% bind_rows(.id='Parameter') %>% ggplot(aes(y=avg,x=Parameter)) + stat_summary(fun.args=list(mult=1.96)) + theme_minimal()
id_plot_sims(ord_irt_grm_abs_est,type='Residuals')
id_plot_sims(ord_irt_grm_abs_est)
ord_irt_grm_abs_est %>% id_post_pred %>% id_plot_ppc(ord_irt_grm_abs_est,ppc_pred=.)
Models I am adding to this release include the Poisson model:
ord_irt_poisson_sim <- id_sim_gen(num_person=20,num_bills=100,inflate=F, absence_diff_mean=0,model_type='poisson') ord_irt_poisson_est <- id_estimate(idealdata=ord_irt_poisson_sim, model_type=7, restrict_ind_high = as.character(sort(ord_irt_poisson_sim@simul_data$true_person, decreasing=T, index=T)$ix[1]), restrict_ind_low = as.character(sort(ord_irt_poisson_sim@simul_data$true_person, decreasing=F, index=T)$ix[1]), fixtype='prefix') id_plot_rhats(ord_irt_poisson_est)
id_plot_legis(ord_irt_poisson_est,show_true = T)
id_sim_coverage(ord_irt_poisson_est) %>% bind_rows(.id='Parameter') %>% ggplot(aes(y=avg,x=Parameter)) + stat_summary(fun.args=list(mult=1.96)) + theme_minimal()
id_plot_sims(ord_irt_poisson_est,type='Residuals')
id_plot_sims(ord_irt_poisson_est)
ord_irt_poisson_est %>% id_post_pred %>% id_plot_ppc(ord_irt_poisson_est,ppc_pred=.)
Now Poisson-inflated:
ord_irt_poisson_abs_sim <- id_sim_gen(num_person=20,num_bills=100,inflate=T, absence_diff_mean=0,model_type='poisson') ord_irt_poisson_abs_est <- id_estimate(idealdata=ord_irt_poisson_abs_sim, model_type=8, restrict_ind_high = as.character(sort(ord_irt_poisson_abs_sim@simul_data$true_person, decreasing=T, index=T)$ix[1]), restrict_ind_low = as.character(sort(ord_irt_poisson_abs_sim@simul_data$true_person, decreasing=F, index=T)$ix[1]), fixtype='prefix') id_plot_rhats(ord_irt_poisson_abs_est)
id_plot_legis(ord_irt_poisson_abs_est,show_true = T)
id_sim_coverage(ord_irt_poisson_abs_est) %>% bind_rows(.id='Parameter') %>% ggplot(aes(y=avg,x=Parameter)) + stat_summary(fun.args=list(mult=1.96)) + theme_minimal()
id_plot_sims(ord_irt_poisson_abs_est,type='Residuals')
id_plot_sims(ord_irt_poisson_abs_est)
ord_irt_poisson_abs_est %>% id_post_pred %>% id_plot_ppc(ord_irt_poisson_abs_est,ppc_pred=.)
ord_irt_poisson_abs_est %>% id_post_pred(output='missing') %>% id_plot_ppc(ord_irt_poisson_abs_est,ppc_pred=.)
A Normally-distributed outcome with no inflation:
ord_irt_normal_sim <- id_sim_gen(num_person=20,num_bills=100,ordinal=F,inflate=F, absence_diff_mean=0,model_type='normal') ord_irt_normal_est <- id_estimate(idealdata=ord_irt_normal_sim, model_type=9, restrict_ind_high = as.character(sort(ord_irt_normal_sim@simul_data$true_person, decreasing=T, index=T)$ix[1]), restrict_ind_low = as.character(sort(ord_irt_normal_sim@simul_data$true_person, decreasing=F, index=T)$ix[1]), fixtype='prefix') id_plot_rhats(ord_irt_normal_est)
id_plot_legis(ord_irt_normal_est,show_true = T)
id_sim_coverage(ord_irt_normal_est) %>% bind_rows(.id='Parameter') %>% ggplot(aes(y=avg,x=Parameter)) + stat_summary(fun.args=list(mult=1.96)) + theme_minimal()
id_plot_sims(ord_irt_normal_est,type='Residuals')
id_plot_sims(ord_irt_normal_est)
ord_irt_normal_est %>% id_post_pred %>% id_plot_ppc(ord_irt_normal_est,ppc_pred=.)
A Normally-distributed outcome with inflation:
ord_irt_normal_abs_sim <- id_sim_gen(num_person=20,num_bills=100,inflate=T, absence_diff_mean=0,model_type='normal') ord_irt_normal_abs_est <- id_estimate(idealdata=ord_irt_normal_abs_sim, model_type=10, restrict_ind_high = as.character(sort(ord_irt_normal_abs_sim@simul_data$true_person, decreasing=T, index=T)$ix[1]), restrict_ind_low = as.character(sort(ord_irt_normal_abs_sim@simul_data$true_person, decreasing=F, index=T)$ix[1]), fixtype='prefix') id_plot_rhats(ord_irt_normal_abs_est)
id_plot_legis(ord_irt_normal_abs_est,show_true = T)
id_sim_coverage(ord_irt_normal_abs_est) %>% bind_rows(.id='Parameter') %>% ggplot(aes(y=avg,x=Parameter)) + stat_summary(fun.args=list(mult=1.96)) + theme_minimal()
id_plot_sims(ord_irt_normal_abs_est,type='Residuals')
id_plot_sims(ord_irt_normal_abs_est)
ord_irt_normal_abs_est %>% id_post_pred %>% id_plot_ppc(ord_irt_normal_abs_est,ppc_pred=.)
ord_irt_normal_abs_est %>% id_post_pred(output='missing') %>% id_plot_ppc(ord_irt_normal_abs_est,ppc_pred=.)
A Lognormal model with no inflation:
ord_irt_lognormal_sim <- id_sim_gen(num_person=20,num_bills=100,ordinal=F,inflate=F, absence_diff_mean=0,model_type='lognormal') ord_irt_lognormal_est <- id_estimate(idealdata=ord_irt_lognormal_sim, model_type=11, restrict_ind_high = as.character(sort(ord_irt_lognormal_sim@simul_data$true_person, decreasing=T, index=T)$ix[1]), restrict_ind_low = as.character(sort(ord_irt_lognormal_sim@simul_data$true_person, decreasing=F, index=T)$ix[1]), fixtype='prefix') id_plot_rhats(ord_irt_lognormal_est)
id_plot_legis(ord_irt_lognormal_est,show_true = T)
id_sim_coverage(ord_irt_lognormal_est) %>% bind_rows(.id='Parameter') %>% ggplot(aes(y=avg,x=Parameter)) + stat_summary(fun.args=list(mult=1.96)) + theme_minimal()
id_plot_sims(ord_irt_lognormal_est,type='Residuals')
id_plot_sims(ord_irt_lognormal_est)
And last but not least, a lognormal model with inflation:
ord_irt_lognormal_abs_sim <- id_sim_gen(num_person=20,num_bills=100,ordinal=F,inflate=F, absence_diff_mean=0,model_type='lognormal') ord_irt_lognormal_abs_est <- id_estimate(idealdata=ord_irt_lognormal_abs_sim, model_type=12, restrict_ind_high = as.character(sort(ord_irt_lognormal_abs_sim@simul_data$true_person, decreasing=T, index=T)$ix[1]), restrict_ind_low = as.character(sort(ord_irt_lognormal_abs_sim@simul_data$true_person, decreasing=F, index=T)$ix[1]), fixtype='prefix') id_plot_rhats(ord_irt_lognormal_abs_est)
id_plot_legis(ord_irt_lognormal_abs_est,show_true = T)
id_sim_coverage(ord_irt_lognormal_abs_est) %>% bind_rows(.id='Parameter') %>% ggplot(aes(y=avg,x=Parameter)) + stat_summary(fun.args=list(mult=1.96)) + theme_minimal()
id_plot_sims(ord_irt_lognormal_abs_est,type='Residuals')
id_plot_sims(ord_irt_lognormal_abs_est)
We won't test all the possible time auto-correlation settings, only a subset of them. I use an inflated binary model as it is a very common one.
While these models are identified, it appears that sometimes given the amount of data, they are only weakly identified in the posterior and Rhat values higher than 1.1 are possible (but are not evidence of multi-modality).
First random walks:
bin_time_irt_2pl_abs_sim <- id_sim_gen(num_person=20,num_bills=30, ordinal=F,inflate=T, absence_diff_mean=0, time_process = 'random', time_points=10, ideal_pts_sd = 1, diff_sd = 1, absence_discrim_sd = 1, reg_discrim_sd = 1) bin_time_irt_2pl_abs_est <- id_estimate(idealdata=bin_time_irt_2pl_abs_sim, model_type=2,nchains=2, id_refresh=100,time_var = 10, niters = 1000,warmup = 500, keep_param=list(person_vary=T,person_int=F,item=TRUE, extra=T), vary_ideal_pts = 'random_walk', restrict_var = F, restrict_ind_high = as.character(sort(bin_time_irt_2pl_abs_sim@simul_data$drift, decreasing=TRUE, index=T)$ix[1]), restrict_ind_low = as.character(sort(bin_time_irt_2pl_abs_sim@simul_data$drift, decreasing=F, index=T)$ix[1]), fixtype='prefix') id_plot_rhats(bin_time_irt_2pl_abs_est)
id_sim_coverage(bin_time_irt_2pl_abs_est) %>% bind_rows(.id='Parameter') %>% ggplot(aes(y=avg,x=Parameter)) + stat_summary(fun.args=list(mult=1.96)) + theme_minimal()
Let's do a time-varying ideal point plot with true point overlay:
id_plot_legis_dyn(bin_time_irt_2pl_abs_est,plot_sim = T) + facet_wrap(~person_id,scales="free_y")
id_plot_sims(bin_time_irt_2pl_abs_est,type='Residuals')
id_plot_sims(bin_time_irt_2pl_abs_est)
And autocorrelation:
bin_time_ar_irt_2pl_abs_sim <- id_sim_gen(num_person=20,num_bills=200,inflate=T, absence_diff_mean=0,time_process = 'AR',time_points=20,time_sd = .5,diff_sd = 1, absence_discrim_sd = 1, reg_discrim_sd = 1) bin_time_ar_irt_2pl_abs_sim@score_matrix$group_id <- forcats::fct_collapse(bin_time_ar_irt_2pl_abs_sim@score_matrix$person_id, `1`=c("1","2","3")) bin_time_ar_irt_2pl_abs_est <- id_estimate(idealdata=bin_time_ar_irt_2pl_abs_sim, use_groups = T, model_type=2, ar1_up = 1,ar1_down = -1, vary_ideal_pts = 'AR1', const_type = "items", time_var = 2,restrict_var = T, restrict_ind_high = as.character(sort(bin_time_ar_irt_2pl_abs_sim@simul_data$true_reg_discrim, decreasing=TRUE, index=TRUE)$ix[1]), restrict_ind_low = as.character(sort(bin_time_ar_irt_2pl_abs_sim@simul_data$true_reg_discrim,, decreasing=F, index=T)$ix[1]), fixtype='prefix') id_plot_rhats(bin_time_ar_irt_2pl_abs_est)
id_sim_coverage(bin_time_ar_irt_2pl_abs_est) %>% bind_rows(.id='Parameter') %>% ggplot(aes(y=avg,x=Parameter)) + stat_summary(fun.args=list(mult=1.96)) + theme_minimal()
Let's do a time-varying ideal point plot with true point overlay:
id_plot_legis_dyn(bin_time_ar_irt_2pl_abs_est,plot_sim = T) +facet_wrap(~person_id)
id_plot_sims(bin_time_ar_irt_2pl_abs_est,type='Residuals')
id_plot_sims(bin_time_ar_irt_2pl_abs_est)
The Gaussian process differs in that is a non-parametric approach to time-series modeling. It can fit very flexible curves and also works on irregularly-spaced time series.
time_gp_sim <- id_sim_gen(num_person=10,num_bills=200,inflate=T, absence_diff_mean=0, time_process = 'GP', time_points=40, time_sd = .01, diff_sd = 1, absence_discrim_sd = 1, reg_discrim_sd = 1) # a little harder to constrain, need to figure out minimums all_low <- apply(time_gp_sim@simul_data$true_person,1,min) time_gp_est <- id_estimate(idealdata=time_gp_sim, model_type=2, use_vb=F, nchains=3,niters=1000, vary_ideal_pts = 'GP', restrict_ind_high = which(all_low==max(all_low)), restrict_ind_low = which(all_low==min(all_low)), fixtype='prefix') id_plot_rhats(time_gp_est)
id_sim_coverage(time_gp_est) %>% bind_rows(.id='Parameter') %>% ggplot(aes(y=avg,x=Parameter)) + stat_summary(fun.args=list(mult=1.96)) + theme_minimal()
Let's do a time-varying ideal point plot with true point overlay:
id_plot_legis_dyn(time_gp_est,plot_sim = T) +facet_wrap(~person_id)
id_plot_sims(time_gp_est,type='Residuals')
id_plot_sims(time_gp_est)
Another new addition is the latent space model, which is only a slight modification on the IRT model.
Non-inflated:
latent_noninfl_sim <- id_sim_gen(num_person=10,num_bills=125,ordinal=F, inflate=F, model_type='binary', latent_space=T, absence_diff_mean=0, ideal_pts_sd = 1, diff_sd = 1, absence_discrim_sd = 1, reg_discrim_sd = 1) latent_noninfl_est <- id_estimate(idealdata=latent_noninfl_sim, model_type=13, restrict_ind_high = as.character(sort(latent_noninfl_sim@simul_data$true_person, decreasing=TRUE, index=T)$ix[1]), restrict_ind_low = as.character(sort(latent_noninfl_sim@simul_data$true_person, decreasing=F, index=T)$ix[1]), fixtype='prefix',id_refresh=100,warmup=250,niters=250,nchains=2,ncores=2) id_plot_rhats(latent_noninfl_est)
id_plot_legis(latent_noninfl_est,show_true = T)
id_sim_coverage(latent_noninfl_est) %>% bind_rows(.id='Parameter') %>% ggplot(aes(y=avg,x=Parameter)) + stat_summary(fun.args=list(mult=1.96)) + theme_minimal()
Inflated:
# you can't have too few people relative to bills latent_infl_sim <- id_sim_gen(num_person=20,num_bills=125,ordinal=F, inflate=T, model_type='binary', latent_space=T, absence_diff_mean=0, ideal_pts_sd = 1, diff_sd = 1, absence_discrim_sd = 1, reg_discrim_sd = 1) latent_infl_est <- id_estimate(idealdata=latent_infl_sim, model_type=14, restrict_ind_high = as.character(sort(latent_infl_sim@simul_data$true_person, decreasing=TRUE, index=T)$ix[1]), restrict_ind_low = as.character(sort(latent_infl_sim@simul_data$true_person, decreasing=F, index=T)$ix[1]), fixtype='prefix', adapt_delta=0.95, diff_reg_sd = 2, diff_miss_sd=2) id_plot_rhats(latent_infl_est)
id_plot_legis(latent_infl_est,show_true = T)
id_sim_coverage(latent_infl_est) %>% bind_rows(.id='Parameter') %>% ggplot(aes(y=avg,x=Parameter)) + stat_summary(fun.args=list(mult=1.96)) + theme_minimal()
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.