require(idealstan) require(ggplot2) require(dplyr) require(tidyr) require(rstan) knitr::opts_chunk$set(fig.align = 'center',warning = F,message = F)
This Rmarkdown file has a series of simulations run on the models offered in idealstan
. It simulates a large amount of data and then checks to see if the residuals relative to the true parameters sum to zero, as well as for credible interval coverage. In general, the credible intervals aren't going to be exactly where they should be because the constraints used in estimating the models and identifying the signs aren't themselves included in the simulated data. I.e., one of the ideal points actually follows a positive continuous distribution instead of a random normal distribution once it is constrained, which introduces a slight bias in the recovery of the true parameters. This is an artifact of virtually every latent variable model, and is not of great concern as long as the credible intervals reach close to 90 percent coverage.
First, the standard IRT 2-PL (for this one, the absence discriminations are not a part of the model):
bin_irt_2pl_sim <- id_sim_gen(num_person=20,num_bills=200,diff_sd=1, reg_discrim_sd = 1, absence_discrim_sd = 1) bin_irt_2pl_est <- id_estimate(idealdata=bin_irt_2pl_sim, model_type=1, restrict_ind_high = as.character(sort(bin_irt_2pl_sim@simul_data$true_person, decreasing=T, index=T)$ix[1]), restrict_ind_low = as.character(sort(bin_irt_2pl_sim@simul_data$true_person, decreasing=F, index=T)$ix[1]), fixtype='constrained', person_sd = 3, <<<<<<< HEAD discrim_miss_sd = 1, discrim_reg_sd = 1,grainsize=1, diff_reg_sd = 1, diff_miss_sd = 1, niter=500,warmup=500, nchains=1,ncores=1,id_refresh=100) ======= discrim_miss_sd = 1,grainsize=1, discrim_reg_sd = 1, diff_reg_sd = 1, diff_miss_sd = 1, niter=500,warmup=500, nchains=1,ncores=4,id_refresh=100) >>>>>>> develop id_plot_rhats(bin_irt_2pl_est)
id_sim_coverage(bin_irt_2pl_est) %>% bind_rows(.id='Parameter') %>% ggplot(aes(y=avg,x=Parameter)) + stat_summary(fun.args=list(mult=1.96)) + theme_minimal()
Plot true versus observed values:
id_plot_legis(bin_irt_2pl_est,show_true = T)
id_plot_sims(bin_irt_2pl_est,type='Residuals')
id_plot_sims(bin_irt_2pl_est)
post_pred <- bin_irt_2pl_est %>% id_post_pred(output="missing") post_pred %>% id_plot_ppc(bin_irt_2pl_est,ppc_pred=.)
Next, inflated 2-PL IRT (binary):
bin_irt_2pl_abs_sim <- id_sim_gen(num_person=20,num_bills=100,ordinal=F,inflate=T, absence_diff_mean=0) bin_irt_2pl_abs_est <- id_estimate(idealdata=bin_irt_2pl_abs_sim, model_type=2, restrict_ind_high = as.character(sort(bin_irt_2pl_abs_sim@simul_data$true_person, decreasing=T, index=T)$ix[1]), restrict_ind_low = as.character(sort(bin_irt_2pl_abs_sim@simul_data$true_person, decreasing=F, index=T)$ix[1]), fixtype='constrained')
Plot true versus observed values:
id_plot_legis(bin_irt_2pl_abs_est,show_true = T)
id_sim_coverage(bin_irt_2pl_abs_est) %>% bind_rows(.id='Parameter') %>% ggplot(aes(y=avg,x=Parameter)) + stat_summary(fun.args=list(mult=1.96)) + theme_minimal()
id_plot_sims(bin_irt_2pl_abs_est,type='Residuals')
id_plot_sims(bin_irt_2pl_abs_est)
bin_irt_2pl_abs_est %>% id_post_pred %>% id_plot_ppc(bin_irt_2pl_abs_est,ppc_pred=.)
Now we'll start with the ordinal models. First the uninflated ordinal model:
ord_irt_sim <- id_sim_gen(num_person=20,num_bills=100,inflate=F, absence_diff_mean=0,model_type='ordinal_ratingscale') ord_irt_est <- id_estimate(idealdata=ord_irt_sim, model_type=3, restrict_ind_high = as.character(sort(ord_irt_sim@simul_data$true_person,decreasing=T, index=T)$ix[1]), restrict_ind_low = as.character(sort(ord_irt_sim@simul_data$true_person,decreasing=F, index=T)$ix[1]), fixtype='constrained',id_refresh=100) id_plot_rhats(ord_irt_est)
Plot true versus observed values:
id_plot_legis(ord_irt_est,show_true = T)
id_sim_coverage(ord_irt_est) %>% bind_rows(.id='Parameter') %>% ggplot(aes(y=avg,x=Parameter)) + stat_summary(fun.args=list(mult=1.96)) + theme_minimal()
id_plot_sims(ord_irt_est,type='Residuals')
id_plot_sims(ord_irt_est)
ord_irt_est %>% id_post_pred %>% id_plot_ppc(ord_irt_est,ppc_pred=.)
And we will finish with the inflated ordinal model:
ord_irt_abs_sim <- id_sim_gen(num_person=20,num_bills=100,inflate=T, absence_diff_mean=0,model_type='ordinal_ratingscale') ord_irt_abs_est <- id_estimate(idealdata=ord_irt_abs_sim, model_type=4, restrict_ind_high = as.character(sort(ord_irt_abs_sim@simul_data$true_person, decreasing=T, index=T)$ix[1]), restrict_ind_low = as.character(sort(ord_irt_abs_sim@simul_data$true_person, decreasing=F, index=T)$ix[1]), fixtype='constrained') id_plot_rhats(ord_irt_abs_est)
id_plot_legis(ord_irt_abs_est,show_true = T)
id_sim_coverage(ord_irt_abs_est) %>% bind_rows(.id='Parameter') %>% ggplot(aes(y=avg,x=Parameter)) + stat_summary(fun.args=list(mult=1.96)) + theme_minimal()
id_plot_sims(ord_irt_abs_est,type='Residuals')
id_plot_sims(ord_irt_abs_est)
ord_irt_abs_est %>% id_post_pred %>% id_plot_ppc(ord_irt_abs_est,ppc_pred=.)
We can now try the ordinal graded response version (non-inflated):
ord_irt_grm_sim <- id_sim_gen(num_person=20,num_bills=100,inflate=F, absence_diff_mean=0,model_type='ordinal_grm') ord_irt_grm_est <- id_estimate(idealdata=ord_irt_grm_sim, model_type=5, restrict_ind_high = as.character(sort(ord_irt_grm_sim@simul_data$true_person, decreasing=T, index=T)$ix[1]), restrict_ind_low = as.character(sort(ord_irt_grm_sim@simul_data$true_person, decreasing=F, index=T)$ix[1]), fixtype='constrained') id_plot_rhats(ord_irt_grm_est)
id_sim_coverage(ord_irt_grm_est) %>% bind_rows(.id='Parameter') %>% ggplot(aes(y=avg,x=Parameter)) + stat_summary(fun.args=list(mult=1.96)) + theme_minimal()
id_plot_legis(ord_irt_grm_est,show_true = T)
id_plot_sims(ord_irt_grm_est,type='Residuals')
id_plot_sims(ord_irt_grm_est)
ord_irt_grm_est %>% id_post_pred %>% id_plot_ppc(ord_irt_grm_est,ppc_pred=.)
And now the inflated GRM:
ord_irt_grm_abs_sim <- id_sim_gen(num_person=20,num_bills=100,inflate=T, absence_diff_mean=0,model_type='ordinal_grm') ord_irt_grm_abs_est <- id_estimate(idealdata=ord_irt_grm_abs_sim, model_type=6, restrict_ind_high = as.character(sort(ord_irt_grm_abs_sim@simul_data$true_person, decreasing=T, index=T)$ix[1]), restrict_ind_low = as.character(sort(ord_irt_grm_abs_sim@simul_data$true_person, decreasing=F, index=T)$ix[1]), fixtype='constrained') id_plot_rhats(ord_irt_grm_abs_est)
id_plot_legis(ord_irt_grm_abs_est,show_true = T)
id_sim_coverage(ord_irt_grm_abs_est) %>% bind_rows(.id='Parameter') %>% ggplot(aes(y=avg,x=Parameter)) + stat_summary(fun.args=list(mult=1.96)) + theme_minimal()
id_plot_sims(ord_irt_grm_abs_est,type='Residuals')
id_plot_sims(ord_irt_grm_abs_est)
ord_irt_grm_abs_est %>% id_post_pred %>% id_plot_ppc(ord_irt_grm_abs_est,ppc_pred=.)
Models I am adding to this release include the Poisson model:
ord_irt_poisson_sim <- id_sim_gen(num_person=20,num_bills=100,inflate=F, absence_diff_mean=0,model_type='poisson') ord_irt_poisson_est <- id_estimate(idealdata=ord_irt_poisson_sim, model_type=7, restrict_ind_high = as.character(sort(ord_irt_poisson_sim@simul_data$true_person, decreasing=T, index=T)$ix[1]), restrict_ind_low = as.character(sort(ord_irt_poisson_sim@simul_data$true_person, decreasing=F, index=T)$ix[1]), fixtype='constrained') id_plot_rhats(ord_irt_poisson_est)
id_plot_legis(ord_irt_poisson_est,show_true = T)
id_sim_coverage(ord_irt_poisson_est) %>% bind_rows(.id='Parameter') %>% ggplot(aes(y=avg,x=Parameter)) + stat_summary(fun.args=list(mult=1.96)) + theme_minimal()
id_plot_sims(ord_irt_poisson_est,type='Residuals')
id_plot_sims(ord_irt_poisson_est)
ord_irt_poisson_est %>% id_post_pred %>% id_plot_ppc(ord_irt_poisson_est,ppc_pred=.)
Now Poisson-inflated:
ord_irt_poisson_abs_sim <- id_sim_gen(num_person=20,num_bills=100,inflate=T, absence_diff_mean=0,model_type='poisson') ord_irt_poisson_abs_est <- id_estimate(idealdata=ord_irt_poisson_abs_sim, model_type=8, restrict_ind_high = as.character(sort(ord_irt_poisson_abs_sim@simul_data$true_person, decreasing=T, index=T)$ix[1]), restrict_ind_low = as.character(sort(ord_irt_poisson_abs_sim@simul_data$true_person, decreasing=F, index=T)$ix[1]), fixtype='constrained') id_plot_rhats(ord_irt_poisson_abs_est)
id_plot_legis(ord_irt_poisson_abs_est,show_true = T)
id_sim_coverage(ord_irt_poisson_abs_est) %>% bind_rows(.id='Parameter') %>% ggplot(aes(y=avg,x=Parameter)) + stat_summary(fun.args=list(mult=1.96)) + theme_minimal()
id_plot_sims(ord_irt_poisson_abs_est,type='Residuals')
id_plot_sims(ord_irt_poisson_abs_est)
ord_irt_poisson_abs_est %>% id_post_pred %>% id_plot_ppc(ord_irt_poisson_abs_est,ppc_pred=.)
ord_irt_poisson_abs_est %>% id_post_pred(output='missing') %>% id_plot_ppc(ord_irt_poisson_abs_est,ppc_pred=.)
A Normally-distributed outcome with no inflation:
ord_irt_normal_sim <- id_sim_gen(num_person=20,num_bills=100,ordinal=F,inflate=F, absence_diff_mean=0,model_type='normal') ord_irt_normal_est <- id_estimate(idealdata=ord_irt_normal_sim, model_type=9, restrict_ind_high = as.character(sort(ord_irt_normal_sim@simul_data$true_person, decreasing=T, index=T)$ix[1]), restrict_ind_low = as.character(sort(ord_irt_normal_sim@simul_data$true_person, decreasing=F, index=T)$ix[1]), fixtype='constrained') id_plot_rhats(ord_irt_normal_est)
id_plot_legis(ord_irt_normal_est,show_true = T)
id_sim_coverage(ord_irt_normal_est) %>% bind_rows(.id='Parameter') %>% ggplot(aes(y=avg,x=Parameter)) + stat_summary(fun.args=list(mult=1.96)) + theme_minimal()
id_plot_sims(ord_irt_normal_est,type='Residuals')
id_plot_sims(ord_irt_normal_est)
ord_irt_normal_est %>% id_post_pred %>% id_plot_ppc(ord_irt_normal_est,ppc_pred=.)
A Normally-distributed outcome with inflation:
ord_irt_normal_abs_sim <- id_sim_gen(num_person=20,num_bills=100,inflate=T, absence_diff_mean=0,model_type='normal') ord_irt_normal_abs_est <- id_estimate(idealdata=ord_irt_normal_abs_sim, model_type=10, restrict_ind_high = as.character(sort(ord_irt_normal_abs_sim@simul_data$true_person, decreasing=T, index=T)$ix[1]), restrict_ind_low = as.character(sort(ord_irt_normal_abs_sim@simul_data$true_person, decreasing=F, index=T)$ix[1]), fixtype='constrained') id_plot_rhats(ord_irt_normal_abs_est)
id_plot_legis(ord_irt_normal_abs_est,show_true = T)
id_sim_coverage(ord_irt_normal_abs_est) %>% bind_rows(.id='Parameter') %>% ggplot(aes(y=avg,x=Parameter)) + stat_summary(fun.args=list(mult=1.96)) + theme_minimal()
id_plot_sims(ord_irt_normal_abs_est,type='Residuals')
id_plot_sims(ord_irt_normal_abs_est)
ord_irt_normal_abs_est %>% id_post_pred %>% id_plot_ppc(ord_irt_normal_abs_est,ppc_pred=.)
ord_irt_normal_abs_est %>% id_post_pred(output='missing') %>% id_plot_ppc(ord_irt_normal_abs_est,ppc_pred=.)
A Lognormal model with no inflation:
ord_irt_lognormal_sim <- id_sim_gen(num_person=20,num_bills=100,ordinal=F,inflate=F, absence_diff_mean=0,model_type='lognormal') ord_irt_lognormal_est <- id_estimate(idealdata=ord_irt_lognormal_sim, model_type=11, restrict_ind_high = as.character(sort(ord_irt_lognormal_sim@simul_data$true_person, decreasing=T, index=T)$ix[1]), restrict_ind_low = as.character(sort(ord_irt_lognormal_sim@simul_data$true_person, decreasing=F, index=T)$ix[1]), fixtype='constrained') id_plot_rhats(ord_irt_lognormal_est)
id_plot_legis(ord_irt_lognormal_est,show_true = T)
id_sim_coverage(ord_irt_lognormal_est) %>% bind_rows(.id='Parameter') %>% ggplot(aes(y=avg,x=Parameter)) + stat_summary(fun.args=list(mult=1.96)) + theme_minimal()
id_plot_sims(ord_irt_lognormal_est,type='Residuals')
id_plot_sims(ord_irt_lognormal_est)
And last but not least, a lognormal model with inflation:
ord_irt_lognormal_abs_sim <- id_sim_gen(num_person=20,num_bills=100,ordinal=F,inflate=F, absence_diff_mean=0,model_type='lognormal') ord_irt_lognormal_abs_est <- id_estimate(idealdata=ord_irt_lognormal_abs_sim, model_type=12, restrict_ind_high = as.character(sort(ord_irt_lognormal_abs_sim@simul_data$true_person, decreasing=T, index=T)$ix[1]), restrict_ind_low = as.character(sort(ord_irt_lognormal_abs_sim@simul_data$true_person, decreasing=F, index=T)$ix[1]), fixtype='constrained') id_plot_rhats(ord_irt_lognormal_abs_est)
id_plot_legis(ord_irt_lognormal_abs_est,show_true = T)
id_sim_coverage(ord_irt_lognormal_abs_est) %>% bind_rows(.id='Parameter') %>% ggplot(aes(y=avg,x=Parameter)) + stat_summary(fun.args=list(mult=1.96)) + theme_minimal()
id_plot_sims(ord_irt_lognormal_abs_est,type='Residuals')
id_plot_sims(ord_irt_lognormal_abs_est)
We won't test all the possible time auto-correlation settings, only a subset of them. I use an inflated binary model as it is a very common one.
While these models are identified, it appears that sometimes given the amount of data, they are only weakly identified in the posterior and Rhat values higher than 1.1 are possible (but are not evidence of multi-modality).
First random walks:
bin_time_irt_2pl_abs_sim <- id_sim_gen(num_person=20,num_bills=125,ordinal=F,inflate=T, absence_diff_mean=0,time_process = 'random',time_points=25,ideal_pts_sd = 1, diff_sd = 1, absence_discrim_sd = 1, reg_discrim_sd = 1) bin_time_irt_2pl_abs_est <- id_estimate(idealdata=bin_time_irt_2pl_abs_sim, model_type=2,nchains = 1,ncores = 3, vary_ideal_pts = 'random_walk', save_files = "~/corona_private/data/indices/", restrict_ind_high = as.character(sort(bin_time_irt_2pl_abs_sim@simul_data$drift, decreasing=TRUE, index=T)$ix[1]), restrict_ind_low = as.character(sort(bin_time_irt_2pl_abs_sim@simul_data$drift, decreasing=F, index=T)$ix[1]), fixtype='constrained',id_refresh = 100) id_plot_rhats(bin_time_irt_2pl_abs_est)
id_sim_coverage(bin_time_irt_2pl_abs_est) %>% bind_rows(.id='Parameter') %>% ggplot(aes(y=avg,x=Parameter)) + stat_summary(fun.args=list(mult=1.96)) + theme_minimal()
Let's do a time-varying ideal point plot with true point overlay:
id_plot_legis_dyn(bin_time_irt_2pl_abs_est,plot_sim = T) + facet_wrap(~person_id,scales="free_y")
id_plot_sims(bin_time_irt_2pl_abs_est,type='Residuals')
id_plot_sims(bin_time_irt_2pl_abs_est)
And autocorrelation:
bin_time_ar_irt_2pl_abs_sim <- id_sim_gen(num_person=10,num_bills=100,inflate=T, absence_diff_mean=0,time_process = 'AR',time_points=20,time_sd = .2,diff_sd = 1, absence_discrim_sd = 1, reg_discrim_sd = 1) bin_time_ar_irt_2pl_abs_est <- id_estimate(idealdata=bin_time_ar_irt_2pl_abs_sim, model_type=2, vary_ideal_pts = 'AR1', nchains=3,ncores = 3, restrict_ind_high = as.character(sort(bin_time_ar_irt_2pl_abs_sim@simul_data$drift, decreasing=TRUE, index=TRUE)$ix[1]), restrict_ind_low = as.character(sort(bin_time_ar_irt_2pl_abs_sim@simul_data$drift, decreasing=F, index=T)$ix[1]),restrict_sd_high = 1, fixtype='constrained',id_refresh = 100) id_plot_rhats(bin_time_ar_irt_2pl_abs_est)
id_sim_coverage(bin_time_ar_irt_2pl_abs_est) %>% bind_rows(.id='Parameter') %>% ggplot(aes(y=avg,x=Parameter)) + stat_summary(fun.args=list(mult=1.96)) + theme_minimal()
Let's do a time-varying ideal point plot with true point overlay:
id_plot_legis_dyn(bin_time_ar_irt_2pl_abs_est,plot_sim = T) +facet_wrap(~person_id,scales="free_y")
id_plot_sims(bin_time_ar_irt_2pl_abs_est,type='Residuals')
id_plot_sims(bin_time_ar_irt_2pl_abs_est)
The Gaussian process differs in that is a non-parametric approach to time-series modeling. It can fit very flexible curves and also works on irregularly-spaced time series.
time_gp_sim <- id_sim_gen(num_person=10,num_bills=250,inflate=T, absence_diff_mean=0, time_process = 'GP', time_points=25, time_sd = .01, diff_sd = 1, absence_discrim_sd = 1, reg_discrim_sd = 1) time_gp_est <- id_estimate(idealdata=time_gp_sim, model_type=2, use_vb=F,id_refresh=100, nchains=3,niters=500, vary_ideal_pts = 'GP', fix_high = sort(time_gp_sim@simul_data$true_person_mean, decreasing=TRUE)[1], fix_low=sort(time_gp_sim@simul_data$true_person_mean)[1], restrict_ind_high = as.character(sort(time_gp_sim@simul_data$true_person_mean, decreasing=TRUE, index=TRUE)$ix[1]), restrict_ind_low = as.character(sort(time_gp_sim@simul_data$true_person_mean, decreasing=F, index=T)$ix[1]), gp_sd_par=1e-05, gp_m_sd_par = time_gp_sim@simul_data$drift, fixtype='constrained') id_plot_rhats(time_gp_est)
id_sim_coverage(time_gp_est) %>% bind_rows(.id='Parameter') %>% ggplot(aes(y=avg,x=Parameter)) + stat_summary(fun.args=list(mult=1.96)) + theme_minimal()
Let's do a time-varying ideal point plot with true point overlay:
id_plot_legis_dyn(time_gp_est,plot_sim = T) +facet_wrap(~person_id,scales="free")
id_plot_sims(time_gp_est,type='Residuals')
id_plot_sims(time_gp_est)
Another new addition is the latent space model, which is only a slight modification on the IRT model.
Non-inflated:
latent_noninfl_sim <- id_sim_gen(num_person=20,num_bills=125,ordinal=F, inflate=F, model_type='binary', latent_space=T, absence_diff_mean=0, ideal_pts_sd = 1, diff_sd = 1, absence_discrim_sd = 1, reg_discrim_sd = 1) latent_noninfl_est <- id_estimate(idealdata=latent_noninfl_sim, model_type=13, restrict_ind_high = as.character(sort(latent_noninfl_sim@simul_data$true_person, decreasing=TRUE, index=T)$ix[1]), restrict_ind_low = as.character(sort(latent_noninfl_sim@simul_data$true_person, decreasing=F, index=T)$ix[1]), fixtype='constrained') id_plot_rhats(latent_noninfl_est)
id_plot_legis(latent_noninfl_est,show_true = T)
id_sim_coverage(latent_noninfl_est) %>% bind_rows(.id='Parameter') %>% ggplot(aes(y=avg,x=Parameter)) + stat_summary(fun.args=list(mult=1.96)) + theme_minimal()
Inflated:
# you can't have too few people relative to bills latent_infl_sim <- id_sim_gen(num_person=20,num_bills=125,ordinal=F, inflate=T, model_type='binary', latent_space=T, absence_diff_mean=0, ideal_pts_sd = 1, diff_sd = 1, absence_discrim_sd = 1, reg_discrim_sd = 1) latent_infl_est <- id_estimate(idealdata=latent_infl_sim, model_type=14, restrict_ind_high = as.character(sort(latent_infl_sim@simul_data$true_person, decreasing=TRUE, index=T)$ix[1]), restrict_ind_low = as.character(sort(latent_infl_sim@simul_data$true_person, decreasing=F, index=T)$ix[1]), fixtype='constrained', diff_reg_sd = 2, diff_miss_sd=2) id_plot_rhats(latent_infl_est)
id_plot_legis(latent_infl_est,show_true = T)
id_sim_coverage(latent_infl_est) %>% bind_rows(.id='Parameter') %>% ggplot(aes(y=avg,x=Parameter)) + stat_summary(fun.args=list(mult=1.96)) + theme_minimal()
New in v1.0 are combined distribution modeling. This section simulates these using separate code due to the complexity of combining distributions together. I simulate here data from Normal, ordinal-rating scale, Poisson and Bernoulli distributions, 10 items from each distribution.
num_bills <- 200 time <- 20 num_per_dist <- (200*20)/4 num_person <- 20 absence_diff <- rnorm(n=num_bills) absence_discrim <- rnorm(n=num_bills) # person ideal points common to both types of models (absence and regular) ar_adj <- rep(1,num_person) # need to pull out some functions to generate data .gen_ts_data <- function(t,adj_in,alpha_int,sigma,init_sides) { current_val <- new.env() current_val$t1 <- 0 out_vec <- lapply(1:t,function(t_1) { if(t_1==1) { t_11 <- alpha_int current_val$t1 <- t_11 return(data_frame(t_11)) } else { if(adj_in==1) { t_11 <- adj_in*current_val$t1 + rnorm(n=1,sd=sigma) } else { t_11 <- alpha_int + adj_in*current_val$t1 + rnorm(n=1,sd=sigma) } } current_val$t1 <- t_11 return(data_frame(t_11)) }) %>% bind_rows return(out_vec) } drift <- rnorm(n=num_person,mean=0,sd=1) time_points <- rep(1:time,each=num_bills/time) ideal_pts <- lapply(1:num_person, function(i) { this_person <- .gen_ts_data(t=time, adj_in=ar_adj[i], alpha_int=drift[i], sigma=.1, init_sides=ideal_t1[i]) return(this_person) }) %>% bind_cols %>% as.matrix ideal_pts <- t(ideal_pts) # First generate prob of absences person_points <- rep(1:num_person,times=num_bills) bill_points <- rep(1:num_bills,each=num_person) time_points <- rep(1:time,each=num_bills/time) time_points <- rep(time_points,each=num_person) pr_absence <- sapply(1:length(person_points),function(n) { ideal_pts[person_points[n],time_points[n]]*absence_discrim[bill_points[n]] - absence_diff[bill_points[n]] }) %>% plogis() reg_diff <- rnorm(n=num_bills,mean=0,sd=1) reg_discrim <- rnorm(n=num_bills,mean=0,sd=1) pr_vote <- sapply(1:length(person_points),function(n) { ideal_pts[person_points[n],time_points[n]]*reg_discrim[bill_points[n]] - reg_diff[bill_points[n]] }) %>% plogis() bill_points_all <- bill_points person_points_all <- person_points # do each model in turn bill_points <- bill_points_all[1:num_per_dist] person_points <- person_points_all[1:num_per_dist] bernoulli <- as.numeric(pr_vote[1:num_per_dist]>runif(length(person_points))) bernoulli <- ifelse(pr_absence[1:num_per_dist]<(runif(length(person_points))),bernoulli,NA) bill_points <- bill_points_all[(num_per_dist+1):(num_per_dist*2)] person_points <- person_points_all[(num_per_dist+1):(num_per_dist*2)] ordinal_outcomes <- 4 cutpoints <- quantile(pr_vote[(num_per_dist+1):(num_per_dist*2)],probs=seq(0,1,length.out = ordinal_outcomes+1)) cutpoints <- cutpoints[2:(length(cutpoints)-1)] #Generate outcomes by personlator cuts <- sapply(cutpoints,function(y) { qlogis(pr_vote[(num_per_dist+1):(num_per_dist*2)]) - y },simplify='array') ord_rat <- sapply(1:nrow(cuts), function(i) { this_cut <- cuts[i,] pr_bottom <- 1 - plogis(this_cut[1]) mid_prs <- sapply(1:(length(this_cut)-1), function(c) { plogis(this_cut[c]) - plogis(this_cut[c+1]) }) pr_top <- plogis(this_cut[length(this_cut)]) return(sample(1:(length(this_cut)+1),size=1,prob=c(pr_bottom,mid_prs,pr_top))) }) ord_rat <- ifelse(pr_absence[(num_per_dist+1):(num_per_dist*2)]<(runif(length(person_points))),ord_rat,NA) # now poisson bill_points <- bill_points_all[(2*num_per_dist+1):(num_per_dist*3)] person_points <- person_points_all[(2*num_per_dist+1):(num_per_dist*3)] poisson <- rpois(n = length(pr_vote[(2*num_per_dist+1):(num_per_dist*3)]),lambda = exp(pr_vote[(2*num_per_dist+1):(num_per_dist*3)])) poisson <- ifelse(pr_absence[(2*num_per_dist+1):(num_per_dist*3)]<(runif(num_per_dist)),poisson,NA) # Normal distribution bill_points <- bill_points_all[(3*num_per_dist+1):(num_per_dist*4)] person_points <- person_points_all[(3*num_per_dist+1):(num_per_dist*4)] normal <- rnorm(n = length(pr_vote[(3*num_per_dist+1):(num_per_dist*4)]),mean = pr_vote[(3*num_per_dist+1):(num_per_dist*4)],sd = .5) normal <- ifelse(pr_absence[(3*num_per_dist+1):(num_per_dist*4)]<(runif(num_per_dist)),normal,NA) # now make data and feed it to id_make to_id_make <- tibble(outcome_disc=c(bernoulli,ord_rat,poisson,1:num_per_dist), outcome_cont=c(1:(3*num_per_dist),normal), person_id=person_points_all, item_id=bill_points_all, model_id=c(rep(2,num_per_dist), rep(4,num_per_dist), rep(8,num_per_dist), rep(10,num_per_dist)), time_id=time_points) %>% mutate(ordered_id=4) combine_data <- id_make(to_id_make) combine_data_est <- id_estimate(combine_data,nchains=4,ncores = 4,fixtype = "prefix",restrict_ind_high = as.character(sort(ideal_pts[,1], decreasing=T, index=T)$ix[1]), restrict_ind_low = as.character(sort(ideal_pts[,1], decreasing=F, index=T)$ix[1]), vary_ideal_pts = "random_walk", fix_high = sort(ideal_pts[,1],decreasing=T)[1], fix_low = sort(ideal_pts[,1],decreasing=F)[1], id_refresh = 100) # need to add back in true values combine_data_est@score_data@simulation <- T combine_data_est@score_data@simul_data <- list(num_person=num_person, num_bills=num_bills, ideal_pts_sd=1, true_person=ideal_pts) id_plot_legis_dyn(combine_data_est,plot_sim = T) + facet_wrap(~person_id)
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.