require(idealstan)
require(ggplot2)
require(dplyr)
require(tidyr)
library(bayesplot)

knitr::opts_chunk$set(fig.align = 'center',warning = F,message = F)

Introduction

This Rmarkdown file has a series of simulations run on the models offered in idealstan. It simulates a large amount of data and then checks to see if the residuals relative to the true parameters sum to zero, as well as for credible interval coverage. In general, the credible intervals aren't going to be exactly where they should be because the constraints used in estimating the models and identifying the signs aren't themselves included in the simulated data. I.e., one of the ideal points actually follows a positive continuous distribution instead of a random normal distribution once it is prefix, which introduces a slight bias in the recovery of the true parameters. This is an artifact of virtually every latent variable model, and is not of great concern as long as the credible intervals reach close to 90 percent coverage.

First, the standard IRT 2-PL (for this one, the absence discriminations are not a part of the model):

bin_irt_2pl_sim <- id_sim_gen(num_person=100,num_items=20,ordinal=F,inflate=F)
bin_irt_2pl_est <- id_estimate(idealdata=bin_irt_2pl_sim,
                               model_type=1,init_pathfinder = TRUE,
                               restrict_ind_high = as.character(sort(bin_irt_2pl_sim@simul_data$true_person,
                                                                     decreasing=T,
                                                        index=T)$ix[1]),
                               restrict_ind_low = as.character(sort(bin_irt_2pl_sim@simul_data$true_person,
                                                                    decreasing=F, 
                                                        index=T)$ix[1]),
                               fix_high = sort(bin_irt_2pl_sim@simul_data$true_person,
                                                                     decreasing=T)[1],fix_low = sort(bin_irt_2pl_sim@simul_data$true_person,
                                                                     decreasing=F)[1],

                               fixtype='prefix',
                           person_sd = 3,
                           nchains=2,ncores=4,niters = 500,warmup=500)
id_plot_rhats(bin_irt_2pl_est)
id_sim_coverage(bin_irt_2pl_est) %>% 
  bind_rows(.id='Parameter') %>% 
  ggplot(aes(y=avg,x=Parameter)) +
  stat_summary(fun.args=list(mult=1.96)) + 
  theme_minimal() 

Plot true versus observed values:

id_plot_legis(bin_irt_2pl_est,show_true = T)
id_plot_sims(bin_irt_2pl_est,type='Residuals')
id_plot_sims(bin_irt_2pl_est)
new_data <- bin_irt_2pl_est@score_data@func_args$score_data

c1 <- id_post_pred(bin_irt_2pl_est, newdata=new_data)

  id_plot_ppc(bin_irt_2pl_est,ppc_pred=c1,combine_item=FALSE,
              prompt_plot=FALSE)

IRT 2-PL Binary + Covariate

Try this same model but add an external covariate with an ideal point effect of +2

bin_irt_2pl_cov_sim <- id_sim_gen(num_person=100,num_items=20,ordinal=F,inflate=F,
                              cov_effect = 3)
bin_irt_2pl_cov_est <- id_estimate(idealdata=bin_irt_2pl_cov_sim,
                               model_type=1,init_pathfinder = TRUE,
                               restrict_ind_high = as.character(sort(bin_irt_2pl_cov_sim@simul_data$true_person,
                                                                     decreasing=T,
                                                        index=T)$ix[1]),
                               restrict_ind_low = as.character(sort(bin_irt_2pl_cov_sim@simul_data$true_person,
                                                                    decreasing=F, 
                                                        index=T)$ix[1]),
                               fix_high = sort(bin_irt_2pl_cov_sim@simul_data$true_person,
                                                                     decreasing=T)[1],fix_low = sort(bin_irt_2pl_cov_sim@simul_data$true_person,
                                                                     decreasing=F)[1],

                               fixtype='prefix',
                           person_sd = 3,
                           nchains=2,ncores=4,niters = 500,warmup=500)
id_plot_rhats(bin_irt_2pl_cov_est)
id_sim_coverage(bin_irt_2pl_cov_est) %>% 
  bind_rows(.id='Parameter') %>% 
  ggplot(aes(y=avg,x=Parameter)) +
  stat_summary(fun.args=list(mult=1.96)) + 
  theme_minimal() 

Plot true versus observed values:

id_plot_legis(bin_irt_2pl_cov_est,show_true = T)
id_plot_sims(bin_irt_2pl_cov_est,type='Residuals')
id_plot_sims(bin_irt_2pl_cov_est)
new_data <- bin_irt_2pl_cov_est@score_data@func_args$score_data

c1 <- id_post_pred(bin_irt_2pl_cov_est, newdata=new_data)

  id_plot_ppc(bin_irt_2pl_cov_est,ppc_pred=c1,combine_item=FALSE,
              prompt_plot=FALSE)

Now check for covariate effects of ideal point covariates

First, plot estimated coefficient legis_x against true value:

mcmc_recover_hist(bin_irt_2pl_cov_est@stan_samples$draws("legis_x"),
                  true=bin_irt_2pl_cov_sim@simul_data$cov_effect)
eps <- 1e-4

  new_data1 <- mutate(bin_irt_2pl_cov_sim@score_matrix,
                      person_x = person_x - eps / 2)

  new_data2 <- mutate(bin_irt_2pl_cov_sim@score_matrix,
                      person_x = person_x + eps / 2)

  # now predict new outcome distributions given these slightly different datasets

  cov_mod_pred1 <- id_post_pred(bin_irt_2pl_cov_est,newdata=new_data1,
                                 use_cores=4,draws="all",
                                item_subset=levels(new_data1$item),
                                 type="epred")

  cov_mod_pred2 <- id_post_pred(bin_irt_2pl_cov_est,newdata=new_data2,
                                 use_cores=4,draws="all",
                                item_subset=levels(new_data2$item),
                                 type="epred")

  # now we need to difference the datasets at the item level to calculate the effects
  # this will create some lists that will then be appended together to a big data frame
  # with one observation per posterior draw

    print("Looping over items")

    # difference the predictions

  c1 <- purrr::map2(cov_mod_pred1[[1]],
                    cov_mod_pred2[[1]],
                    function(small,big) {

                      # difference the effects

                      (big - small) / eps

                    })

  # combine into datasets for each item with item id + person (senator) id

  c2 <- lapply(c1, function(mat) {

    out_data <- attr(mat, "data")
    colnames(mat) <- out_data$person_id

    as_tibble(mat) %>% 
      mutate(draws=1:n(),
             item_id=unique(out_data$item_id)) %>% 
      gather(key="person_id",value="estimate",-draws,-item_id) %>% 
      mutate(person_id=as.numeric(person_id),
             estimate=as.numeric(estimate))

  }) %>% bind_rows

  # merge in some original data
  to_merge <- mutate(bin_irt_2pl_cov_est@score_data@score_matrix, 
                     item_orig=item_id,
                     person_orig=person_id,
                     person_id=as.numeric(person_id),
                     item_id=as.numeric(item_id)) %>% 
    select(person_id, item_id, group_id,item_orig, person_orig) %>% 
    distinct

  # add in party data so we can calculate party-specific effects

  c2 <- left_join(c2, to_merge, 
                  by=c("item_id","person_id"))

  # get effect separately by democrats/republicans

   by_party <- group_by(c2, draws, group_id, item_id, item_orig) %>% 
    summarize(mean_est1=mean(estimate)) %>% 
    group_by(group_id, item_id, item_orig) %>% 
    summarize(mean_est=mean(mean_est1),
              low_est=quantile(mean_est1, .05),
              high_est=quantile(mean_est1, .95))

   # merge in item discrimination to add some color / show high-discrim votes

   item_discrim <- filter(bin_irt_2pl_cov_est@summary,
                         grepl(x=variable, pattern="sigma\\_reg\\_free")) %>% 
    mutate(item_id=as.numeric(stringr::str_extract(variable, "[0-9]+")))

   by_party <- left_join(by_party,
                        select(item_discrim, median, item_id))

   # plot the result

   by_party %>% 
    ggplot(aes(y=mean_est,
               x=reorder(item_id,mean_est))) +
    geom_linerange(aes(ymin=low_est,
                       ymax=high_est,
                       colour=`median`)) +
    ggthemes::theme_tufte() + 
    scale_colour_viridis_c(name="Discrimination") +
    coord_flip() +
    geom_hline(yintercept=0,linetype=2) +
    ggthemes::theme_clean() +
    theme(axis.text.y=element_blank(),
          axis.ticks.y=element_blank()) +
  theme(legend.position = "bottom")

Inflated 2-PL IRT (binary):

Next, inflated 2-PL IRT (binary):

bin_irt_2pl_abs_sim <- id_sim_gen(num_person=20,num_items=100,ordinal=F,inflate=T,
                               absence_diff_mean=0)
bin_irt_2pl_abs_est <- id_estimate(idealdata=bin_irt_2pl_abs_sim,
                               model_type=2,
                               restrict_ind_high = as.character(sort(bin_irt_2pl_abs_sim@simul_data$true_person,
                                                                     decreasing=T,
                                                        index=T)$ix[1]),
                              restrict_ind_low = as.character(sort(bin_irt_2pl_abs_sim@simul_data$true_person,
                                                                   decreasing=F,
                                                        index=T)$ix[1]),
                              fix_high=sort(bin_irt_2pl_abs_sim@simul_data$true_person,
                                                                     decreasing=T)[1],
                              fix_low=sort(bin_irt_2pl_abs_sim@simul_data$true_person,
                                                                     decreasing=F)[1],
                               fixtype='prefix')
id_plot_rhats(bin_irt_2pl_abs_est)

Plot true versus observed values:

id_plot_legis(bin_irt_2pl_abs_est,show_true = T)
id_sim_coverage(bin_irt_2pl_abs_est) %>% 
  bind_rows(.id='Parameter') %>% 
  ggplot(aes(y=avg,x=Parameter)) +
  stat_summary(fun.args=list(mult=1.96)) + 
  theme_minimal() 
id_plot_sims(bin_irt_2pl_abs_est,type='Residuals')
id_plot_sims(bin_irt_2pl_abs_est)
bin_irt_2pl_abs_est %>% 
  id_post_pred %>% 
  id_plot_ppc(bin_irt_2pl_abs_est,ppc_pred=.,combine_item=FALSE,prompt_plot=FALSE)

Now we'll start with the ordinal models. First the uninflated ordinal model:

ord_irt_sim <- id_sim_gen(num_person=20,num_items=100,inflate=F,
                               absence_diff_mean=0,model_type='ordinal_ratingscale')
ord_irt_est <- id_estimate(idealdata=ord_irt_sim,
                               model_type=3,
                               restrict_ind_high = as.character(sort(ord_irt_sim@simul_data$true_person,decreasing=T,
                                                        index=T)$ix[1]),
                              restrict_ind_low = as.character(sort(ord_irt_sim@simul_data$true_person,decreasing=F,
                                                        index=T)$ix[1]),
                           fix_high=sort(ord_irt_sim@simul_data$true_person,decreasing=T)[1],
                           fix_low=sort(ord_irt_sim@simul_data$true_person,decreasing=F)[1],
                               fixtype='prefix')
id_plot_rhats(ord_irt_est)

Plot true versus observed values:

id_plot_legis(ord_irt_est,show_true = T)
id_sim_coverage(ord_irt_est) %>% 
  bind_rows(.id='Parameter') %>% 
  ggplot(aes(y=avg,x=Parameter)) +
  stat_summary(fun.args=list(mult=1.96)) + 
  theme_minimal() 
id_plot_sims(ord_irt_est,type='Residuals')
id_plot_sims(ord_irt_est)
ord_irt_est %>% 
  id_post_pred %>% 
  id_plot_ppc(ord_irt_est,ppc_pred=.)

And we will finish with the inflated ordinal model:

ord_irt_abs_sim <- id_sim_gen(num_person=20,num_items=100,inflate=T,
                               absence_diff_mean=0,model_type='ordinal_ratingscale')
ord_irt_abs_est <- id_estimate(idealdata=ord_irt_abs_sim,
                               model_type=4,
                               restrict_ind_high = as.character(sort(ord_irt_abs_sim@simul_data$true_person,
                                                                     decreasing=T,
                                                        index=T)$ix[1]),
                              restrict_ind_low = as.character(sort(ord_irt_abs_sim@simul_data$true_person,
                                                                   decreasing=F,
                                                        index=T)$ix[1]),
                              fix_high = sort(ord_irt_abs_sim@simul_data$true_person,
                                                                     decreasing=T)[1],
                              fix_low = sort(ord_irt_abs_sim@simul_data$true_person,
                                                                     decreasing=F)[1],chains=2,ncores=4,
                               fixtype='prefix')
id_plot_rhats(ord_irt_abs_est)
id_plot_legis(ord_irt_abs_est,show_true = T)
id_sim_coverage(ord_irt_abs_est) %>% 
  bind_rows(.id='Parameter') %>% 
  ggplot(aes(y=avg,x=Parameter)) +
  stat_summary(fun.args=list(mult=1.96)) + 
  theme_minimal() 
id_plot_sims(ord_irt_abs_est,type='Residuals')
id_plot_sims(ord_irt_abs_est)
ord_irt_abs_est %>% 
  id_post_pred %>% 
  id_plot_ppc(ord_irt_abs_est,ppc_pred=.)

We can now try the ordinal graded response version (non-inflated):

ord_irt_grm_sim <- id_sim_gen(num_person=20,num_items=100,inflate=F,
                               absence_diff_mean=0,model_type='ordinal_grm')
ord_irt_grm_est <- id_estimate(idealdata=ord_irt_grm_sim,
                               model_type=5,
                               restrict_ind_high = as.character(sort(ord_irt_grm_sim@simul_data$true_person,
                                                                     decreasing=T,
                                                        index=T)$ix[1]),
                              restrict_ind_low = as.character(sort(ord_irt_grm_sim@simul_data$true_person,
                                                                   decreasing=F,
                                                        index=T)$ix[1]),
                               fixtype='prefix')
id_plot_rhats(ord_irt_grm_est)
id_sim_coverage(ord_irt_grm_est) %>% 
  bind_rows(.id='Parameter') %>% 
  ggplot(aes(y=avg,x=Parameter)) +
  stat_summary(fun.args=list(mult=1.96)) + 
  theme_minimal() 
id_plot_legis(ord_irt_grm_est,show_true = T)
id_plot_sims(ord_irt_grm_est,type='Residuals')
id_plot_sims(ord_irt_grm_est)
ord_irt_grm_est %>% 
  id_post_pred %>% 
  id_plot_ppc(ord_irt_grm_est,ppc_pred=.)

And now the inflated GRM:

ord_irt_grm_abs_sim <- id_sim_gen(num_person=20,num_items=100,inflate=T,
                               absence_diff_mean=0,model_type='ordinal_grm')
ord_irt_grm_abs_est <- id_estimate(idealdata=ord_irt_grm_abs_sim,
                               model_type=6,
                               restrict_ind_high = as.character(sort(ord_irt_grm_abs_sim@simul_data$true_person,
                                                                     decreasing=T,
                                                        index=T)$ix[1]),
                              restrict_ind_low = as.character(sort(ord_irt_grm_abs_sim@simul_data$true_person,
                                                                   decreasing=F,
                                                        index=T)$ix[1]),
                               fixtype='prefix')
id_plot_rhats(ord_irt_grm_abs_est)
id_plot_legis(ord_irt_grm_abs_est,show_true = T)
id_sim_coverage(ord_irt_grm_abs_est) %>% 
  bind_rows(.id='Parameter') %>% 
  ggplot(aes(y=avg,x=Parameter)) +
  stat_summary(fun.args=list(mult=1.96)) + 
  theme_minimal() 
id_plot_sims(ord_irt_grm_abs_est,type='Residuals')
id_plot_sims(ord_irt_grm_abs_est)
ord_irt_grm_abs_est %>% 
  id_post_pred %>% 
  id_plot_ppc(ord_irt_grm_abs_est,ppc_pred=.)

Additional Models V0.3

Models I am adding to this release include the Poisson model:

ord_irt_poisson_sim <- id_sim_gen(num_person=20,num_items=100,inflate=F,
                                  absence_diff_mean=0,model_type='poisson')
ord_irt_poisson_est <- id_estimate(idealdata=ord_irt_poisson_sim,
                               model_type=7,restrict_ind_high = as.character(sort(ord_irt_poisson_sim@simul_data$true_person,
                                                                     decreasing=T,
                                                        index=T)$ix[1]),
                              restrict_ind_low = as.character(sort(ord_irt_poisson_sim@simul_data$true_person,
                                                                   decreasing=F,
                                                        index=T)$ix[1]),
                              fix_high=sort(ord_irt_poisson_sim@simul_data$true_person,
                                                                     decreasing=T)[1],
                              fix_low=sort(ord_irt_poisson_sim@simul_data$true_person,
                                                                   decreasing=F)[1],
                               fixtype='prefix',nchains=2,ncores=8)
id_plot_rhats(ord_irt_poisson_est)
id_plot_legis(ord_irt_poisson_est,show_true = T)
id_sim_coverage(ord_irt_poisson_est) %>% 
  bind_rows(.id='Parameter') %>% 
  ggplot(aes(y=avg,x=Parameter)) +
  stat_summary(fun.args=list(mult=1.96)) + 
  theme_minimal() 
id_plot_sims(ord_irt_poisson_est,type='Residuals')
id_plot_sims(ord_irt_poisson_est)
ord_irt_poisson_est %>% 
  id_post_pred %>% 
  id_plot_ppc(ord_irt_poisson_est,ppc_pred=.)

Now Poisson-inflated:

ord_irt_poisson_abs_sim <- id_sim_gen(num_person=20,num_items=100,inflate=T,
                               absence_diff_mean=0,model_type='poisson')
ord_irt_poisson_abs_est <- id_estimate(idealdata=ord_irt_poisson_abs_sim,
                               model_type=8,ncores = 8,
                               nchains=2,
                               restrict_ind_high = as.character(sort(ord_irt_poisson_abs_sim@simul_data$true_person,
                                                                     decreasing=T,
                                                        index=T)$ix[1]),
                              restrict_ind_low = as.character(sort(ord_irt_poisson_abs_sim@simul_data$true_person,
                                                                   decreasing=F,
                                                        index=T)$ix[1]),
                              fix_high = sort(ord_irt_poisson_abs_sim@simul_data$true_person,
                                                                     decreasing=T)[1],fix_low = sort(ord_irt_poisson_abs_sim@simul_data$true_person,
                                                                     decreasing=F)[1],
                               fixtype='prefix')
id_plot_rhats(ord_irt_poisson_abs_est)
id_plot_legis(ord_irt_poisson_abs_est,show_true = T)
id_sim_coverage(ord_irt_poisson_abs_est) %>% 
  bind_rows(.id='Parameter') %>% 
  ggplot(aes(y=avg,x=Parameter)) +
  stat_summary(fun.args=list(mult=1.96)) + 
  theme_minimal() 
id_plot_sims(ord_irt_poisson_abs_est,type='Residuals')
id_plot_sims(ord_irt_poisson_abs_est)
ord_irt_poisson_abs_est %>% 
  id_post_pred %>% 
  id_plot_ppc(ord_irt_poisson_abs_est,ppc_pred=.)
ord_irt_poisson_abs_est %>% 
  id_post_pred(output='missing') %>% 
  id_plot_ppc(ord_irt_poisson_abs_est,ppc_pred=.)

A Normally-distributed outcome with no inflation:

ord_irt_normal_sim <- id_sim_gen(num_person=20,num_items=100,ordinal=F,inflate=F,
                               absence_diff_mean=0,model_type='normal')
ord_irt_normal_est <- id_estimate(idealdata=ord_irt_normal_sim,
                               model_type=9,restrict_sd_high = .1,
                               restrict_sd_low=.1,
                               restrict_ind_high = as.character(sort(ord_irt_normal_sim@simul_data$true_person,
                                                                     decreasing=T,
                                                        index=T)$ix[1]),
                              restrict_ind_low = as.character(sort(ord_irt_normal_sim@simul_data$true_person,
                                                                   decreasing=F,
                                                        index=T)$ix[1]),fix_high = sort(ord_irt_normal_sim@simul_data$true_person,
                                                                     decreasing=T)[1],fix_low = sort(ord_irt_normal_sim@simul_data$true_person,
                                                                     decreasing=F)[1],
                               fixtype='prefix',compile_optim=F)
id_plot_rhats(ord_irt_normal_est)
id_plot_legis(ord_irt_normal_est,show_true = T)
id_sim_coverage(ord_irt_normal_est) %>% 
  bind_rows(.id='Parameter') %>% 
  ggplot(aes(y=avg,x=Parameter)) +
  stat_summary(fun.args=list(mult=1.96)) + 
  theme_minimal() 
id_plot_sims(ord_irt_normal_est,type='Residuals')
id_plot_sims(ord_irt_normal_est)
ord_irt_normal_est %>% 
  id_post_pred %>% 
  id_plot_ppc(ord_irt_normal_est,ppc_pred=.)

A Normally-distributed outcome with inflation:

ord_irt_normal_abs_sim <- id_sim_gen(num_person=20,num_items=100,inflate=T,
                               absence_diff_mean=0,model_type='normal')
ord_irt_normal_abs_est <- id_estimate(idealdata=ord_irt_normal_abs_sim,
                               model_type=10,
                               restrict_ind_high = as.character(sort(ord_irt_normal_abs_sim@simul_data$true_person,
                                                                     decreasing=T,
                                                        index=T)$ix[1]),
                              restrict_ind_low = as.character(sort(ord_irt_normal_abs_sim@simul_data$true_person,
                                                                   decreasing=F,
                                                        index=T)$ix[1]),
                              fix_high = sort(ord_irt_normal_abs_sim@simul_data$true_person,
                                                                     decreasing=T)[1],fix_low = sort(ord_irt_normal_abs_sim@simul_data$true_person,
                                                                     decreasing=F)[1],nchains = 2,ncores=4,
                               fixtype='prefix')
id_plot_rhats(ord_irt_normal_abs_est)
id_plot_legis(ord_irt_normal_abs_est,show_true = T)
id_sim_coverage(ord_irt_normal_abs_est) %>% 
  bind_rows(.id='Parameter') %>% 
  ggplot(aes(y=avg,x=Parameter)) +
  stat_summary(fun.args=list(mult=1.96)) + 
  theme_minimal() 
id_plot_sims(ord_irt_normal_abs_est,type='Residuals')
id_plot_sims(ord_irt_normal_abs_est)
ord_irt_normal_abs_est %>% 
  id_post_pred %>% 
  id_plot_ppc(ord_irt_normal_abs_est,ppc_pred=.)
ord_irt_normal_abs_est %>% 
  id_post_pred(output='missing') %>% 
  id_plot_ppc(ord_irt_normal_abs_est,ppc_pred=.)

A Lognormal model with no inflation:

ord_irt_lognormal_sim <- id_sim_gen(num_person=20,num_items=100,ordinal=F,inflate=F,
                               absence_diff_mean=0,model_type='lognormal')
ord_irt_lognormal_est <- id_estimate(idealdata=ord_irt_lognormal_sim,
                               model_type=11,
                               restrict_ind_high = as.character(sort(ord_irt_lognormal_sim@simul_data$true_person,
                                                                     decreasing=T,
                                                        index=T)$ix[1]),
                              restrict_ind_low = as.character(sort(ord_irt_lognormal_sim@simul_data$true_person,
                                                                   decreasing=F,
                                                        index=T)$ix[1]),
                              fix_high = sort(ord_irt_lognormal_sim@simul_data$true_person,
                                                                     decreasing=T)[1],fix_low = sort(ord_irt_lognormal_sim@simul_data$true_person,
                                                                     decreasing=F)[1],nchains = 2,ncores=4,
                               fixtype='prefix')
id_plot_rhats(ord_irt_lognormal_est)
id_plot_legis(ord_irt_lognormal_est,show_true = T)
id_sim_coverage(ord_irt_lognormal_est) %>% 
  bind_rows(.id='Parameter') %>% 
  ggplot(aes(y=avg,x=Parameter)) +
  stat_summary(fun.args=list(mult=1.96)) + 
  theme_minimal() 
id_plot_sims(ord_irt_lognormal_est,type='Residuals')
id_plot_sims(ord_irt_lognormal_est)
ord_irt_lognormal_est %>% 
  id_post_pred %>% 
  id_plot_ppc(ord_irt_lognormal_est,ppc_pred=.) + 
  scale_x_log10()

And last but not least, a lognormal model with inflation:

ord_irt_lognormal_abs_sim <- id_sim_gen(num_person=20,num_items=100,ordinal=F,inflate=T,
                               absence_diff_mean=0,model_type='lognormal')
ord_irt_lognormal_abs_est <- id_estimate(idealdata=ord_irt_lognormal_abs_sim,
                               model_type=12,
                               restrict_ind_high = as.character(sort(ord_irt_lognormal_abs_sim@simul_data$true_person,
                                                                     decreasing=T,
                                                        index=T)$ix[1]),
                              restrict_ind_low = as.character(sort(ord_irt_lognormal_abs_sim@simul_data$true_person,
                                                                   decreasing=F,
                                                        index=T)$ix[1]),fix_high = sort(ord_irt_lognormal_abs_sim@simul_data$true_person,
                                                                     decreasing=T)[1],fix_low = sort(ord_irt_lognormal_abs_sim@simul_data$true_person,
                                                                     decreasing=F)[1],nchains = 2,ncores=4,

                               fixtype='prefix')
id_plot_rhats(ord_irt_lognormal_abs_est)
id_plot_legis(ord_irt_lognormal_abs_est,show_true = T)
id_sim_coverage(ord_irt_lognormal_abs_est) %>% 
  bind_rows(.id='Parameter') %>% 
  ggplot(aes(y=avg,x=Parameter)) +
  stat_summary(fun.args=list(mult=1.96)) + 
  theme_minimal() 
id_plot_sims(ord_irt_lognormal_abs_est,type='Residuals')
id_plot_sims(ord_irt_lognormal_abs_est)
ord_irt_lognormal_abs_est %>% 
  id_post_pred %>% 
  id_plot_ppc(ord_irt_lognormal_abs_est,ppc_pred=.) + 
  scale_x_log10()
ord_irt_lognormal_abs_est %>% 
  id_post_pred(output='missing') %>% 
  id_plot_ppc(ord_irt_lognormal_abs_est,ppc_pred=.)

Ordered Beta: Uninflated

ordbeta_sim <- id_sim_gen(num_person=20,num_items=100,ordinal=F,inflate=F,
                               absence_diff_mean=0,model_type='ordbeta',
                          phi=1)
ordbeta_est <- id_estimate(idealdata=ordbeta_sim,
                               model_type=15,
                               restrict_ind_high = as.character(sort(ordbeta_sim@simul_data$true_person,
                                                                     decreasing=T,
                                                        index=T)$ix[1]),
                              restrict_ind_low = as.character(sort(ordbeta_sim@simul_data$true_person,
                                                                   decreasing=F,
                                                        index=T)$ix[1]),
                              fix_high = sort(ordbeta_sim@simul_data$true_person,
                                                                     decreasing=T)[1],fix_low = sort(ordbeta_sim@simul_data$true_person,
                                                                     decreasing=F)[1],nchains = 2,ncores=8,
                               fixtype='prefix')
id_plot_rhats(ordbeta_est)
id_plot_legis(ordbeta_est,show_true = T)
id_sim_coverage(ordbeta_est) %>% 
  bind_rows(.id='Parameter') %>% 
  ggplot(aes(y=avg,x=Parameter)) +
  stat_summary(fun.args=list(mult=1.96)) + 
  theme_minimal() 
id_plot_sims(ordbeta_est,type='Residuals')
id_plot_sims(ordbeta_est)
ordbeta_est %>% 
  id_post_pred %>% 
  id_plot_ppc(ordbeta_est,ppc_pred=.)

Ordered Beta: Inflated

ordbeta_abs_sim <- id_sim_gen(num_person=20,num_items=100,ordinal=F,inflate=T,
                               absence_diff_mean=0,model_type='ordbeta')
ordbeta_abs_est <- id_estimate(idealdata=ordbeta_abs_sim,
                               model_type=16,
                               restrict_ind_high = as.character(sort(ordbeta_abs_sim@simul_data$true_person,
                                                                     decreasing=T,
                                                        index=T)$ix[1]),
                              restrict_ind_low = as.character(sort(ordbeta_abs_sim@simul_data$true_person,
                                                                   decreasing=F,
                                                        index=T)$ix[1]),fix_high = sort(ordbeta_abs_sim@simul_data$true_person,
                                                                     decreasing=T)[1],fix_low = sort(ordbeta_abs_sim@simul_data$true_person,
                                                                     decreasing=F)[1],nchains = 2,ncores=8,

                               fixtype='prefix')
id_plot_rhats(ordbeta_abs_est)
id_plot_legis(ordbeta_abs_est,show_true = T)
id_sim_coverage(ordbeta_abs_est) %>% 
  bind_rows(.id='Parameter') %>% 
  ggplot(aes(y=avg,x=Parameter)) +
  stat_summary(fun.args=list(mult=1.96)) + 
  theme_minimal() 
id_plot_sims(ordbeta_abs_est,type='Residuals')
id_plot_sims(ordbeta_abs_est)
ordbeta_abs_est %>% 
  id_post_pred %>% 
  id_plot_ppc(ordbeta_abs_est,ppc_pred=.)
ordbeta_abs_est %>% 
  id_post_pred(output='missing') %>% 
  id_plot_ppc(ordbeta_abs_est,ppc_pred=.,combine_item=FALSE)

Time: Random Walk

We won't test all the possible time auto-correlation settings, only a subset of them. I use an inflated binary model as it is a very common one.

While these models are identified, it appears that sometimes given the amount of data, they are only weakly identified in the posterior and Rhat values higher than 1.1 are possible (but are not evidence of multi-modality).

First random walks:

bin_time_irt_2pl_abs_sim <- id_sim_gen(num_person=50,num_items=100,
                                       ordinal=F,inflate=T,
                               absence_diff_mean=0,
                               time_process = 'random',
                               time_points=10)

# as.character(sort(bin_time_irt_2pl_abs_sim@simul_data$true_reg_discrim,
#                                                                   
#                                                                   decreasing=TRUE,
#                                                         index=T)$ix[1])

# as.character(sort(bin_time_irt_2pl_abs_sim@simul_data$true_reg_discrim,
#                                                                    decreasing=F,
#                                                         index=T)$ix[1])

# 5*sort(bin_time_irt_2pl_abs_sim@simul_data$true_reg_discrim,
#                                                                   
#                                                                   decreasing=TRUE)[1]

best_first_time <- 4 

best_second_time <- 500

bin_time_irt_2pl_abs_est <- id_estimate(idealdata=bin_time_irt_2pl_abs_sim,
                               model_type=2,nchains=2,ncores = 6,
                               id_refresh=100,time_var = 10,
                               niters = 500,warmup = 500,const_type = "items",
                          keep_param=list(person_vary=T,person_int=F,item=TRUE,
                                                           extra=T),
                          restrict_sd_high=10,
                          restrict_sd_low=10,
                               vary_ideal_pts = 'random_walk',
                          restrict_var = F,
                          restrict_ind_high = as.character(sort(bin_time_irt_2pl_abs_sim@simul_data$true_reg_discrim,
                                                                     decreasing=TRUE,
                                                        index=TRUE)$ix[1]),
                              restrict_ind_low = as.character(sort(bin_time_irt_2pl_abs_sim@simul_data$true_reg_discrim,
                                                                   decreasing=F,
                                                        index=T)$ix[1]),
                               fixtype='prefix')
id_plot_rhats(bin_time_irt_2pl_abs_est) + theme(text=element_text(family=""))
id_sim_coverage(bin_time_irt_2pl_abs_est) %>% 
  bind_rows(.id='Parameter') %>% 
  ggplot(aes(y=avg,x=Parameter)) +
  stat_summary(fun.args=list(mult=1.96)) + 
  theme_minimal() 

Let's do a time-varying ideal point plot with true point overlay:

id_plot_legis_dyn(bin_time_irt_2pl_abs_est,plot_sim = T) + facet_wrap(~person_id,scales="free_y")
id_plot_sims(bin_time_irt_2pl_abs_est,type='Residuals')
id_plot_sims(bin_time_irt_2pl_abs_est)

Time: Splines

bin_time_spline_irt_2pl_abs_sim <- id_sim_gen(num_person=100,num_items=100,inflate=F,
                               absence_diff_mean=0,
                               time_process = 'splines',
                               time_points=10,time_sd = .25,
                               spline_degree=3)

sort_ids_high <- sort(bin_time_spline_irt_2pl_abs_sim@simul_data$true_reg_discrim,
                                                                     decreasing=TRUE,
                                                        index=TRUE)$ix

high_restrict <- c(min(sort_ids_high[1:20]),
                       round(median(sort_ids_high[1:20])),
                   max(sort_ids_high[1:20]))

sort_ids_low <- sort(bin_time_spline_irt_2pl_abs_sim@simul_data$true_reg_discrim,,
                                                                   decreasing=F,
                                                        index=T)$ix

low_restrict <- c(min(sort_ids_low[1:20]),
                       round(median(sort_ids_low[1:20])),
                   max(sort_ids_low[1:20]))

bin_time_spline_irt_2pl_abs_est <- id_estimate(idealdata=bin_time_spline_irt_2pl_abs_sim,
                                           use_groups = F,
                               model_type=1,
                               time_center_cutoff = 50,
                               ncores = parallel::detectCores(),
                               vary_ideal_pts = 'splines',prior_only = FALSE,
                               spline_knots=NULL,spline_degree = 2,
                               const_type = "items",person_sd = 5,
                               diff_reg_sd = 5,
                               discrim_reg_scale = 2,discrim_reg_shape = 2,
                               time_var = .1,restrict_var = F,
                               restrict_ind_high = as.character(high_restrict),
                              restrict_ind_low =as.character(low_restrict),
                               fixtype='prefix',adapt_delta=0.95)
id_plot_rhats(bin_time_spline_irt_2pl_abs_est)
id_sim_coverage(bin_time_spline_irt_2pl_abs_est) %>% 
  bind_rows(.id='Parameter') %>% 
  ggplot(aes(y=avg,x=Parameter)) +
  stat_summary(fun.args=list(mult=1.96)) + 
  theme_minimal() 

Let's do a time-varying ideal point plot with true point overlay:

id_plot_legis_dyn(bin_time_spline_irt_2pl_abs_est,plot_lines = 10) + guides(colour="none") + facet_wrap(~person_id,scales="free_y")
id_plot_sims(bin_time_spline_irt_2pl_abs_est,type='Residuals')
id_plot_sims(bin_time_spline_irt_2pl_abs_est)

Time: AR(1)

And autocorrelation:

bin_time_ar_irt_2pl_abs_sim <- id_sim_gen(num_person=20,num_items=100,inflate=T,
                               absence_diff_mean=0,time_process = 'AR',time_points=20,time_sd = .1)

bin_time_ar_irt_2pl_abs_sim@score_matrix$group_id <- forcats::fct_collapse(bin_time_ar_irt_2pl_abs_sim@score_matrix$person_id,
                                                                           `1`=c("1","2","3"))

bin_time_ar_irt_2pl_abs_est <- id_estimate(idealdata=bin_time_ar_irt_2pl_abs_sim,
                               model_type=2,
                               ncores = parallel::detectCores(),
                               vary_ideal_pts = 'AR1',
                               const_type = "items",
                               restrict_ind_high = as.character(sort(bin_time_ar_irt_2pl_abs_sim@simul_data$true_reg_discrim,
                                                                     decreasing=TRUE,
                                                        index=TRUE)$ix[1]),
                              restrict_ind_low = as.character(sort(bin_time_ar_irt_2pl_abs_sim@simul_data$true_reg_discrim,,
                                                                   decreasing=F,
                                                        index=T)$ix[1]),
                               fixtype='prefix')
id_plot_rhats(bin_time_ar_irt_2pl_abs_est)

Let's do a time-varying ideal point plot with true point overlay:

id_plot_legis_dyn(bin_time_ar_irt_2pl_abs_est,plot_lines = 10) + guides(colour="none") + facet_wrap(~person_id,scales="free_y")

Time: Gaussian Process

The Gaussian process differs in that is a non-parametric approach to time-series modeling. It can fit very flexible curves and also works on irregularly-spaced time series.

time_gp_sim <- id_sim_gen(num_person=10,num_items=200,inflate=T,
                               absence_diff_mean=0,
                          time_process = 'GP',
                          time_points=20)

time_gp_est <- id_estimate(idealdata=time_gp_sim,
                               model_type=2,
                           use_vb=F,const_type="items",
                           restrict_ind_high = as.character(sort(time_gp_sim@simul_data$true_reg_discrim,
                                                                     decreasing=TRUE,
                                                        index=TRUE)$ix[1]),
                              restrict_ind_low = as.character(sort(time_gp_sim@simul_data$true_reg_discrim,,
                                                                   decreasing=F,
                                                        index=T)$ix[1]),
                           nchains=2,ncores=8,warmup=1000,niters=500,
                               vary_ideal_pts = 'GP',
                               fixtype='prefix')
id_plot_rhats(time_gp_est)
id_sim_coverage(time_gp_est) %>% 
  bind_rows(.id='Parameter') %>% 
  ggplot(aes(y=avg,x=Parameter)) +
  stat_summary(fun.args=list(mult=1.96)) + 
  theme_minimal() 

Let's do a time-varying ideal point plot with true point overlay:

id_plot_legis_dyn(time_gp_est,plot_sim = T) +facet_wrap(~person_id)
id_plot_sims(time_gp_est,type='Residuals')
id_plot_sims(time_gp_est)

Latent Space Model

Another new addition is the latent space model, which is only a slight modification on the IRT model.

Non-inflated Latent Space

latent_noninfl_sim <- id_sim_gen(num_person=10,num_items=300,ordinal=F,
                                       inflate=F,
                                       model_type='binary',
                                       latent_space=T)
latent_noninfl_est <- id_estimate(idealdata=latent_noninfl_sim,
                               model_type=13,
                               restrict_ind_high = as.character(sort(latent_noninfl_sim@simul_data$true_person,
                                                                     decreasing=TRUE,
                                                        index=T)$ix[1:2]),
                              restrict_ind_low = as.character(sort(latent_noninfl_sim@simul_data$true_person,
                                                                   decreasing=F,
                                                        index=T)$ix[1:2]),fix_high = sort(latent_noninfl_sim@simul_data$true_person,
                                                                     decreasing=T)[1:2],fix_low = sort(latent_noninfl_sim@simul_data$true_person,
                                                                     decreasing=F)[1:2],
                               fixtype='prefix',id_refresh=100,warmup=500,niters=500,nchains=2,ncores=8)
id_plot_rhats(latent_noninfl_est)
id_plot_legis(latent_noninfl_est,show_true = T)
id_sim_coverage(latent_noninfl_est) %>% 
  bind_rows(.id='Parameter') %>% 
  ggplot(aes(y=avg,x=Parameter)) +
  stat_summary(fun.args=list(mult=1.96)) + 
  theme_minimal() 
latent_noninfl_est %>% 
  id_post_pred %>% 
  id_plot_ppc(latent_noninfl_est,ppc_pred=.)

Latent Space Inflated:

# you can't have too few people relative to bills
latent_infl_sim <- id_sim_gen(num_person=10,num_items=400,ordinal=F,
                                       inflate=T,
                                       model_type='binary',
                                       latent_space=T)
latent_infl_est <- id_estimate(idealdata=latent_infl_sim,
                               model_type=14,
                               restrict_ind_high = as.character(sort(latent_infl_sim@simul_data$true_person,
                                                                     decreasing=TRUE,
                                                        index=T)$ix[1]),
                              restrict_ind_low = as.character(sort(latent_infl_sim@simul_data$true_person,
                                                                   decreasing=F,
                                                        index=T)$ix[1]),fix_high = sort(latent_infl_sim@simul_data$true_person,
                                                                     decreasing=T)[1],fix_low = sort(latent_infl_sim@simul_data$true_person,
                                                                     decreasing=F)[1],
                               fixtype='prefix',nchains=2,ncores=8,
                              adapt_delta=0.95,
                              diff_reg_sd = 2,
                             diff_miss_sd=2)
id_plot_rhats(latent_infl_est)
id_plot_legis(latent_infl_est,show_true = T)
id_sim_coverage(latent_infl_est) %>% 
  bind_rows(.id='Parameter') %>% 
  ggplot(aes(y=avg,x=Parameter)) +
  stat_summary(fun.args=list(mult=1.96)) + 
  theme_minimal() 
latent_infl_est %>% 
  id_post_pred %>% 
  id_plot_ppc(latent_infl_est,ppc_pred=.)


saudiwin/idealstan documentation built on April 11, 2025, 4:37 p.m.