require(idealstan) require(ggplot2) require(dplyr) require(tidyr) library(bayesplot) knitr::opts_chunk$set(fig.align = 'center',warning = F,message = F)
This Rmarkdown file has a series of simulations run on the models offered in idealstan
. It simulates a large amount of data and then checks to see if the residuals relative to the true parameters sum to zero, as well as for credible interval coverage. In general, the credible intervals aren't going to be exactly where they should be because the constraints used in estimating the models and identifying the signs aren't themselves included in the simulated data. I.e., one of the ideal points actually follows a positive continuous distribution instead of a random normal distribution once it is prefix, which introduces a slight bias in the recovery of the true parameters. This is an artifact of virtually every latent variable model, and is not of great concern as long as the credible intervals reach close to 90 percent coverage.
First, the standard IRT 2-PL (for this one, the absence discriminations are not a part of the model):
bin_irt_2pl_sim <- id_sim_gen(num_person=100,num_items=20,ordinal=F,inflate=F) bin_irt_2pl_est <- id_estimate(idealdata=bin_irt_2pl_sim, model_type=1,init_pathfinder = TRUE, restrict_ind_high = as.character(sort(bin_irt_2pl_sim@simul_data$true_person, decreasing=T, index=T)$ix[1]), restrict_ind_low = as.character(sort(bin_irt_2pl_sim@simul_data$true_person, decreasing=F, index=T)$ix[1]), fix_high = sort(bin_irt_2pl_sim@simul_data$true_person, decreasing=T)[1],fix_low = sort(bin_irt_2pl_sim@simul_data$true_person, decreasing=F)[1], fixtype='prefix', person_sd = 3, nchains=2,ncores=4,niters = 500,warmup=500) id_plot_rhats(bin_irt_2pl_est)
id_sim_coverage(bin_irt_2pl_est) %>% bind_rows(.id='Parameter') %>% ggplot(aes(y=avg,x=Parameter)) + stat_summary(fun.args=list(mult=1.96)) + theme_minimal()
Plot true versus observed values:
id_plot_legis(bin_irt_2pl_est,show_true = T)
id_plot_sims(bin_irt_2pl_est,type='Residuals')
id_plot_sims(bin_irt_2pl_est)
new_data <- bin_irt_2pl_est@score_data@func_args$score_data c1 <- id_post_pred(bin_irt_2pl_est, newdata=new_data) id_plot_ppc(bin_irt_2pl_est,ppc_pred=c1,combine_item=FALSE, prompt_plot=FALSE)
Try this same model but add an external covariate with an ideal point effect of +2
bin_irt_2pl_cov_sim <- id_sim_gen(num_person=100,num_items=20,ordinal=F,inflate=F, cov_effect = 3) bin_irt_2pl_cov_est <- id_estimate(idealdata=bin_irt_2pl_cov_sim, model_type=1,init_pathfinder = TRUE, restrict_ind_high = as.character(sort(bin_irt_2pl_cov_sim@simul_data$true_person, decreasing=T, index=T)$ix[1]), restrict_ind_low = as.character(sort(bin_irt_2pl_cov_sim@simul_data$true_person, decreasing=F, index=T)$ix[1]), fix_high = sort(bin_irt_2pl_cov_sim@simul_data$true_person, decreasing=T)[1],fix_low = sort(bin_irt_2pl_cov_sim@simul_data$true_person, decreasing=F)[1], fixtype='prefix', person_sd = 3, nchains=2,ncores=4,niters = 500,warmup=500) id_plot_rhats(bin_irt_2pl_cov_est)
id_sim_coverage(bin_irt_2pl_cov_est) %>% bind_rows(.id='Parameter') %>% ggplot(aes(y=avg,x=Parameter)) + stat_summary(fun.args=list(mult=1.96)) + theme_minimal()
Plot true versus observed values:
id_plot_legis(bin_irt_2pl_cov_est,show_true = T)
id_plot_sims(bin_irt_2pl_cov_est,type='Residuals')
id_plot_sims(bin_irt_2pl_cov_est)
new_data <- bin_irt_2pl_cov_est@score_data@func_args$score_data c1 <- id_post_pred(bin_irt_2pl_cov_est, newdata=new_data) id_plot_ppc(bin_irt_2pl_cov_est,ppc_pred=c1,combine_item=FALSE, prompt_plot=FALSE)
First, plot estimated coefficient legis_x
against true value:
mcmc_recover_hist(bin_irt_2pl_cov_est@stan_samples$draws("legis_x"), true=bin_irt_2pl_cov_sim@simul_data$cov_effect)
eps <- 1e-4 new_data1 <- mutate(bin_irt_2pl_cov_sim@score_matrix, person_x = person_x - eps / 2) new_data2 <- mutate(bin_irt_2pl_cov_sim@score_matrix, person_x = person_x + eps / 2) # now predict new outcome distributions given these slightly different datasets cov_mod_pred1 <- id_post_pred(bin_irt_2pl_cov_est,newdata=new_data1, use_cores=4,draws="all", item_subset=levels(new_data1$item), type="epred") cov_mod_pred2 <- id_post_pred(bin_irt_2pl_cov_est,newdata=new_data2, use_cores=4,draws="all", item_subset=levels(new_data2$item), type="epred") # now we need to difference the datasets at the item level to calculate the effects # this will create some lists that will then be appended together to a big data frame # with one observation per posterior draw print("Looping over items") # difference the predictions c1 <- purrr::map2(cov_mod_pred1[[1]], cov_mod_pred2[[1]], function(small,big) { # difference the effects (big - small) / eps }) # combine into datasets for each item with item id + person (senator) id c2 <- lapply(c1, function(mat) { out_data <- attr(mat, "data") colnames(mat) <- out_data$person_id as_tibble(mat) %>% mutate(draws=1:n(), item_id=unique(out_data$item_id)) %>% gather(key="person_id",value="estimate",-draws,-item_id) %>% mutate(person_id=as.numeric(person_id), estimate=as.numeric(estimate)) }) %>% bind_rows # merge in some original data to_merge <- mutate(bin_irt_2pl_cov_est@score_data@score_matrix, item_orig=item_id, person_orig=person_id, person_id=as.numeric(person_id), item_id=as.numeric(item_id)) %>% select(person_id, item_id, group_id,item_orig, person_orig) %>% distinct # add in party data so we can calculate party-specific effects c2 <- left_join(c2, to_merge, by=c("item_id","person_id")) # get effect separately by democrats/republicans by_party <- group_by(c2, draws, group_id, item_id, item_orig) %>% summarize(mean_est1=mean(estimate)) %>% group_by(group_id, item_id, item_orig) %>% summarize(mean_est=mean(mean_est1), low_est=quantile(mean_est1, .05), high_est=quantile(mean_est1, .95)) # merge in item discrimination to add some color / show high-discrim votes item_discrim <- filter(bin_irt_2pl_cov_est@summary, grepl(x=variable, pattern="sigma\\_reg\\_free")) %>% mutate(item_id=as.numeric(stringr::str_extract(variable, "[0-9]+"))) by_party <- left_join(by_party, select(item_discrim, median, item_id)) # plot the result by_party %>% ggplot(aes(y=mean_est, x=reorder(item_id,mean_est))) + geom_linerange(aes(ymin=low_est, ymax=high_est, colour=`median`)) + ggthemes::theme_tufte() + scale_colour_viridis_c(name="Discrimination") + coord_flip() + geom_hline(yintercept=0,linetype=2) + ggthemes::theme_clean() + theme(axis.text.y=element_blank(), axis.ticks.y=element_blank()) + theme(legend.position = "bottom")
Next, inflated 2-PL IRT (binary):
bin_irt_2pl_abs_sim <- id_sim_gen(num_person=20,num_items=100,ordinal=F,inflate=T, absence_diff_mean=0) bin_irt_2pl_abs_est <- id_estimate(idealdata=bin_irt_2pl_abs_sim, model_type=2, restrict_ind_high = as.character(sort(bin_irt_2pl_abs_sim@simul_data$true_person, decreasing=T, index=T)$ix[1]), restrict_ind_low = as.character(sort(bin_irt_2pl_abs_sim@simul_data$true_person, decreasing=F, index=T)$ix[1]), fix_high=sort(bin_irt_2pl_abs_sim@simul_data$true_person, decreasing=T)[1], fix_low=sort(bin_irt_2pl_abs_sim@simul_data$true_person, decreasing=F)[1], fixtype='prefix') id_plot_rhats(bin_irt_2pl_abs_est)
Plot true versus observed values:
id_plot_legis(bin_irt_2pl_abs_est,show_true = T)
id_sim_coverage(bin_irt_2pl_abs_est) %>% bind_rows(.id='Parameter') %>% ggplot(aes(y=avg,x=Parameter)) + stat_summary(fun.args=list(mult=1.96)) + theme_minimal()
id_plot_sims(bin_irt_2pl_abs_est,type='Residuals')
id_plot_sims(bin_irt_2pl_abs_est)
bin_irt_2pl_abs_est %>% id_post_pred %>% id_plot_ppc(bin_irt_2pl_abs_est,ppc_pred=.,combine_item=FALSE,prompt_plot=FALSE)
Now we'll start with the ordinal models. First the uninflated ordinal model:
ord_irt_sim <- id_sim_gen(num_person=20,num_items=100,inflate=F, absence_diff_mean=0,model_type='ordinal_ratingscale') ord_irt_est <- id_estimate(idealdata=ord_irt_sim, model_type=3, restrict_ind_high = as.character(sort(ord_irt_sim@simul_data$true_person,decreasing=T, index=T)$ix[1]), restrict_ind_low = as.character(sort(ord_irt_sim@simul_data$true_person,decreasing=F, index=T)$ix[1]), fix_high=sort(ord_irt_sim@simul_data$true_person,decreasing=T)[1], fix_low=sort(ord_irt_sim@simul_data$true_person,decreasing=F)[1], fixtype='prefix') id_plot_rhats(ord_irt_est)
Plot true versus observed values:
id_plot_legis(ord_irt_est,show_true = T)
id_sim_coverage(ord_irt_est) %>% bind_rows(.id='Parameter') %>% ggplot(aes(y=avg,x=Parameter)) + stat_summary(fun.args=list(mult=1.96)) + theme_minimal()
id_plot_sims(ord_irt_est,type='Residuals')
id_plot_sims(ord_irt_est)
ord_irt_est %>% id_post_pred %>% id_plot_ppc(ord_irt_est,ppc_pred=.)
And we will finish with the inflated ordinal model:
ord_irt_abs_sim <- id_sim_gen(num_person=20,num_items=100,inflate=T, absence_diff_mean=0,model_type='ordinal_ratingscale') ord_irt_abs_est <- id_estimate(idealdata=ord_irt_abs_sim, model_type=4, restrict_ind_high = as.character(sort(ord_irt_abs_sim@simul_data$true_person, decreasing=T, index=T)$ix[1]), restrict_ind_low = as.character(sort(ord_irt_abs_sim@simul_data$true_person, decreasing=F, index=T)$ix[1]), fix_high = sort(ord_irt_abs_sim@simul_data$true_person, decreasing=T)[1], fix_low = sort(ord_irt_abs_sim@simul_data$true_person, decreasing=F)[1],chains=2,ncores=4, fixtype='prefix') id_plot_rhats(ord_irt_abs_est)
id_plot_legis(ord_irt_abs_est,show_true = T)
id_sim_coverage(ord_irt_abs_est) %>% bind_rows(.id='Parameter') %>% ggplot(aes(y=avg,x=Parameter)) + stat_summary(fun.args=list(mult=1.96)) + theme_minimal()
id_plot_sims(ord_irt_abs_est,type='Residuals')
id_plot_sims(ord_irt_abs_est)
ord_irt_abs_est %>% id_post_pred %>% id_plot_ppc(ord_irt_abs_est,ppc_pred=.)
We can now try the ordinal graded response version (non-inflated):
ord_irt_grm_sim <- id_sim_gen(num_person=20,num_items=100,inflate=F, absence_diff_mean=0,model_type='ordinal_grm') ord_irt_grm_est <- id_estimate(idealdata=ord_irt_grm_sim, model_type=5, restrict_ind_high = as.character(sort(ord_irt_grm_sim@simul_data$true_person, decreasing=T, index=T)$ix[1]), restrict_ind_low = as.character(sort(ord_irt_grm_sim@simul_data$true_person, decreasing=F, index=T)$ix[1]), fixtype='prefix') id_plot_rhats(ord_irt_grm_est)
id_sim_coverage(ord_irt_grm_est) %>% bind_rows(.id='Parameter') %>% ggplot(aes(y=avg,x=Parameter)) + stat_summary(fun.args=list(mult=1.96)) + theme_minimal()
id_plot_legis(ord_irt_grm_est,show_true = T)
id_plot_sims(ord_irt_grm_est,type='Residuals')
id_plot_sims(ord_irt_grm_est)
ord_irt_grm_est %>% id_post_pred %>% id_plot_ppc(ord_irt_grm_est,ppc_pred=.)
And now the inflated GRM:
ord_irt_grm_abs_sim <- id_sim_gen(num_person=20,num_items=100,inflate=T, absence_diff_mean=0,model_type='ordinal_grm') ord_irt_grm_abs_est <- id_estimate(idealdata=ord_irt_grm_abs_sim, model_type=6, restrict_ind_high = as.character(sort(ord_irt_grm_abs_sim@simul_data$true_person, decreasing=T, index=T)$ix[1]), restrict_ind_low = as.character(sort(ord_irt_grm_abs_sim@simul_data$true_person, decreasing=F, index=T)$ix[1]), fixtype='prefix') id_plot_rhats(ord_irt_grm_abs_est)
id_plot_legis(ord_irt_grm_abs_est,show_true = T)
id_sim_coverage(ord_irt_grm_abs_est) %>% bind_rows(.id='Parameter') %>% ggplot(aes(y=avg,x=Parameter)) + stat_summary(fun.args=list(mult=1.96)) + theme_minimal()
id_plot_sims(ord_irt_grm_abs_est,type='Residuals')
id_plot_sims(ord_irt_grm_abs_est)
ord_irt_grm_abs_est %>% id_post_pred %>% id_plot_ppc(ord_irt_grm_abs_est,ppc_pred=.)
Models I am adding to this release include the Poisson model:
ord_irt_poisson_sim <- id_sim_gen(num_person=20,num_items=100,inflate=F, absence_diff_mean=0,model_type='poisson') ord_irt_poisson_est <- id_estimate(idealdata=ord_irt_poisson_sim, model_type=7,restrict_ind_high = as.character(sort(ord_irt_poisson_sim@simul_data$true_person, decreasing=T, index=T)$ix[1]), restrict_ind_low = as.character(sort(ord_irt_poisson_sim@simul_data$true_person, decreasing=F, index=T)$ix[1]), fix_high=sort(ord_irt_poisson_sim@simul_data$true_person, decreasing=T)[1], fix_low=sort(ord_irt_poisson_sim@simul_data$true_person, decreasing=F)[1], fixtype='prefix',nchains=2,ncores=8) id_plot_rhats(ord_irt_poisson_est)
id_plot_legis(ord_irt_poisson_est,show_true = T)
id_sim_coverage(ord_irt_poisson_est) %>% bind_rows(.id='Parameter') %>% ggplot(aes(y=avg,x=Parameter)) + stat_summary(fun.args=list(mult=1.96)) + theme_minimal()
id_plot_sims(ord_irt_poisson_est,type='Residuals')
id_plot_sims(ord_irt_poisson_est)
ord_irt_poisson_est %>% id_post_pred %>% id_plot_ppc(ord_irt_poisson_est,ppc_pred=.)
Now Poisson-inflated:
ord_irt_poisson_abs_sim <- id_sim_gen(num_person=20,num_items=100,inflate=T, absence_diff_mean=0,model_type='poisson') ord_irt_poisson_abs_est <- id_estimate(idealdata=ord_irt_poisson_abs_sim, model_type=8,ncores = 8, nchains=2, restrict_ind_high = as.character(sort(ord_irt_poisson_abs_sim@simul_data$true_person, decreasing=T, index=T)$ix[1]), restrict_ind_low = as.character(sort(ord_irt_poisson_abs_sim@simul_data$true_person, decreasing=F, index=T)$ix[1]), fix_high = sort(ord_irt_poisson_abs_sim@simul_data$true_person, decreasing=T)[1],fix_low = sort(ord_irt_poisson_abs_sim@simul_data$true_person, decreasing=F)[1], fixtype='prefix') id_plot_rhats(ord_irt_poisson_abs_est)
id_plot_legis(ord_irt_poisson_abs_est,show_true = T)
id_sim_coverage(ord_irt_poisson_abs_est) %>% bind_rows(.id='Parameter') %>% ggplot(aes(y=avg,x=Parameter)) + stat_summary(fun.args=list(mult=1.96)) + theme_minimal()
id_plot_sims(ord_irt_poisson_abs_est,type='Residuals')
id_plot_sims(ord_irt_poisson_abs_est)
ord_irt_poisson_abs_est %>% id_post_pred %>% id_plot_ppc(ord_irt_poisson_abs_est,ppc_pred=.)
ord_irt_poisson_abs_est %>% id_post_pred(output='missing') %>% id_plot_ppc(ord_irt_poisson_abs_est,ppc_pred=.)
A Normally-distributed outcome with no inflation:
ord_irt_normal_sim <- id_sim_gen(num_person=20,num_items=100,ordinal=F,inflate=F, absence_diff_mean=0,model_type='normal') ord_irt_normal_est <- id_estimate(idealdata=ord_irt_normal_sim, model_type=9,restrict_sd_high = .1, restrict_sd_low=.1, restrict_ind_high = as.character(sort(ord_irt_normal_sim@simul_data$true_person, decreasing=T, index=T)$ix[1]), restrict_ind_low = as.character(sort(ord_irt_normal_sim@simul_data$true_person, decreasing=F, index=T)$ix[1]),fix_high = sort(ord_irt_normal_sim@simul_data$true_person, decreasing=T)[1],fix_low = sort(ord_irt_normal_sim@simul_data$true_person, decreasing=F)[1], fixtype='prefix',compile_optim=F) id_plot_rhats(ord_irt_normal_est)
id_plot_legis(ord_irt_normal_est,show_true = T)
id_sim_coverage(ord_irt_normal_est) %>% bind_rows(.id='Parameter') %>% ggplot(aes(y=avg,x=Parameter)) + stat_summary(fun.args=list(mult=1.96)) + theme_minimal()
id_plot_sims(ord_irt_normal_est,type='Residuals')
id_plot_sims(ord_irt_normal_est)
ord_irt_normal_est %>% id_post_pred %>% id_plot_ppc(ord_irt_normal_est,ppc_pred=.)
A Normally-distributed outcome with inflation:
ord_irt_normal_abs_sim <- id_sim_gen(num_person=20,num_items=100,inflate=T, absence_diff_mean=0,model_type='normal') ord_irt_normal_abs_est <- id_estimate(idealdata=ord_irt_normal_abs_sim, model_type=10, restrict_ind_high = as.character(sort(ord_irt_normal_abs_sim@simul_data$true_person, decreasing=T, index=T)$ix[1]), restrict_ind_low = as.character(sort(ord_irt_normal_abs_sim@simul_data$true_person, decreasing=F, index=T)$ix[1]), fix_high = sort(ord_irt_normal_abs_sim@simul_data$true_person, decreasing=T)[1],fix_low = sort(ord_irt_normal_abs_sim@simul_data$true_person, decreasing=F)[1],nchains = 2,ncores=4, fixtype='prefix') id_plot_rhats(ord_irt_normal_abs_est)
id_plot_legis(ord_irt_normal_abs_est,show_true = T)
id_sim_coverage(ord_irt_normal_abs_est) %>% bind_rows(.id='Parameter') %>% ggplot(aes(y=avg,x=Parameter)) + stat_summary(fun.args=list(mult=1.96)) + theme_minimal()
id_plot_sims(ord_irt_normal_abs_est,type='Residuals')
id_plot_sims(ord_irt_normal_abs_est)
ord_irt_normal_abs_est %>% id_post_pred %>% id_plot_ppc(ord_irt_normal_abs_est,ppc_pred=.)
ord_irt_normal_abs_est %>% id_post_pred(output='missing') %>% id_plot_ppc(ord_irt_normal_abs_est,ppc_pred=.)
A Lognormal model with no inflation:
ord_irt_lognormal_sim <- id_sim_gen(num_person=20,num_items=100,ordinal=F,inflate=F, absence_diff_mean=0,model_type='lognormal') ord_irt_lognormal_est <- id_estimate(idealdata=ord_irt_lognormal_sim, model_type=11, restrict_ind_high = as.character(sort(ord_irt_lognormal_sim@simul_data$true_person, decreasing=T, index=T)$ix[1]), restrict_ind_low = as.character(sort(ord_irt_lognormal_sim@simul_data$true_person, decreasing=F, index=T)$ix[1]), fix_high = sort(ord_irt_lognormal_sim@simul_data$true_person, decreasing=T)[1],fix_low = sort(ord_irt_lognormal_sim@simul_data$true_person, decreasing=F)[1],nchains = 2,ncores=4, fixtype='prefix') id_plot_rhats(ord_irt_lognormal_est)
id_plot_legis(ord_irt_lognormal_est,show_true = T)
id_sim_coverage(ord_irt_lognormal_est) %>% bind_rows(.id='Parameter') %>% ggplot(aes(y=avg,x=Parameter)) + stat_summary(fun.args=list(mult=1.96)) + theme_minimal()
id_plot_sims(ord_irt_lognormal_est,type='Residuals')
id_plot_sims(ord_irt_lognormal_est)
ord_irt_lognormal_est %>% id_post_pred %>% id_plot_ppc(ord_irt_lognormal_est,ppc_pred=.) + scale_x_log10()
And last but not least, a lognormal model with inflation:
ord_irt_lognormal_abs_sim <- id_sim_gen(num_person=20,num_items=100,ordinal=F,inflate=T, absence_diff_mean=0,model_type='lognormal') ord_irt_lognormal_abs_est <- id_estimate(idealdata=ord_irt_lognormal_abs_sim, model_type=12, restrict_ind_high = as.character(sort(ord_irt_lognormal_abs_sim@simul_data$true_person, decreasing=T, index=T)$ix[1]), restrict_ind_low = as.character(sort(ord_irt_lognormal_abs_sim@simul_data$true_person, decreasing=F, index=T)$ix[1]),fix_high = sort(ord_irt_lognormal_abs_sim@simul_data$true_person, decreasing=T)[1],fix_low = sort(ord_irt_lognormal_abs_sim@simul_data$true_person, decreasing=F)[1],nchains = 2,ncores=4, fixtype='prefix') id_plot_rhats(ord_irt_lognormal_abs_est)
id_plot_legis(ord_irt_lognormal_abs_est,show_true = T)
id_sim_coverage(ord_irt_lognormal_abs_est) %>% bind_rows(.id='Parameter') %>% ggplot(aes(y=avg,x=Parameter)) + stat_summary(fun.args=list(mult=1.96)) + theme_minimal()
id_plot_sims(ord_irt_lognormal_abs_est,type='Residuals')
id_plot_sims(ord_irt_lognormal_abs_est)
ord_irt_lognormal_abs_est %>% id_post_pred %>% id_plot_ppc(ord_irt_lognormal_abs_est,ppc_pred=.) + scale_x_log10()
ord_irt_lognormal_abs_est %>% id_post_pred(output='missing') %>% id_plot_ppc(ord_irt_lognormal_abs_est,ppc_pred=.)
ordbeta_sim <- id_sim_gen(num_person=20,num_items=100,ordinal=F,inflate=F, absence_diff_mean=0,model_type='ordbeta', phi=1) ordbeta_est <- id_estimate(idealdata=ordbeta_sim, model_type=15, restrict_ind_high = as.character(sort(ordbeta_sim@simul_data$true_person, decreasing=T, index=T)$ix[1]), restrict_ind_low = as.character(sort(ordbeta_sim@simul_data$true_person, decreasing=F, index=T)$ix[1]), fix_high = sort(ordbeta_sim@simul_data$true_person, decreasing=T)[1],fix_low = sort(ordbeta_sim@simul_data$true_person, decreasing=F)[1],nchains = 2,ncores=8, fixtype='prefix') id_plot_rhats(ordbeta_est)
id_plot_legis(ordbeta_est,show_true = T)
id_sim_coverage(ordbeta_est) %>% bind_rows(.id='Parameter') %>% ggplot(aes(y=avg,x=Parameter)) + stat_summary(fun.args=list(mult=1.96)) + theme_minimal()
id_plot_sims(ordbeta_est,type='Residuals')
id_plot_sims(ordbeta_est)
ordbeta_est %>% id_post_pred %>% id_plot_ppc(ordbeta_est,ppc_pred=.)
ordbeta_abs_sim <- id_sim_gen(num_person=20,num_items=100,ordinal=F,inflate=T, absence_diff_mean=0,model_type='ordbeta') ordbeta_abs_est <- id_estimate(idealdata=ordbeta_abs_sim, model_type=16, restrict_ind_high = as.character(sort(ordbeta_abs_sim@simul_data$true_person, decreasing=T, index=T)$ix[1]), restrict_ind_low = as.character(sort(ordbeta_abs_sim@simul_data$true_person, decreasing=F, index=T)$ix[1]),fix_high = sort(ordbeta_abs_sim@simul_data$true_person, decreasing=T)[1],fix_low = sort(ordbeta_abs_sim@simul_data$true_person, decreasing=F)[1],nchains = 2,ncores=8, fixtype='prefix') id_plot_rhats(ordbeta_abs_est)
id_plot_legis(ordbeta_abs_est,show_true = T)
id_sim_coverage(ordbeta_abs_est) %>% bind_rows(.id='Parameter') %>% ggplot(aes(y=avg,x=Parameter)) + stat_summary(fun.args=list(mult=1.96)) + theme_minimal()
id_plot_sims(ordbeta_abs_est,type='Residuals')
id_plot_sims(ordbeta_abs_est)
ordbeta_abs_est %>% id_post_pred %>% id_plot_ppc(ordbeta_abs_est,ppc_pred=.)
ordbeta_abs_est %>% id_post_pred(output='missing') %>% id_plot_ppc(ordbeta_abs_est,ppc_pred=.,combine_item=FALSE)
We won't test all the possible time auto-correlation settings, only a subset of them. I use an inflated binary model as it is a very common one.
While these models are identified, it appears that sometimes given the amount of data, they are only weakly identified in the posterior and Rhat values higher than 1.1 are possible (but are not evidence of multi-modality).
First random walks:
bin_time_irt_2pl_abs_sim <- id_sim_gen(num_person=50,num_items=100, ordinal=F,inflate=T, absence_diff_mean=0, time_process = 'random', time_points=10) # as.character(sort(bin_time_irt_2pl_abs_sim@simul_data$true_reg_discrim, # # decreasing=TRUE, # index=T)$ix[1]) # as.character(sort(bin_time_irt_2pl_abs_sim@simul_data$true_reg_discrim, # decreasing=F, # index=T)$ix[1]) # 5*sort(bin_time_irt_2pl_abs_sim@simul_data$true_reg_discrim, # # decreasing=TRUE)[1] best_first_time <- 4 best_second_time <- 500 bin_time_irt_2pl_abs_est <- id_estimate(idealdata=bin_time_irt_2pl_abs_sim, model_type=2,nchains=2,ncores = 6, id_refresh=100,time_var = 10, niters = 500,warmup = 500,const_type = "items", keep_param=list(person_vary=T,person_int=F,item=TRUE, extra=T), restrict_sd_high=10, restrict_sd_low=10, vary_ideal_pts = 'random_walk', restrict_var = F, restrict_ind_high = as.character(sort(bin_time_irt_2pl_abs_sim@simul_data$true_reg_discrim, decreasing=TRUE, index=TRUE)$ix[1]), restrict_ind_low = as.character(sort(bin_time_irt_2pl_abs_sim@simul_data$true_reg_discrim, decreasing=F, index=T)$ix[1]), fixtype='prefix') id_plot_rhats(bin_time_irt_2pl_abs_est) + theme(text=element_text(family=""))
id_sim_coverage(bin_time_irt_2pl_abs_est) %>% bind_rows(.id='Parameter') %>% ggplot(aes(y=avg,x=Parameter)) + stat_summary(fun.args=list(mult=1.96)) + theme_minimal()
Let's do a time-varying ideal point plot with true point overlay:
id_plot_legis_dyn(bin_time_irt_2pl_abs_est,plot_sim = T) + facet_wrap(~person_id,scales="free_y")
id_plot_sims(bin_time_irt_2pl_abs_est,type='Residuals')
id_plot_sims(bin_time_irt_2pl_abs_est)
bin_time_spline_irt_2pl_abs_sim <- id_sim_gen(num_person=100,num_items=100,inflate=F, absence_diff_mean=0, time_process = 'splines', time_points=10,time_sd = .25, spline_degree=3) sort_ids_high <- sort(bin_time_spline_irt_2pl_abs_sim@simul_data$true_reg_discrim, decreasing=TRUE, index=TRUE)$ix high_restrict <- c(min(sort_ids_high[1:20]), round(median(sort_ids_high[1:20])), max(sort_ids_high[1:20])) sort_ids_low <- sort(bin_time_spline_irt_2pl_abs_sim@simul_data$true_reg_discrim,, decreasing=F, index=T)$ix low_restrict <- c(min(sort_ids_low[1:20]), round(median(sort_ids_low[1:20])), max(sort_ids_low[1:20])) bin_time_spline_irt_2pl_abs_est <- id_estimate(idealdata=bin_time_spline_irt_2pl_abs_sim, use_groups = F, model_type=1, time_center_cutoff = 50, ncores = parallel::detectCores(), vary_ideal_pts = 'splines',prior_only = FALSE, spline_knots=NULL,spline_degree = 2, const_type = "items",person_sd = 5, diff_reg_sd = 5, discrim_reg_scale = 2,discrim_reg_shape = 2, time_var = .1,restrict_var = F, restrict_ind_high = as.character(high_restrict), restrict_ind_low =as.character(low_restrict), fixtype='prefix',adapt_delta=0.95) id_plot_rhats(bin_time_spline_irt_2pl_abs_est)
id_sim_coverage(bin_time_spline_irt_2pl_abs_est) %>% bind_rows(.id='Parameter') %>% ggplot(aes(y=avg,x=Parameter)) + stat_summary(fun.args=list(mult=1.96)) + theme_minimal()
Let's do a time-varying ideal point plot with true point overlay:
id_plot_legis_dyn(bin_time_spline_irt_2pl_abs_est,plot_lines = 10) + guides(colour="none") + facet_wrap(~person_id,scales="free_y")
id_plot_sims(bin_time_spline_irt_2pl_abs_est,type='Residuals')
id_plot_sims(bin_time_spline_irt_2pl_abs_est)
And autocorrelation:
bin_time_ar_irt_2pl_abs_sim <- id_sim_gen(num_person=20,num_items=100,inflate=T, absence_diff_mean=0,time_process = 'AR',time_points=20,time_sd = .1) bin_time_ar_irt_2pl_abs_sim@score_matrix$group_id <- forcats::fct_collapse(bin_time_ar_irt_2pl_abs_sim@score_matrix$person_id, `1`=c("1","2","3")) bin_time_ar_irt_2pl_abs_est <- id_estimate(idealdata=bin_time_ar_irt_2pl_abs_sim, model_type=2, ncores = parallel::detectCores(), vary_ideal_pts = 'AR1', const_type = "items", restrict_ind_high = as.character(sort(bin_time_ar_irt_2pl_abs_sim@simul_data$true_reg_discrim, decreasing=TRUE, index=TRUE)$ix[1]), restrict_ind_low = as.character(sort(bin_time_ar_irt_2pl_abs_sim@simul_data$true_reg_discrim,, decreasing=F, index=T)$ix[1]), fixtype='prefix') id_plot_rhats(bin_time_ar_irt_2pl_abs_est)
Let's do a time-varying ideal point plot with true point overlay:
id_plot_legis_dyn(bin_time_ar_irt_2pl_abs_est,plot_lines = 10) + guides(colour="none") + facet_wrap(~person_id,scales="free_y")
The Gaussian process differs in that is a non-parametric approach to time-series modeling. It can fit very flexible curves and also works on irregularly-spaced time series.
time_gp_sim <- id_sim_gen(num_person=10,num_items=200,inflate=T, absence_diff_mean=0, time_process = 'GP', time_points=20) time_gp_est <- id_estimate(idealdata=time_gp_sim, model_type=2, use_vb=F,const_type="items", restrict_ind_high = as.character(sort(time_gp_sim@simul_data$true_reg_discrim, decreasing=TRUE, index=TRUE)$ix[1]), restrict_ind_low = as.character(sort(time_gp_sim@simul_data$true_reg_discrim,, decreasing=F, index=T)$ix[1]), nchains=2,ncores=8,warmup=1000,niters=500, vary_ideal_pts = 'GP', fixtype='prefix') id_plot_rhats(time_gp_est)
id_sim_coverage(time_gp_est) %>% bind_rows(.id='Parameter') %>% ggplot(aes(y=avg,x=Parameter)) + stat_summary(fun.args=list(mult=1.96)) + theme_minimal()
Let's do a time-varying ideal point plot with true point overlay:
id_plot_legis_dyn(time_gp_est,plot_sim = T) +facet_wrap(~person_id)
id_plot_sims(time_gp_est,type='Residuals')
id_plot_sims(time_gp_est)
Another new addition is the latent space model, which is only a slight modification on the IRT model.
latent_noninfl_sim <- id_sim_gen(num_person=10,num_items=300,ordinal=F, inflate=F, model_type='binary', latent_space=T) latent_noninfl_est <- id_estimate(idealdata=latent_noninfl_sim, model_type=13, restrict_ind_high = as.character(sort(latent_noninfl_sim@simul_data$true_person, decreasing=TRUE, index=T)$ix[1:2]), restrict_ind_low = as.character(sort(latent_noninfl_sim@simul_data$true_person, decreasing=F, index=T)$ix[1:2]),fix_high = sort(latent_noninfl_sim@simul_data$true_person, decreasing=T)[1:2],fix_low = sort(latent_noninfl_sim@simul_data$true_person, decreasing=F)[1:2], fixtype='prefix',id_refresh=100,warmup=500,niters=500,nchains=2,ncores=8) id_plot_rhats(latent_noninfl_est)
id_plot_legis(latent_noninfl_est,show_true = T)
id_sim_coverage(latent_noninfl_est) %>% bind_rows(.id='Parameter') %>% ggplot(aes(y=avg,x=Parameter)) + stat_summary(fun.args=list(mult=1.96)) + theme_minimal()
latent_noninfl_est %>% id_post_pred %>% id_plot_ppc(latent_noninfl_est,ppc_pred=.)
# you can't have too few people relative to bills latent_infl_sim <- id_sim_gen(num_person=10,num_items=400,ordinal=F, inflate=T, model_type='binary', latent_space=T) latent_infl_est <- id_estimate(idealdata=latent_infl_sim, model_type=14, restrict_ind_high = as.character(sort(latent_infl_sim@simul_data$true_person, decreasing=TRUE, index=T)$ix[1]), restrict_ind_low = as.character(sort(latent_infl_sim@simul_data$true_person, decreasing=F, index=T)$ix[1]),fix_high = sort(latent_infl_sim@simul_data$true_person, decreasing=T)[1],fix_low = sort(latent_infl_sim@simul_data$true_person, decreasing=F)[1], fixtype='prefix',nchains=2,ncores=8, adapt_delta=0.95, diff_reg_sd = 2, diff_miss_sd=2) id_plot_rhats(latent_infl_est)
id_plot_legis(latent_infl_est,show_true = T)
id_sim_coverage(latent_infl_est) %>% bind_rows(.id='Parameter') %>% ggplot(aes(y=avg,x=Parameter)) + stat_summary(fun.args=list(mult=1.96)) + theme_minimal()
latent_infl_est %>% id_post_pred %>% id_plot_ppc(latent_infl_est,ppc_pred=.)
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.