library(tidyverse)
# library(tictoc)
library(brms)
library(tidybayes)
library(rio)
# ----- KFT
X19_12_17_Rohdaten_KV_und_TF <- rio::import("../../../../Tina/Analyse/database_Rohdaten/19-12-17_Rohdaten_KV_und_TF.sav")
full_data <- X19_12_17_Rohdaten_KV_und_TF %>% rename(person = Codenr)
# var_specs <- list(response = 'response', item ='item', person = 'person')
var_specs_kft <- list(person_covariables_main_effect = 'Schultyp')
data_kft <- compose_dataset(full_data, KFT_1:KFT_25, variable_specifications = var_specs_kft)
formula_kft <- build_formula()
formula_kft_reg <- build_formula(variable_specifications = var_specs_kft)
sum_score <- data_kft %>% group_by(person, Schultyp) %>% summarise(sum = sum(response))
fit <- brm(formula = response ~ 1, data = data_kft, family = brms::brmsfamily("bernoulli", link = "logit"))
fit2 <- brm(formula = response ~ 1 + Schultyp, data = data_kft, family = brms::brmsfamily("bernoulli", link = "logit"))
fit_base <- birtm(data = data_kft, formula = formula_kft)
fit_reg <- birtm(data = data_kft, formula = formula_kft_reg, variable_specifications = var_specs_kft)
R2_latent_regression(fit_reg, fit_base)
temp_data <- fit_reg$data %>% mutate(Schultyp = factor(Schultyp))
Y <- data.frame(Intercept = 1, Schultyp = (temp_data %>% group_by(person) %>% summarise(Schultyp = median(as.numeric(Schultyp)-1)) %>% ungroup %>% select(Schultyp))) %>% as.matrix
beta <- fixef(fit_reg)[,1] %>% as.matrix()
variance <- matrix(VarCorr(fit_reg)$person$sd[,1]^2)
res <- residuals(fit_reg)[,1]
tam_latent_regression_standardized_solution(variance, beta, Y)
# bayesian_latent_regression(res, beta, Y)
data_klass <- tibble(theta = ranef(fit_base)$person[,1,1], typ = full_data$Schultyp)
data_klass %>% ggplot(aes(x=typ, y=theta)) + geom_violin(draw_quantiles = c(0.25, 0.5, 0.75))
summary(lm(data = data_klass, theta~typ))
mod <- TAM::tam(resp = full_data %>% select(KFT_1:KFT_25), pid = full_data$person)
wle <- TAM::tam.wle(mod)
data_klass <- data_klass %>% mutate(wle = wle$theta)
data_klass %>% ggplot(aes(x=theta, y=wle, color=typ)) + geom_point(alpha = .5) + geom_abline()
summary(lm(data = data_klass, wle~typ))
summary(lm(data = sum_score, scale(sum)~Schultyp))
summary(lme4::lmer(data = data_kft, response ~ (1|person) + Schultyp))
Schultyp <- full_data$Schultyp
dat <- full_data %>% select(KFT_1:KFT_25)
formulay1 <- ~Schultyp
lat2 <- TAM::tam.mml(dat, dataY = full_data$Schultyp, formulaY = formulay1)
se<- tam.se(lat2)
summary(lat2)
# lat1$latreg_stand$beta_stand
# lat1$latreg_stand$R2_theta
formula_kft <- build_formula(model_specifications = list(item_parameter_number = 2))
formula_kft$pforms$logalpha <- logalpha ~ 1
formula_kft$pforms$logalpha <- logalpha ~ 1 + (1 |i| item)
formula_kft$pforms$beta <- beta ~ 0 + (1 |i| item)
# kft_1PL <- brm(formula = formula_kft,
# data = data_kft,
# prior = NULL,
# sample_prior = TRUE,
# save_pars = save_pars(all = TRUE),
# cores = 4,
# file = NULL
# )
kft_1PL_free_alpha <- birtm(data = data_kft, formula = formula_kft, file = 'models/kft_1PL_freealpha', iter = 2000, prior = p2PL)
kft_1PL_free_alpha_fixed_sdTheta <- birtm(data = data_kft, formula = formula_kft, file = 'models/kft_1PL_freealpha_fixed_sdTheta', iter = 2000, prior = p2PL +
brms::prior("constant(1)", class = "sd", group = "person", nlpar = "theta"))
p2PL_fixalpha <- brms::prior("normal(0, 2)", class = "b", nlpar = "skillintercept") +
brms::prior("constant(0)", class = "b", nlpar = "logalpha")
kft_1PL_fixed_alphaMean <- birtm(data = data_kft, formula = formula_kft, file = 'models/kft_1PL_fixalphamean', iter = 2000, prior = p2PL_fixalpha)
kft_2PL_fixed_alphaMean <- birtm(data = data_kft, formula = build_formula(model_specifications = list(item_parameter_number = 2)),
file = 'models/kft_2PL_fixalphamean', iter = 2000, prior = p2PL_fixalpha)
kft_2PL_fixed_sdTheta <- birtm(data = data_kft, formula = build_formula(model_specifications = list(item_parameter_number = 2)),
file = 'models/kft_2PL_fixedThetaSD', iter = 2000, prior = p2PL +
brms::prior("constant(1)", class = "sd", group = "person", nlpar = "theta"), refit = TRUE)
kft_1PL <- birtm(data = data_kft, formula = formula_kft, file = 'models/kft_1PL', iter = 2000)
# temp_kft_1PL <- birtm(data = data_kft, formula = formula_kft, iter = 1000)
# kft_1PL_2 <- birtm_aio(response_data = full_data, response_columns = KFT_1:KFT_5, iter = 1000)
# items <- c('KFT_1', 'KFT_2', 'KFT_3', 'KFT_4', 'KFT_5')
# kft_1PL_3 <- birtm_aio(response_data = full_data, response_columns = items, iter = 1000)
p2PL <- brms::prior("normal(0, 2)", class = "b", nlpar = "skillintercept") +
brms::prior("normal(0, 0.5)", class = "b", nlpar = "logalpha")
# tic()
kft_2PL2 <- birtm_aio(response_data = full_data, response_columns = KFT_1:KFT_25,
model_specifications = list(item_parameter_number = 2),
prior = p2PL, file = 'models/kft_2PL_nocorr2')
# timer <- toc() # 1159.34 sec elapsed
kft_2PL_corr2 <- birtm(data = data_kft, formula = formula_kft,
model_specifications = list(item_parameter_number = 2),
prior = p2PL, file = 'models/kft_2PL_corr2')
full_data_gernderdich <- full_data %>% filter(Geschlecht %in% c('m', 'w'))
kft_1PL_dif <- birtm_aio(response_data = full_data_gernderdich, response_columns = KFT_1:KFT_25,
variable_specifications = list(uniform_dif = 'Geschlecht'),
model_specifications = list(item_parameter_number = 1),
file = 'models/kft_1PL_dif_gender_all_items')
kft_1PL <- birtm_aio(response_data = full_data_gernderdich, response_columns = KFT_1:KFT_25,
# variable_specifications = list(uniform_dif = 'Geschlecht'),
model_specifications = list(item_parameter_number = 1),
file = 'models/kft_1PL_genderfiltered')
kft_1PL_gendercov <- birtm_aio(response_data = full_data_gernderdich, response_columns = KFT_1:KFT_25,
variable_specifications = list(person_covariables_main_effect = 'Geschlecht'),
model_specifications = list(item_parameter_number = 1),
file = 'models/kft_1PL_genderfiltered_perscov')
kft_1PL_schoolcov <- birtm_aio(response_data = full_data, response_columns = KFT_1:KFT_25,
variable_specifications = list(person_covariables_main_effect = 'Schultyp'),
model_specifications = list(item_parameter_number = 1),
file = 'models/kft_1PL_schoolcov')
# data_kft <- compose_dataset(full_data, KFT_1:KFT_25, variable_specifications = list(uniform_dif = 'Geschlecht'))
# formula_kft <- build_formula(variable_specifications = list(uniform_dif = 'Geschlecht'))
get_variables(kft_1PL_dif)
data <- kft_1PL_dif %>% spread_draws(r_item[item,gender])
dif <- data %>% compare_levels(r_item, gender) %>% median_qi()
data(RankCorr, package = "ggdist")
RankCorr %>%
spread_draws(b[i,j])
# --------------- Transformation
X19_12_17_Rohdaten_KV_und_TF <- haven::read_sav("../../../../Tina/Analyse/database_Rohdaten/19-12-17_Rohdaten_KV_und_TF.sav")
data_VS <- X19_12_17_Rohdaten_KV_und_TF
rm(X19_12_17_Rohdaten_KV_und_TF) # l?scht die Verkn?pfung mit dem alten Namen
data_VS <- data_VS[-8,]
data_VS <- data_VS[-164,]
data_VS_TF <- data_VS %>% select(HKaT2:VSuT12) %>% select(-contains("Time"))
# Umkodierung: A = 1; D1, D2, D3 = 0
map_A <- which(data_VS_TF == "A", arr.ind=TRUE)
map_D1 <- which(data_VS_TF == "D1", arr.ind=TRUE)
map_D2 <- which(data_VS_TF == "D2", arr.ind=TRUE)
map_D3 <- which(data_VS_TF == "D3", arr.ind=TRUE)
data_VS_TF_Umkodierung = data.frame(matrix(NA, nrow = 225, ncol = 108))
colnames(data_VS_TF_Umkodierung) <- colnames(data_VS_TF)
data_VS_TF_Umkodierung[map_A] <-1
data_VS_TF_Umkodierung[map_D1] <-0
data_VS_TF_Umkodierung[map_D2] <-0
data_VS_TF_Umkodierung[map_D3] <-0
data_VS_TF <- data_VS_TF_Umkodierung
rm(data_VS_TF_Umkodierung)
# TF 1) Modellrechnung TAM alle Items, Probanden = 225 ####
# Hinzufügen Codenummer zum Datensatz (data_VS_TF)
data_VS_TF_Code <- data.frame(data_VS_TF, data_VS$Codenr) %>% rename(Codenummer = data_VS.Codenr)
tf_1pl <- birtm_aio(response_data = data_VS_TF_Code, response_columns = HKaT2:VSuT12,
variable_specifications = list(person = 'Codenummer'), model_specifications = list())
tf_1pl_lat <- birtm_aio(response_data = data_VS_TF_Code, response_columns = HKaT2:VSuT12,
person_data = data_VS %>% mutate(Codenummer = Codenr, KFTsum = scale(Rohwertsumme_KFT_berechnet)) %>% select(Codenummer, KFTsum),
variable_specifications = list(person = 'Codenummer', person_covariables_main_effect = 'KFTsum'), model_specifications = list())
p2PL <- brms::prior("normal(0, 2)", class = "b", nlpar = "skillintercept") +
brms::prior("normal(0, 0.5)", class = "b", nlpar = "logalpha") +
brms::prior("constant(1)", class = "sd", group = "Codenummer", nlpar = "theta")
tf_2pl_constSD <- birtm_aio(response_data = data_VS_TF_Code, response_columns = HKaT2:VSuT12,
variable_specifications = list(person = 'Codenummer'),
model_specifications = list(item_parameter_number = 2),
prior = p2PL, control = list(adapt_delta = 0.85),
iter = 6000, thin = 5,
)
fct_count(data_VS$Sprachkenntnisse)
data_VS$Sprachkenntnisse = as.factor(data_VS$Sprachkenntnisse)
data_VS$Sprachkenntnisse[data_VS$Sprachkenntnisse == 4] = 3
droplevels(data_VS$Sprachkenntnisse)
fct_count(data_VS$Sprachkenntnisse)
data_VS2 <- data_VS %>% rename(Codenummer = Codenr)
tf_1pl_SRLS <- birtm_aio(response_data = data_VS_TF_Code, response_columns = HKaT2:VSuT12,
person_data = data_VS2 %>% select(Codenummer, Sprachkenntnisse),
variable_specifications = list(person = 'Codenummer', person_covariables_main_effect = 'Sprachkenntnisse'),
model_specifications = list())
f <- build_formula(variable_specifications = list(person = 'Codenummer', person_covariables_main_effect = 'Sprachkenntnisse'),
model_specifications = list(item_parameter_number = 2))
d <- compose_dataset(response_data = data_VS_TF_Code, response_columns = HKaT2:VSuT12,
person_data = data_VS2 %>% select(Codenummer, Sprachkenntnisse),
variable_specifications = list(person = 'Codenummer', person_covariables_main_effect = 'Sprachkenntnisse'))
brms::get_prior(f, d)
p2PL_SRLS_main <- brms::prior("normal(0, 2)", class = "b", nlpar = "skillintercept") +
brms::prior("normal(0, 0.5)", class = "b", nlpar = "logalpha") +
brms::prior("constant(1)", class = "sd", group = "Codenummer", nlpar = "theta") +
brms::prior("constant(0)", class = "b", nlpar = "personcovars", coef = "Sprachkenntnisse1")
tf_2pl_constSD_SRLS_main <- birtm_aio(response_data = data_VS_TF_Code, response_columns = HKaT2:VSuT12,
person_data = data_VS2 %>% select(Codenummer, Sprachkenntnisse),
variable_specifications = list(person = 'Codenummer', person_covariables_main_effect = 'Sprachkenntnisse'),
model_specifications = list(item_parameter_number = 2),
prior = p2PL_SRLS_main, control = list(adapt_delta = 0.9),
iter = 6000, thin = 5,
)
f <- build_formula(variable_specifications = list(person = 'Codenummer', person_covariables_all_dimensions = 'Sprachkenntnisse'),
model_specifications = list(item_parameter_number = 2))
d <- compose_dataset(response_data = data_VS_TF_Code, response_columns = HKaT2:VSuT12,
person_data = data_VS2 %>% select(Codenummer, Sprachkenntnisse),
variable_specifications = list(person = 'Codenummer', person_covariables_all_dimensions = 'Sprachkenntnisse'))
brms::get_prior(f, d)
p2PL_SRLS_mediated <- brms::prior("normal(0, 2)", class = "b", nlpar = "skillintercept") +
brms::prior("normal(0, 0.5)", class = "b", nlpar = "logalpha") +
brms::prior("constant(1)", class = "sd", group = "Codenummer", nlpar = "theta") +
brms::prior("constant(0)", class = "b", nlpar = "theta", coef = "Sprachkenntnisse1")
tf_2pl_constSD_SRLS_mediated <- birtm_aio(response_data = data_VS_TF_Code, response_columns = HKaT2:VSuT12,
person_data = data_VS2 %>% select(Codenummer, Sprachkenntnisse),
variable_specifications = list(person = 'Codenummer', person_covariables_all_dimensions = 'Sprachkenntnisse'),
model_specifications = list(item_parameter_number = 2),
prior = p2PL_SRLS_mediated, control = list(adapt_delta = 0.85),
iter = 6000, thin = 5,
)
cor.test(x = brms::ranef(tf_1pl)$Codenummer[,1,1], y = as.numeric(data_VS$Sprachkenntnisse), method = 'pearson')
cor.test(x = brms::ranef(tf_1pl)$Codenummer[,1,1], y = as.numeric(data_VS$Sprachkenntnisse), method = 'kendall')
# ---- teste neue 3PL-fixed Funktion ----
f <- build_formula(variable_specifications = list(person = 'Codenummer', fixed_pseudo_guess = 'guess'),
model_specifications = list(item_parameter_number = 3))
d <- compose_dataset(response_data = data_VS_TF_Code, response_columns = HKaT2:VSuT12,
variable_specifications = list(person = 'Codenummer', fixed_pseudo_guess = 'guess')) %>% mutate(guess = 1/4)
p3PL <- brms::prior("normal(0, 2)", class = "b", nlpar = "skillintercept") +
brms::prior("normal(0, 0.5)", class = "b", nlpar = "logalpha") +
brms::prior("constant(1)", class = "sd", group = "Codenummer", nlpar = "theta")
fit_3PL_fixed_tf <- birtm(data = d, formula = f, prior = p3PL, control = list(adapt_delta = 0.90))
form_3PL <- brms::bf(response ~ guess + (1 - guess) * inv_logit(skillintercept + exp(logalpha) * theta + beta),
skillintercept ~ 1,
theta ~ 0 + (1 | Codenummer),
logalpha ~ 1 + (1 | item),
beta ~ 0 + (1 | item),
nl = TRUE, family = brms::brmsfamily("bernoulli", link = "identity")
)
fit_3PL_fixed_tf2 <- birtm(data = d, formula = form_3PL, prior = p3PL, control = list(adapt_delta = 0.90))
f2 <- build_formula(variable_specifications = list(person = 'Codenummer'),
model_specifications = list(item_parameter_number = 3))
p3PL2 <- brms::prior("normal(0, 2)", class = "b", nlpar = "skillintercept") +
brms::prior("normal(0, 0.5)", class = "b", nlpar = "logalpha") +
brms::prior("constant(1)", class = "sd", group = "Codenummer", nlpar = "theta") +
brms::prior("normal(-1, 0.5)", class = "b", nlpar = "logitgamma")
fit_3PL_fixed_tf3 <- birtm(data = d, formula = f2, prior = p3PL2, control = list(adapt_delta = 0.90))
form_3PL2 <- brms::bf(response ~ .25 + .75 * inv_logit(skillintercept + exp(logalpha) * theta + beta),
skillintercept ~ 1,
theta ~ 0 + (1 | Codenummer),
logalpha ~ 1 + (1 | item),
beta ~ 0 + (1 | item),
nl = TRUE, family = brms::brmsfamily("bernoulli", link = "identity")
)
fit_3PL_fixed_tf4 <- birtm(data = d, formula = form_3PL2, prior = p3PL, control = list(adapt_delta = 0.90))
form_3PL3 <- brms::bf(response ~ guess + guess * inv_logit(skillintercept + exp(logalpha) * theta + beta),
skillintercept ~ 1,
theta ~ 0 + (1 | Codenummer),
logalpha ~ 1 + (1 | item),
beta ~ 0 + (1 | item),
brms::nlf(guess ~ .25),
nl = TRUE, family = brms::brmsfamily("bernoulli", link = "identity")
)
fit_3PL_fixed_tf5 <- birtm(data = d, formula = form_3PL3, prior = p3PL, control = list(adapt_delta = 0.90))
# ---- 4PL
x <- read.csv(file = 'interactive_scripts/dataset.csv')
answers <- c(7, 6, 8, 2, 1, 5, 1, 6, 3, 2, 4, 5)
names(answers) <- paste0("SPM", seq_along(answers))
spm_long <- x %>%
mutate(person = seq_len(n())) %>%
gather("item", "response", SPM1:SPM12) %>%
mutate(
answer = answers[item],
response2 = as.numeric(response == answer),
item = as.factor(as.numeric(sub("^SPM", "", item)))
)
spm <- spm_long %>% select(person, response2, item) %>% pivot_wider(values_from = response2, names_from = item)
f <- build_formula(variable_specifications = list(response = 'response2', pseudo_guess_dimension = 'item', careless_error_dimension = 'item'),
model_specifications = list(item_parameter_number = 4))
brms::get_prior(formula = f, data = spm_long)
p4PL <- brms::prior("normal(0, 2)", class = "b", nlpar = "skillintercept") +
brms::prior("normal(0, 0.5)", class = "b", nlpar = "logalpha") +
brms::prior("constant(1)", class = "sd", group = "person", nlpar = "theta") +
brms::prior("normal(-2, 0.5)", class = "b", nlpar = "logitgamma") +
brms::prior("normal(-2, 0.5)", class = "b", nlpar = "logitpsi")
prior_4pl <-
prior("normal(0, 2)", class = "b", nlpar = "skillintercept") +
prior("normal(0, 1)", class = "b", nlpar = "logalpha") +
prior("normal(-2, 0.5)", class = "b", nlpar = "logitgamma") +
prior("normal(-2, 0.5)", class = "b", nlpar = "logitpsi") +
prior("normal(0, 1)", class = "sd", group = "person", nlpar = "theta") +
prior("normal(0, 3)", class = "sd", group = "item", nlpar = "beta") +
prior("normal(0, 1)", class = "sd", group = "item", nlpar = "logalpha") +
prior("normal(0, 1)", class = "sd", group = "item", nlpar = "logitgamma") +
prior("normal(0, 1)", class = "sd", group = "item", nlpar = "logitpsi")
fit_4PL_2 <- birtm(data = spm_long, formula = f, prior = prior_4pl, control = list(adapt_delta = 0.95), iter = 3000, warmup = 1000)
# variance explained in latent IRT regression models
form1 <- bf(response2 ~ 1,
family = brms::brmsfamily("bernoulli", link = "logit"))
prio1 <- prior("normal(0, 3)", class = "Intercept")
fit1 <- brm(data = spm_long, formula = form1, prior = prio1, core = 4)
summary(fit1, prior = TRUE)
residuals(fit1) %>% as_tibble() %>% summarise(var_res = var(Estimate), sum2_res = sum(Estimate^2))
var_base <- residuals(fit1) %>% as_tibble() %>% summarise(var_res = var(Estimate)) %>% pull
bayes_R2(fit1)
form2 <- bf(response2 ~ 1 + (1 | person),
family = brms::brmsfamily("bernoulli", link = "logit"))
fit2 <- brm(data = spm_long, formula = form2, prior = prio1, core = 4)
summary(fit2, prior = TRUE)
residuals(fit2) %>% as_tibble() %>% summarise(var_res = var(Estimate), sum2_res = sum(Estimate^2))
var_class <- residuals(fit2) %>% as_tibble() %>% summarise(var_res = var(Estimate)) %>% pull
bayes_R2(fit2)
loo_R2(fit2)
form3 <- bf(response2 ~ 1 + (1 | item) + (1 | person),
family = brms::brmsfamily("bernoulli", link = "logit"))
fit3 <- brm(data = spm_long, formula = form3, prior = prio1, core = 4)
residuals(fit3) %>% as_tibble() %>% summarise(var_res = var(Estimate), sum2_res = sum(Estimate^2))
bayes_R2(fit3)
var_irt <- residuals(fit3) %>% as_tibble() %>% summarise(var_res = var(Estimate)) %>% pull
r2_irt <- 1 - var_irt/var_base
r2_new <- ((bayes_R2(fit3)-bayes_R2(fit2))/(1-bayes_R2(fit2)))[[1]]
r2_new_summ_robust <- ((bayes_R2(fit3, summary = FALSE)-bayes_R2(fit2, summary = FALSE))/(1-bayes_R2(fit2, summary = FALSE))) %>% as_tibble() %>%
median_hdi()
r2_new_summ <- ((bayes_R2(fit3, summary = FALSE)-bayes_R2(fit2, summary = FALSE))/(1-bayes_R2(fit2, summary = FALSE))) %>% as_tibble() %>%
summarise(Estimate = mean(R2), Est.Error = sd(R2), Q2.5 = quantile(R2, .025), Q97.5 = quantile(R2, .975))
r2_new_summ_robust_loo <- ((loo_R2(fit3, summary = FALSE)-loo_R2(fit2, summary = FALSE))/(1-loo_R2(fit2, summary = FALSE))) %>% as_tibble() %>%
median_hdi()
# ---- latent R2 fuction ----
# R2_latent_regression <- function(fit, fit_basemodel, robust = TRUE, loo = FALSE) {
# if (loo) {
# R2_fit <- brms::loo_R2(fit, summary = FALSE)
# R2_base <- brms::loo_R2(fit_basemodel, summary = FALSE)
# } else {
# R2_fit <- brms::bayes_R2(fit, summary = FALSE)
# R2_base <- brms::bayes_R2(fit_basemodel, summary = FALSE)
# }
#
# frac <- (R2_fit-R2_base)/(1-R2_base)
#
# if (robust) {
# r2 <- frac %>% tibble::as_tibble() %>% tidybayes::median_hdi()
# } else {
# r2 <- frac %>% tibble::as_tibble() %>% tidybayes::mean_qi() %>% dplyr::mutate(Est.Error = sd(frac), .after = 1)
# }
#
# return(r2)
# }
#--------- biology IRT ----
library(TAM)
eirtdata <- read_csv("https://raw.githubusercontent.com/danielbkatz/EIRT/master/eirtdata.csv")
eirtdata <- eirtdata[-1]
eirtdata <- eirtdata %>% mutate(treat = ifelse(treat==1, "treat", "not-treat"))
eirtdata$treat <- as.factor(eirtdata$treat)
eirtdata$proflevel <- as.factor(eirtdata$proflevel)
eirtdata$abilcov <- as.factor(eirtdata$abilcov)
iter <- 2000
var_specs <- list(person = 'id')
fit_bio <- birtm_aio(response_data = eirtdata, response_columns = Math.1:MathWordProb.10,
variable_specifications = var_specs, refresh = max(iter/100, 1)
)
var_specs_reg <- list(person = 'id', person_covariables_main_effect = 'treat')
fit_bio_reg <- birtm_aio(response_data = eirtdata, response_columns = Math.1:MathWordProb.10,
variable_specifications = var_specs_reg, refresh = max(iter/100, 1)
)
fit_bio_reg2 <- update(fit_bio_reg,
newdata = birtms::compose_dataset(response_data = eirtdata, response_columns = Math.1:MathWordProb.10, variable_specifications = var_specs_reg),
prior = brms::prior("normal(0, 2)", class = "Intercept"), core = 4, refresh = max(iter/100, 1))
treat <- eirtdata$treat
latg <- tam.mml(eirtdata[2:40], group = treat)
summary(latg)
formulay1 <- ~treat
lat1 <- TAM::tam.mml(eirtdata[2:40], dataY = eirtdata$treat, formulaY = formulay1)
se<- tam.se(lat1)
summary(lat1)
lat1$latreg_stand$beta_stand
lat1$latreg_stand$R2_theta
Y <- lat1$Y
beta <- lat1$beta
variance <- lat1$variance
daty <- eirtdata %>% select(treat, proflevel, abilcov)
formulay2 <- ~ treat + proflevel
latreg2 <- tam.mml(eirtdata[2:40], dataY = daty, formulaY = formulay2)
summary(latreg2)
selatreg2 <- tam.se(latreg2)
selatreg2$beta
var_specs_reg2 <- list(person = 'id', person_covariables_main_effect = c('treat', 'proflevel'))
fit_bio_reg_twopreds <- birtm_aio(response_data = eirtdata, response_columns = Math.1:MathWordProb.10,
variable_specifications = var_specs_reg2, refresh = max(iter/100, 1),
prior = brms::prior("normal(0, 2)", class = "Intercept")
)
Q <- matrix(data = 0, nrow = 40, ncol = 3)
Q4 <- matrix(data = 0, nrow = 40, ncol = 4)
map <- itype[1:40] %>% as.factor() %>% as.numeric()
for (i in 1:40) {
Q4[i,map[i]] <- 1
if(map[i] == 4) map[i] <- 3
Q[i,map[i]] <- 1
}
treat <- eirtdata$treat
formulay1 <- ~treat
multi <- TAM::tam.mml(eirtdata[2:41], dataY = eirtdata$treat, formulaY = formulay1, Q=Q, control=list(snodes=2000, maxiter=100))
multi2 <- TAM::tam.mml(eirtdata[2:41], dataY = eirtdata$treat, formulaY = formulay1, Q=Q4, control=list(snodes=2000, maxiter=100))
summary(multi)
Y <- multi$Y
beta <- multi$beta
variance <- multi2$variance
summary(lltm)
bayes_R2(fit_bio_reg_lltm)
MuMIn::r.squaredGLMM(lltmlmer) # passt mit den Werten von bayes_R2 nicht überein.
# Der Wert für R2m (nur Random effects nicht fixed) entspricht in etwa R2_latent/(pi^2/3) <-- Korrekturterm aus Nakagawa (2013) (müsste eigentlich im Nenner addiert werden)
R2_latent(variance5, beta5, Y5)[[1]][[1]]/(pi^2/3)
# ----
Y2 <- data.frame(Intercept = 1, treat = (fit_bio_reg2$data %>% group_by(id) %>% summarise(treat = median(as.numeric(treat)-1)) %>% ungroup %>% select(treat))) %>% as.matrix
beta2 <- fixef(fit_bio_reg2)[,1] %>% as.matrix() %>% t
variance2 <- matrix(VarCorr(fit_bio_reg2)$id$sd[,1]^2)
# res2 <- residuals(fit_bio_reg2)[,1]
tam_latent_regression_standardized_solution(variance2, beta2, Y2)
# bayesian_latent_regression(res2, beta2, Y2)
beta3 <- fixef(fit_bio_reg2, summary = FALSE) %>% as.matrix()
variance3 <- (VarCorr(fit_bio_reg2, summary = FALSE)$id %>% as.data.frame())^2 %>% as.matrix()
R2_vec <- rep(NA, nrow(beta3))
sd_vec <- rep(NA, nrow(beta3))
for (i in seq_along(beta3[,1])) {
temp <- tam_latent_regression_standardized_solution(variance3[i,] %>% as.matrix(), beta3[i,] %>% as.matrix(), Y2)
R2_vec[i] <- temp$R2_theta
sd_vec[i] <- temp$sd_theta
}
median_hdi(R2_vec)
median_hdi(sd_vec)
Y4 <- data.frame(id = fit_bio_reg_twopreds$data$id, make_standata(data = fit_bio_reg_twopreds$data, formula = fit_bio_reg_twopreds$formula)[['X']]) %>%
group_by(id) %>% summarise_all(~ median(as.numeric(.x))) %>%ungroup() %>% select(-id) %>% as.matrix()
beta4 <- fixef(fit_bio_reg_twopreds)[,1] %>% as.matrix() %>% t
variance4 <- matrix(VarCorr(fit_bio_reg_twopreds)$id$sd[,1]^2)
# function for latent R2
R2_latent(variance4, beta4, Y4)
Y <- data.frame(id = fit_bio_reg_twopreds$data$id, make_standata(data = fit_bio_reg_twopreds$data, formula = fit_bio_reg_twopreds$formula)[['X']]) %>%
group_by(id) %>% summarise_all(~ median(as.numeric(.x))) %>%ungroup() %>% select(-id) %>% as.matrix()
Y <- data.frame(make_standata(data = fit_bio_reg_twopreds$data, formula = fit_bio_reg_twopreds$formula)[['X']]) %>% as.matrix()
beta <- fixef(fit_bio_reg_twopreds, summary = FALSE) %>% as.matrix()
variance <- matrix(VarCorr(fit_bio_reg_twopreds, summary = FALSE)$id$sd^2)
R2_latent(variance, beta, Y)
# LLTM
#change the name of the mathwordproblem items
names(eirtdata)[32:41] <- paste0("WordProb.", 1:10)
#note the nested ifelse statements.
#Basically, if each var contains a certain value, such as "Math,"
#it gets recoded into its own column using "Mutate" from dplyr.
eirtlong <- gather(eirtdata, key = "item", value = "response", Math.1:WordProb.10) %>%
mutate(ittype = if_else(grepl(pattern = "Math.", x = item), 1,
if_else(grepl(pattern = "Science.", x = item), 2,
if_else(grepl(pattern = "ELA.", x = item), 3,4))))
head(eirtlong)
formulaa <- ~ittype
facets <- as.data.frame(eirtlong[c(5,7)])
#just to make it easier to call
names(facets) <- c("item", "ittype")
#only want to use the response column. Have to add an id column
lltm <- tam.mml.mfr(eirtlong[6], facets = facets,
formulaA = formulaa, pid = eirtlong$id)
control <- lme4::glmerControl(optimizer = "optimx",
calc.derivs = F, optCtrl = list(method="nlminb", starttests = F, kkt=F))
#same thing but in lme4
lltmme1 <- lme4::glmer(data=eirtlong, response~-1 + as.factor(ittype) + (1|id),
family = "binomial", control = control)
#random item in lme4 (I don't know how to do this in TAM)
lltmlmer <- lme4::glmer(data=eirtlong, response~-1 + as.factor(ittype) + (1|id) + (1|item),
family = "binomial", control = control)
itype <- strsplit(names(eirtdata[2:41]), "[.]") %>% unlist() %>% matrix(ncol = 2, byrow = TRUE)
idata <- tibble(item = names(eirtdata[2:41]), itype = itype[,1])
var_specs_reg_lltm <- list(person = 'id', item_covariables_intercept = 'itype')
fit_bio_reg_lltm <- birtm_aio(response_data = eirtdata, response_columns = Math.1:WordProb.10,
item_data = idata,
variable_specifications = var_specs_reg_lltm, refresh = max(iter/100, 1),
prior = brms::prior("normal(0, 2)", class = "Intercept")
)
fit_bio_reg_lltm_fixintercept <- birtm_aio(response_data = eirtdata, response_columns = Math.1:WordProb.10,
item_data = idata,
variable_specifications = var_specs_reg_lltm, refresh = max(iter/100, 1),
prior = brms::prior("constant(0)", class = "Intercept")
)
Y5 <- data.frame(item = fit_bio_reg_lltm$data$item, make_standata(data = fit_bio_reg_lltm$data, formula = fit_bio_reg_lltm$formula)[['X']]) %>%
group_by(item) %>% summarise_all(~ median(as.numeric(.x))) %>%ungroup() %>% select(-item) %>% as.matrix()
beta5 <- fixef(fit_bio_reg_lltm, summary = FALSE) %>% as.matrix()
variance5 <- matrix(VarCorr(fit_bio_reg_lltm, summary = FALSE)$item$sd^2) # muss hier die Itemvarianz übergeben werden?
R2_latent(variance5, beta5, Y5)
Y_all <- data.frame(make_standata(data = fit_bio_reg_lltm$data, formula = fit_bio_reg_lltm$formula)[['X']]) %>% as.matrix()
R2_latent(variance5, beta5, Y_all)
var_specs_reg_dualmode <- list(person = 'id', item_covariables_intercept = 'itype', person_covariables_main_effect = 'treat')
fit_bio_reg_dualmode <- birtm_aio(response_data = eirtdata, response_columns = Math.1:WordProb.10,
item_data = idata,
variable_specifications = var_specs_reg_dualmode, refresh = max(iter/100, 1),
prior = brms::prior("normal(0, 2)", class = "Intercept")
)
Y_all <- data.frame(make_standata(data = fit_bio_reg_dualmode$data, formula = fit_bio_reg_dualmode$formula)[['X']]) %>% as.matrix()
Y6 <- data.frame(id = fit_bio_reg_dualmode$data$id, make_standata(data = fit_bio_reg_dualmode$data, formula = fit_bio_reg_dualmode$formula)[['X']]) %>%
group_by(id) %>% summarise_all(~ median(as.numeric(.x))) %>%ungroup() %>% select(-id) %>% as.matrix()
Y7 <- data.frame(item = fit_bio_reg_dualmode$data$item, make_standata(data = fit_bio_reg_dualmode$data, formula = fit_bio_reg_dualmode$formula)[['X']]) %>%
group_by(item) %>% summarise_all(~ median(as.numeric(.x))) %>%ungroup() %>% select(-item) %>% as.matrix()
beta_all <- fixef(fit_bio_reg_dualmode, summary = FALSE) %>% as.matrix()
variance_all <- matrix(VarCorr(fit_bio_reg_dualmode, summary = FALSE)$item$sd^2)
R2_latent(variance5, beta5, Y_all)
var_specs_reg_multidim <- list(person = 'id', regular_dimensions = 'itype', person_covariables_main_effect = 'treat')
fit_bio_reg_multidim <- birtm_aio(response_data = eirtdata, response_columns = Math.1:WordProb.10,
item_data = idata,
variable_specifications = var_specs_reg_multidim, refresh = max(iter/100, 1),
prior = brms::prior("normal(0, 2)", class = "Intercept")
)
var_specs_reg_multidim2 <- list(person = 'id', regular_dimensions = 'itype', person_covariables_all_dimensions = 'treat')
build_formula(variable_specifications = var_specs_reg_multidim2)
prio_multidim2 <- brms::prior("normal(0, 2)", class = "Intercept") +
brms::prior("constant(0)", class = "b", coef ='itypeELA:treatnotMtreat') +
brms::prior("constant(0)", class = "b", coef ='itypeMath:treatnotMtreat') +
brms::prior("constant(0)", class = "b", coef ='itypeScience:treatnotMtreat') +
brms::prior("constant(0)", class = "b", coef ='itypeWordProb:treatnotMtreat')
fit_bio_reg_multidim2 <- birtm_aio(response_data = eirtdata, response_columns = Math.1:WordProb.10,
item_data = idata,
variable_specifications = var_specs_reg_multidim2, refresh = max(iter/100, 1),
prior = prio_multidim2,
chains = 4, core = 4
)
idata2 <- idata
idata2[idata2$itype == 'WordProb',2] <- 'Science'
idata2$itype %>% as.factor()
var_specs_reg_3dim <- list(person = 'id', regular_dimensions = 'itype', person_covariables_main_effect = 'treat')
prio_3dim <- brms::prior("normal(0, 2)", class = "Intercept")
get_prior(formula=build_formula(var_specs_reg_3dim), data = compose_dataset(response_data = eirtdata, response_columns = Math.1:WordProb.10,
item_data = idata2,
variable_specifications = var_specs_reg_3dim))
fit_bio_reg_3dim <- birtm_aio(response_data = eirtdata, response_columns = Math.1:WordProb.10,
item_data = idata2,
variable_specifications = var_specs_reg_3dim, refresh = max(iter/100, 1),
prior = prio_3dim,
chains = 4, core = 2,
iter = iter, warmup = 1000
)
var_specs_3dim <- list(person = 'id', regular_dimensions = 'itype')
prio_3dim <- brms::prior("normal(0, 2)", class = "Intercept")
get_prior(formula=build_formula(var_specs_3dim), data = compose_dataset(response_data = eirtdata, response_columns = Math.1:WordProb.10,
item_data = idata2,
variable_specifications = var_specs_3dim))
fit_bio_3dim <- birtm_aio(response_data = eirtdata, response_columns = Math.1:WordProb.10,
item_data = idata2,
variable_specifications = var_specs_3dim, refresh = max(iter/100, 1),
prior = prio_3dim,
chains = 4, core = 2,
iter = iter, warmup = 1000
)
variance_all <- matrix(VarCorr(fit_bio_reg_3dim, summary = FALSE)$item$sd^2)
R2_latent_birtms(fit_bio_reg_dualmode)
R2_latent_birtms(fit_bio_reg_3dim)
R2_latent_birtms(fit_bio_reg_lltm)
R2_latent_birtms(fit_bio_reg_twopreds)
Y_all <- data.frame(make_standata(data = fit_bio_reg$data, formula = fit_bio_reg$formula)[['X']]) %>% as.matrix()
beta_all <- fixef(fit_bio_reg, summary = FALSE) %>% as.matrix()
variance_all <- matrix(VarCorr(fit_bio_reg, summary = FALSE)$id$sd^2)
R2_latent(variance_all, beta_all, Y_all)
# ---- check different regression formulations ----
eirtdata_frac <- eirtdata %>% sample_frac(size = .1)
var_specs_frac <- list(person = 'id')
prio_frac <- brms::prior("normal(0, 2)", class = "Intercept")
fit_bio_frac <- birtm_aio(response_data = eirtdata_frac, response_columns = Math.1:WordProb.10,
item_data = idata,
variable_specifications = var_specs_frac, refresh = max(iter/20, 1),
prior = prio_frac,
chains = 4, core = 4,
iter = iter, warmup = 1000
)
var_specs_frac_treat <- list(person = 'id', person_covariables_main_effect = 'treat')
prio_frac <- brms::prior("normal(0, 2)", class = "Intercept")
fit_bio_frac_treat <- birtm_aio(response_data = eirtdata_frac, response_columns = Math.1:WordProb.10,
item_data = idata,
variable_specifications = var_specs_frac_treat, refresh = max(iter/20, 1),
prior = prio_frac,
chains = 4, core = 4,
iter = iter, warmup = 1000
)
var_specs_frac_treat_and_itype <- list(person = 'id', person_covariables_main_effect = c('treat'), item_covariables_intercept = 'itype')
prio_frac <- brms::prior("normal(0, 2)", class = "Intercept")
fit_bio_frac_treat_and_itype <- birtm_aio(response_data = eirtdata_frac, response_columns = Math.1:WordProb.10,
item_data = idata,
variable_specifications = var_specs_frac_treat_and_itype, refresh = max(iter/20, 1),
prior = prio_frac,
chains = 4, core = 4,
iter = iter, warmup = 1000
)
var_specs_frac_treat_times_itype <- list(person = 'id', person_covariables_main_effect = c('treat*itype'))
fit_bio_frac_treat_times_itype <- birtm(data = compose_dataset(response_data = eirtdata_frac, response_columns = Math.1:WordProb.10,
item_data = idata, variable_specifications = var_specs_frac_treat_and_itype),
formula = build_formula(var_specs_frac_treat_times_itype),
prior = prio_frac)
var_specs_frac_treat_by_itype <- list(person = 'id', person_covariables_main_effect = c('itype:treat'))
prio_frac_by <- brms::prior("normal(0, 2)", class = "Intercept") +
brms::prior("constant(0)", class = "b", coef ='itypeELA:treatnotMtreat') +
brms::prior("constant(0)", class = "b", coef ='itypeMath:treatnotMtreat') +
brms::prior("constant(0)", class = "b", coef ='itypeScience:treatnotMtreat') +
brms::prior("constant(0)", class = "b", coef ='itypeWordProb:treatnotMtreat')
fit_bio_frac_treat_by_itype <- birtm(data = compose_dataset(response_data = eirtdata_frac, response_columns = Math.1:WordProb.10,
item_data = idata, variable_specifications = var_specs_frac_treat_and_itype),
formula = build_formula(var_specs_frac_treat_by_itype),
prior = prio_frac_by)
var_specs_frac_itype <- list(person = 'id', item_covariables_intercept = 'itype')
fit_bio_frac_itype <- birtm_aio(response_data = eirtdata_frac, response_columns = Math.1:WordProb.10,
item_data = idata,
variable_specifications = var_specs_frac_itype, refresh = max(iter/20, 1),
prior = prio_frac,
chains = 4, core = 4,
iter = iter, warmup = 1000
)
var_specs_frac_4d <- list(person = 'id', regular_dimensions = 'itype')
fit_bio_frac_4d <- birtm_aio(response_data = eirtdata_frac, response_columns = Math.1:WordProb.10,
item_data = idata,
variable_specifications = var_specs_frac_4d, refresh = max(iter/20, 1),
prior = prio_frac,
chains = 4, core = 4,
iter = iter, warmup = 1000
)
var_specs_frac_4d_itype <- list(person = 'id', regular_dimensions = 'itype', item_covariables_intercept = 'itype')
fit_bio_frac_4d_itype <- birtm_aio(response_data = eirtdata_frac, response_columns = Math.1:WordProb.10,
item_data = idata,
variable_specifications = var_specs_frac_4d_itype, refresh = max(iter/20, 1),
prior = prio_frac,
chains = 4, core = 4,
iter = iter, warmup = 1000
)
var_specs_frac_4d_treat <- list(person = 'id', regular_dimensions = 'itype', person_covariables_main_effect = c('treat'))
fit_bio_frac_4d_treat <- birtm_aio(response_data = eirtdata_frac, response_columns = Math.1:WordProb.10,
item_data = idata,
variable_specifications = var_specs_frac_4d_treat, refresh = max(iter/20, 1),
prior = prio_frac,
chains = 4, core = 4,
iter = iter, warmup = 1000
)
var_specs_frac_4d_treat_times_itype <- list(person = 'id', regular_dimensions = 'itype', person_covariables_main_effect = c('treat*itype'))
fit_bio_frac_4d_treat_times_itype <- birtm(data = compose_dataset(response_data = eirtdata_frac, response_columns = Math.1:WordProb.10,
item_data = idata, variable_specifications = var_specs_frac_treat_and_itype),
formula = build_formula(var_specs_frac_4d_treat_times_itype),
prior = prio_frac)
var_specs_frac_4d_treat_by_itype <- list(person = 'id', regular_dimensions = 'itype', person_covariables_main_effect = c('itype:treat'))
fit_bio_frac_4d_treat_by_itype <- birtm(data = compose_dataset(response_data = eirtdata_frac, response_columns = Math.1:WordProb.10,
item_data = idata, variable_specifications = var_specs_frac_treat_and_itype),
formula = build_formula(var_specs_frac_4d_treat_by_itype),
prior = prio_frac_by)
# can't add - terms to remove some predictors to get to : from *
var_specs_frac_4d_test_minus <- list(person = 'id', regular_dimensions = 'itype', person_covariables_all_dimensions = c('treat'), person_covariables_main_effect = '-itype')
fit_bio_frac_4d_treat_times_itype_min_itype <- birtm(data = compose_dataset(response_data = eirtdata_frac, response_columns = Math.1:WordProb.10,
item_data = idata, variable_specifications = var_specs_frac_treat_and_itype),
formula = build_formula(var_specs_frac_4d_test_minus),
prior = prio_frac)
get_prior(data = compose_dataset(response_data = eirtdata_frac, response_columns = Math.1:WordProb.10,
item_data = idata, variable_specifications = var_specs_frac_treat_and_itype),
formula = build_formula(var_specs_frac_4d_test_minus))
# ---- R2 with missings and heterogen answers ----
FDW_data <- rio::import('interactive_scripts/Gesamtdatensatz Querschnitt.sav')
kft_simon <- FDW_data %>% as_tibble %>% group_by(Usercode, Studienstand_FD_Chemie) %>% select(starts_with('KFT')) %>% select(ends_with('P')) %>% `[`(-c(1:17),) %>% ungroup %>%
mutate(group = factor(Studienstand_FD_Chemie, labels = c('BA1','MA','BA2','MA1','MA2')))
kft_1PL_simon <- birtm_aio(response_data = kft_simon, response_columns = KFTA01P:KFTA25P,
variable_specifications = list(person = 'Usercode'))
kft_1PL_simon_reg <- birtm_aio(response_data = kft_simon, response_columns = KFTA01P:KFTA25P,
variable_specifications = list(person = 'Usercode', person_covariables_main_effect = 'group'))
R2_latent(kft_1PL_simon_reg, fast=FALSE)
R2_latent(kft_1PL_simon_reg)
# scheint beides identisch zu sein, obwohl unterschiedliche Leute unterschiedlich viele Beobachtungen beitrugen
grp <- kft_simon$group
latg <- tam.mml(kft_simon %>% select(starts_with('KFT')), group = grp)
summary(latg)
formulay1 <- ~grp
lat1 <- TAM::tam.mml(kft_simon %>% select(starts_with('KFT')), dataY = kft_simon$group, formulaY = formulay1)
se<- tam.se(lat1)
summary(lat1)
# ---- 2pl ----
eirtdata <- read_csv("https://raw.githubusercontent.com/danielbkatz/EIRT/master/eirtdata.csv")
eirtdata <- eirtdata[-1]
eirtdata <- eirtdata %>% mutate(treat = ifelse(treat==1, "treat", "not-treat"))
eirtdata$treat <- as.factor(eirtdata$treat)
eirtdata$proflevel <- as.factor(eirtdata$proflevel)
eirtdata$abilcov <- as.factor(eirtdata$abilcov)
names(eirtdata)[32:41] <- paste0("WordProb.", 1:10)
itype <- strsplit(names(eirtdata[2:41]), "[.]") %>% unlist() %>% matrix(ncol = 2, byrow = TRUE)
idata <- tibble(item = names(eirtdata[2:41]), itype = itype[,1])
iter <- 2000
idata2 <- idata
idata2[idata2$itype == 'WordProb',2] <- 'Science'
eirtdata_frac <- eirtdata %>% sample_frac(size = .5)
model_specs2pl = list(item_parameter_number = 2)
model_specs2pl_regalphagroups = list(item_parameter_number = 2, model_unique_alpha_groups_on_regular_dimensions = TRUE)
var_specs_3dim <- list(person = 'id', regular_dimensions = 'itype')
prio_3dim_2pl <- brms::prior("normal(0, 2)", nlpar = 'skillintercept', class = "b") +
brms::prior("normal(0, 1)", nlpar = 'logalpha1', class = "b") +
brms::prior("constant(1)", nlpar = 'theta1', class = "sd")
get_prior(formula=build_formula(var_specs_3dim, model_specifications = model_specs2pl),
data = compose_dataset(response_data = eirtdata, response_columns = Math.1:WordProb.10,
item_data = idata2, variable_specifications = var_specs_3dim))
fit_bio_3dim_2pl <- birtm_aio(response_data = eirtdata_frac, response_columns = Math.1:WordProb.10,
item_data = idata2,
variable_specifications = var_specs_3dim, refresh = max(iter/20, 1),
model_specifications = model_specs2pl_regalphagroups,
prior = prio_3dim_2pl,
chains = 4, core = 4,
iter = iter, warmup = 1000,
control = list(adapt_delta = .95)
)
fit_bio_3dim_2pl_singlealphagroup <- birtm_aio(response_data = eirtdata_frac, response_columns = Math.1:WordProb.10,
item_data = idata2,
variable_specifications = var_specs_3dim, refresh = max(iter/20, 1),
model_specifications = model_specs2pl,
prior = prio_3dim_2pl,
chains = 4, core = 4,
iter = iter, warmup = 1000
)
idata3 <- idata2
idata3 <- idata3 %>% bind_cols(map(unique(idata3$itype), ~(idata3 %>% transmute('{.x}' := ifelse(itype == .x, 1, 0))))) # entspricht der for-Schleife
# for (i in unique(idata3$itype)) {
# idata3 <- idata3 %>% mutate('{i}' := ifelse(itype == i, 1, 0))
# }
var_specs_3dim_unreg <- list(person = 'id', unregular_dimensions = unique(idata3$itype))
prio_3dim_2pl_unreg <- brms::prior("normal(0, 2)", nlpar = 'skillintercept', class = "b") +
brms::prior("normal(0, 1)", nlpar = 'logalpha1', class = "b") +
brms::prior("normal(0, 1)", nlpar = 'logalpha2', class = "b") +
brms::prior("normal(0, 1)", nlpar = 'logalpha3', class = "b") +
brms::prior("constant(1)", nlpar = 'theta1', class = "sd") +
brms::prior("constant(1)", nlpar = 'theta2', class = "sd") +
brms::prior("constant(1)", nlpar = 'theta3', class = "sd")
fit_bio_3dim_2pl_unregular <- birtm_aio(response_data = eirtdata_frac, response_columns = Math.1:WordProb.10,
item_data = idata3,
variable_specifications = var_specs_3dim_unreg, refresh = max(iter/20, 1),
model_specifications = model_specs2pl,
prior = prio_3dim_2pl_unreg,
chains = 4, core = 4,
iter = iter, warmup = 1000
)
# ------- ordinal ------------
stemcell <- rio::import('interactive_scripts/GSS2006.sav') %>% transmute(belief = relevel(factor(FUND, levels = c(1,2,3),
labels = c('Fundamentalist', 'Moderate', 'Liberal')), ref = 2),
rating = fct_rev(factor(SCRESRCH, levels = c(1:4),
labels = c('Definitely should fund such research',
'Probably should fund such research',
'Probably should not fund such research',
'Definitely should not fund such research'), ordered = TRUE)),
.before = 1) %>% filter(!is.na(rating) & !is.na(belief))
fit_sc0 <- brm(
formula = rating ~ 1,
data = stemcell,
family = cumulative("probit"),
core = 4
)
get_prior(data = stemcell, formula = rating ~ 1 + belief)
fit_sc1 <- brm(
formula = rating ~ 1 + belief,
data = stemcell,
family = cumulative("probit"),
core = 4
)
fit_sc1_log <- brm(
formula = rating ~ 1 + belief,
data = stemcell,
family = cumulative("logit"),
core = 4
)
conditional_effects( fit_sc1, "belief",
categorical = TRUE)
conditional_effects( fit_sc1, "belief",
categorical = FALSE)
conditional_effects( fit_sc1,
categorical = TRUE)
fit_sc1_num <- brm(
formula = rating ~ 1 + belief,
data = stemcell %>% mutate(belief = as.numeric(relevel(belief, ref = 2))),
family = cumulative("probit"),
core = 4
)
conditional_effects( fit_sc1_num, "belief",
categorical = TRUE)
conditional_effects( fit_sc1_num, "belief",
categorical = FALSE)
ce <- conditional_effects( fit_sc1_num,
categorical = TRUE)
plot(ce, plot = FALSE)[[1]] +
scale_x_continuous(expand = c(0, 1)) +
scale_y_continuous(expand = expansion(mult = c(0, 0.05)), limits = c(0, NA))
fit_sc2 <- brm(
formula = rating ~ 1 + cs(belief),
data = stemcell,
family = acat("probit")
)
marginal_effects(fit_sc2, categorical = TRUE)
fit_sc4 <- brm(
formula = bf(rating ~ 1 + belief) +
lf(disc ~ 0 + belief, cmc = FALSE),
data = stemcell,
family = cumulative("probit")
)
#-------
mtcars_clean = mtcars %>% mutate(cyl = ordered(cyl))
head(mtcars_clean)
m_cyl = brm(
cyl ~ mpg,
data = mtcars_clean,
family = cumulative('probit'),
seed = 58393
)
conditional_effects(m_cyl, categorical = TRUE)
# ----- poly trace plots -----
library(mirt)
library(tidyverse)
library(pander)
data(Science)
calculate_cumulative_icc <- function(thresholds, b, a, theta = NULL, b_is_easyness = TRUE, mult_all_by_a = FALSE) {
# parts of this function coming from: https://aidenloe.github.io/irtplots.html
# to calculate the base category probabilities for its traceplot/icc
# function derived by Rost (2004, p. 209)
p0 <- function(thresholds, a, b, theta, b_is_easyness, mult_all_by_a) {
if(b_is_easyness) {
sign <- 1
} else {
sign <- -1
}
sum_exp <- 0
cum_threshold <- 0
for (i in seq_along(thresholds)) {
cum_threshold <- cum_threshold + thresholds[i]
if (mult_all_by_a) {
exp_term <- exp(a*(i*theta + sign*cum_threshold))
} else {
exp_term <- exp(a*i*theta + sign*cum_threshold)
}
sum_exp <- sum_exp + exp_term
}
x <- 1 / (1 + sum_exp)
return(x)
}
# to calculate the threshold iccs probabilities
twopl <- function(a, b, theta, b_is_easyness, mult_all_by_a){
if(b_is_easyness) {
sign <- 1
} else {
sign <- -1
}
if (mult_all_by_a) {
x <- 1 / (1 + exp(-(a * (theta + sign*b))))
} else {
x <- 1 / (1 + exp(-(a*theta + sign*b)))
}
return(x)
}
if(is.null(theta)) {
theta <- seq(-4,4,.1)
}
if(is.null(a)) {
a <- 1
}
values <- tibble(theta = theta)
for (i in seq_along(thresholds)) {
values <- values %>% mutate('tcc{i}' := twopl(a = a, b = thresholds[i], theta, b_is_easyness, mult_all_by_a))
}
values <- values %>% mutate(cat1 = p0(thresholds, a, b, theta, b_is_easyness, mult_all_by_a))
for (i in seq_along(thresholds)) {
cat_i <- sym(glue::glue('cat{i}'))
tcc_i <- sym(glue::glue('tcc{i}'))
values <- values %>% mutate('cat{i+1}' := {{cat_i}}*{{tcc_i}}/(1-{{tcc_i}}))
}
return(values)
}
plot_cumulative_icc <- function(itemname = 'dummy', thresholds, b, a, theta = NULL, b_is_easyness = TRUE, mult_all_by_a = FALSE,
category_cc = TRUE, threshold_cc = FALSE, misc_lines = FALSE) {
# parts of this function coming from: https://aidenloe.github.io/irtplots.html
dat <- calculate_cumulative_icc(thresholds, b, a, theta, b_is_easyness, mult_all_by_a)
# removes not requested data
if (!category_cc) {
dat <- dat %>% select(-starts_with('cat'))
}
if (!threshold_cc) {
dat <- dat %>% select(-starts_with('tcc'))
}
longer.format <- dat %>% pivot_longer(cols = c(starts_with('tcc'), starts_with('cat')), names_to = 'curve', values_to = 'measurement')
# title
if(category_cc & threshold_cc) {
types <- 'CCC and TCC'
} else {
if(category_cc) types <- 'CCC'
if(threshold_cc) types <- 'TCC'
}
title <- glue::glue('{types} Plot for Item {itemname}')
# plot chart
g <- ggplot(longer.format, aes(theta, measurement, colour=curve)) +
geom_line() +
ggtitle(title) +
xlab(expression(theta)) +
ylab(expression(P(theta))) +
theme_bw() # +
# theme(text = element_text(size=16),
# axis.text.x=element_text(colour="black"),
# axis.text.y=element_text(colour="black"),
# legend.title=element_blank())
if(misc_lines) {
n <- ncol(dat)-1
color_pal <- scales::hue_pal()(n)
offset <- ifelse(category_cc & threshold_cc, n/2+1, 0)
g <- g + geom_hline(aes(yintercept = 0.5), linetype = "dashed")
for (i in seq_along(thresholds)) {
g <- g + geom_vline(aes_(xintercept = thresholds[i]), color=color_pal[offset+i])
}
}
return(g)
}
thresholds <- cfs[3,2:4]
p0 <- function(thresholds, theta, a) {
sum_exp <- 0
cum_threshold <- 0
for (i in seq_along(thresholds)) {
cum_threshold <- cum_threshold + thresholds[i]
exp_term <- exp(a*(i*theta-cum_threshold))
sum_exp <- sum_exp + exp_term
}
x <- 1 / (1 + sum_exp)
}
cat0 <- p0(thresholds, theta, a = cfs[3,1])
model <- mirt(Science, 1, itemtype="gpcm", verbose=FALSE)
cfs <- coef(model, IRTpars = TRUE, simplify=TRUE)$items
# 2pl
twopl <- function(a, b, theta){
1 / (1 + exp(-(a * (theta - b))))}
# theta
theta <- seq(-4,4,.1)
# select item to display OCC
item <- 3
# create Operational characteristic curve
lst <- list()
for(i in 1:3) lst[[i]] <- twopl(a=cfs[item,1], b=cfs[item,i+1], theta=theta)
dat <- data.frame(theta, as.data.frame(lst))
names(dat) <- c('theta', 'b1', 'b2', 'b3')
dat <- dat %>% mutate(cat0 = .env$cat0 ,
# cat0 = 1-b1,
cat1 = cat0*b1/(1-b1),
cat2 = cat1*b2/(1-b2),
cat3 = cat2*b3/(1-b3)
)
plot_cumulative_icc(thresholds = cfs[3,2:4], b = 0, a = cfs[3,1], b_is_easyness = FALSE, mult_all_by_a = TRUE)
# wide to long format.
longer.format <- gather(dat,categorials,measurement,b1:cat4)
# # Plot item trace line (mirt package)
# plot(model, type="trace")
#
#
# # Item Parameter Estimates table
# itemPar <- cfs
# pander(itemPar, plain.ascii = TRUE, caption = "Item par estimates")
# plot chart
ggplot(longer.format, aes(theta, measurement, colour=categorials)) +
geom_line() +
ggtitle(paste('OCC Plot for Item', rownames(cfs)[item])) +
xlab(expression(theta)) +
ylab(expression(P(theta))) +
geom_vline(aes(xintercept = cfs[item,2]), color='red') +
geom_vline(aes(xintercept = cfs[item,3]), color="green") +
geom_vline(aes(xintercept = cfs[item,4]), color='blue') +
geom_hline(aes(yintercept = 0.5)) + theme_bw() +
theme(text = element_text(size=16),
axis.text.x=element_text(colour="black"),
axis.text.y=element_text(colour="black"),
legend.title=element_blank())
# ---------
data(Science)
itemtype <- 'gpcm'
mod <- mirt(Science,1, itemtype=itemtype, verbose=FALSE)
# select item to display CRC and OCC
itemSelected <- 4
# Extract all items
# Compute the probability trace lines
# Put into a list
traceline <- NULL
for(i in 1:length(Science)){
extr.2 <- extract.item(mod, i)
Theta <- matrix(seq(-6,6, by = .1))
traceline[[i]] <- probtrace(extr.2, Theta)
}
names(traceline) <- paste('item',1:length(traceline))
# rbind traceline
traceline.df <- do.call(rbind, traceline)
# create item names length based on length of theta provided
item <- rep(names(traceline),each=length(Theta))
# put them all together into a dataframe
l.format <- cbind.data.frame(Theta, item, traceline.df)
# wide to long format.
longer.format <- gather(l.format,categorials,measurement,P.1:P.4)
longer.format$item<-as.factor(longer.format$item)
# Selecting items
items <- c("item 1", "item 2", "item 3", "item 4")
item.format <-longer.format[longer.format$item == items[itemSelected],]
# item coefficient
cfs <- coef(mod, IRTpars = TRUE, simplify=TRUE)$items
# 2pl
twopl <- function(a, b, theta){
1 / (1 + exp(-(a * (theta - b))))}
# theta
theta <- seq(-6,6,.1)
# create Operational characteristic curve
lst <- list()
for(i in 1:3) lst[[i]] <- twopl(a=cfs[itemSelected,1], b=cfs[itemSelected,i+1], theta=theta)
dat <- data.frame(theta, as.data.frame(lst))
names(dat) <- c('Theta', 'b1', 'b2', 'b3')
dat <- dat %>% mutate(cat1 = b1/(b1+b2+b3),
cat2 = b2/(b1+b2+b3),
cat3 = b3/(b1+b2+b3),
cat4 = (1-b1)/(1+b1+b2+b3)
)
# wide to long format.
longer.format <- gather(dat,categorials,measurement,b1:cat4)
# Item Parameter Estimates table
# itemPar <- cfs
# plot chart
ggplot() +
geom_line(data=longer.format, aes(Theta, measurement, fill=categorials)) +
geom_line(data=item.format, aes(Theta, measurement, colour = item,fill=categorials)) +
ggtitle(paste("CRC + OCC Plot:",itemtype)) +
xlab(expression(theta)) +
ylab(expression(P(theta))) +
geom_vline(aes(xintercept = cfs[itemSelected,2]), color='black') +
geom_vline(aes(xintercept = cfs[itemSelected,3]), color="black") +
geom_vline(aes(xintercept = cfs[itemSelected,4]), color='black') +
geom_hline(aes(yintercept = 0.5), color="black") + theme_bw() +
theme(text = element_text(size=16),
axis.text.x=element_text(colour="black"),
axis.text.y=element_text(colour="black"),
legend.title=element_blank(),
legend.position="hide") +
geom_text(aes(x = -3.3, y = 0.55, label = "b1")) +
geom_text(aes(x = -1.5, y = 0.55, label = "b2")) +
geom_text(aes(x = 1.3, y = 0.55, label = "b3")) +
geom_text(aes(x = -4, y = 0.75, label = "C1", color="P.1")) +
geom_text(aes(x = -2, y = 0.58, label = "C2", color="P.2")) +
geom_text(aes(x = 0.2, y = 0.63, label = "C3", color="P.3")) +
geom_text(aes(x = 2.5, y = 0.75, label = "C4", color="P.4"))
# ------------- odds ratio ------
X19_12_17_Rohdaten_KV_und_TF <- rio::import("../../../../Tina/Analyse/database_Rohdaten/19-12-17_Rohdaten_KV_und_TF.sav")
full_data <- X19_12_17_Rohdaten_KV_und_TF %>% rename(person = Codenr)
data_kft <- full_data %>% select(KFT_1:KFT_25)
fit_kft_1pl <- rio::import('models/kft_1PL.rds')
y_rep <- rstan::extract(fit_kft_1pl$fit, pars = "log_lik", include = FALSE)
rep <- 500
J <- ncol(data_kft)
kft_list <- NULL
for (i in seq_len(rep)) {
kft_list[[i]] <- data_kft
}
data_kft_array <- array(data = unlist(kft_list), dim = c(nrow(data_kft), J, rep))
discrepancy_measures <- list()
discrepancy_measures$or <- matrix(data = 0, nrow = rep, ncol = (J^2 - J)/2) %>% as.data.frame()
or_pre <- calculate_odds_ratio(data_kft)
y_rep <- posterior_predict_long(fit_kft_1pl, n_samples = 500)
ppe <- posterior_epred_long(fit_kft_1pl, n_samples = 500)
data_rep <- y_rep %>% select(-response) %>% pivot_wider(names_from = 'item', values_from = 'yrep') %>% select(-person) %>%
group_by(.draw) %>% group_split(.keep = FALSE)
data_kft_array <- list2array(data_rep)
# data_kft_array <- list2array(kft_list)
or_post <- calculate_odds_ratio(data_kft_array)
or_pre_data <- or_pre %>% t() %>% as_tibble(rownames = ".variable") %>% rename(or_pre = V1)
# OR
# PPP
# slower with loop
# microbenchmark::microbenchmark({
# dif <- or_post
# or_pre2 <- as.numeric(or_pre)
# for(i in 1:length(or_pre2)) {
# dif[i] <- dif[i]-or_pre2[i]
# }
# })
# faster via lapply
microbenchmark::microbenchmark({
or_pre2 <- rep_dataframe(or_pre, nrow(or_post))
dif <- or_post-or_pre2
})
dif_hdi <- dif %>% gather_variables() %>% mode_hdi(.width = .89) %>% mutate(includes_zero = .lower <= 0 & .upper >= 0,
too_high = .lower >= 0+sd(or_pre_data$or_pre/10),
too_low = .upper <= 0-sd(or_pre_data$or_pre/10),
# prob_too_high = .lower >= 0 & !too_high,
# prob_too_low = .upper <= 0 & !too_low
)
orPPP <- colMeans(dif>0) %>% as_tibble(rownames = "itempair") %>% arrange(itempair)
ppp_data <- orPPP %>% mutate(itempair = str_remove(itempair, 'ItemPair')) %>% separate(itempair, into = c('i1', 'i2'), convert = TRUE) %>%
mutate(radius = 0.5, group = seq_along(radius), OneMinorPPP = 1 - value) %>% rename(orPPP = value)
or_data <- dif_hdi %>% left_join(or_pre_data, by = '.variable') %>% mutate(.variable = str_remove(.variable, 'ItemPair')) %>%
separate(.variable, into = c('i1', 'i2'), convert = TRUE) %>%
rename(or = .value) %>% mutate(or_eval_z = ifelse(too_high | too_low, NA, scale(or)), or_z = scale(or), width = abs(.upper-.lower)) %>%
left_join(ppp_data %>% select(i1, i2, orPPP), by = c('i1', 'i2'))
max <- max(or_data$or_eval, na.rm = TRUE)
min <- min(or_data$or_eval, na.rm = TRUE)
limit_fill <- round(max(abs(max), abs(min)), 1)
itemnumber <- length(unique(ppp_data$i1))
# Scatterpie
# Heatmap wesentlich schicker!
# ggplot() + scatterpie::geom_scatterpie(aes(x=i1, y=i2), data=ppp_data, cols=c("orPPP", "OneMinorPPP")) +
# # scatterpie::geom_scatterpie(aes(x=i2, y=i1), data=ppp_data, cols=c('orPPP', 'OneMinorPPP')) +
# scale_x_continuous(breaks=1:itemnumber,limits=c(0, itemnumber+1)) +
# scale_y_continuous(breaks=1:itemnumber+1,limits=c(1, itemnumber+2)) +
# coord_fixed() + xlab("Item A") + ylab("Item B") + theme(legend.position="none")
# (1) Plot OR-PPP-values
ppp <- ggplot(ppp_data, aes(i1, i2, fill = orPPP, label = orPPP)) +
scale_x_continuous("Item A", expand=c(0,0), position = "top", breaks = seq(min(ppp_data$i1),max(ppp_data$i1),1)) +
scale_y_continuous("Item B", expand=c(0,0), breaks = seq(min(ppp_data$i2),max(ppp_data$i2),1)) +
ggtitle('Odds Ratio PPP') +
geom_tile(color="black") +
scale_fill_gradient2(low = "blue", high = "red", mid = "white", midpoint = .5,
limit = c(0, 1), name = "PPP") +
# geom_text(aes(i1, i2, fill = orPPP), color = "black", size = 3) +
theme(panel.grid.major = element_blank(), panel.border= element_rect(size=2,color="black", fill=NA), axis.ticks = element_blank()) +
coord_fixed()
# (2) Plot OR-dif values
# grau zeigt die Items, wo 0 nicht im CI ist, daher sehr angenehm zu interpretieren
# PPP-Plot hilft, die Richtung der Abweichung zu erkennen
# Buchstaben machen PPP-Plot aber vielelicht überflüssig.
# Die meisten Werte können nciht von 0 abgegrenzt werden. Aber auch nicht mit 0 gleichgesetzt werden.
# Sie haben HDI sowohl innerhalb und als auch außerhalb des ROPE.
# Einige sinf klar verschieden von 0. Kein Item hat ald ORdiff mit Sicherheit 0.
or_dif <- ggplot(or_data, aes(i1, i2, fill = or_eval_z, label = or_eval_z, height = 1, width = 1)) +
scale_x_continuous("Item A", expand=c(0,0), position = "top", breaks = seq(min(or_data$i1),max(or_data$i1),1)) +
scale_y_continuous("Item B", expand=c(0,0), breaks = seq(min(or_data$i2),max(or_data$i2),1)) +
ggtitle('z-standardised Odds Ratio difference') +
geom_tile(color="black") +
# geom_tile(data = subset(or_data, too_low | too_high), fill = 'gray50', color="black") +
scale_fill_gradient2(low = "#0571b0", high = "#ca0020", mid = "#f7f7f7",
midpoint = 0, limit = c(-1, 1), name = "z(OR diff)", oob = scales::squish, na.value = 'gray50') +
# geom_text(aes(item_i, item_j, fill = PPP), color = "black", size = 3) +
geom_text(data = subset(or_data, too_low), aes(label = 'L'), size = 4) +
geom_text(data = subset(or_data, too_high), aes(label = 'H'), size = 4) +
# geom_text(data = subset(or_data, prob_too_high), aes(label = 'h'), size = 4) +
# geom_text(data = subset(or_data, prob_too_low), aes(label = 'l'), size = 4) +
theme(panel.grid.major = element_blank(), panel.border= element_rect(size=2, color="black", fill=NA), axis.ticks = element_blank()) +
coord_fixed()
or_dif
# (3) Combine two plots: OR diff and PPP
gridExtra::grid.arrange(or_dif, ppp, ncol = 2)
# (2) Plot OR values
# kaum Aussagekraft für sich alleion, da keine Verteilung gegeben (und fast alles rot)
ggplot(or_data, aes(i1, i2, fill = or_pre, label = or_pre)) +
scale_x_continuous("Item A", expand=c(0,0), position = "top", breaks = seq(min(or_data$i1),max(or_data$i1),1)) +
scale_y_continuous("Item B", expand=c(0,0), breaks = seq(min(or_data$i2),max(or_data$i2),1)) +
ggtitle('Odds Ratio difference') +
geom_tile(color="black") +
scale_fill_gradient2(low = "blue", high = "red", mid = "white",
midpoint = 2.5, limit = c(0, 7), name = "OR") +
# geom_text(aes(item_i, item_j, fill = PPP), color = "black", size = 3) +
theme(panel.grid.major = element_blank(), panel.border= element_rect(size=2,color="black", fill=NA), axis.ticks = element_blank()) +
coord_fixed()
tibble::tribble(
~x, ~y,
0, 1,
1, 0,
1, 1,
0, 0,
1, 0
) %>% calculate_odds_ratio()
or_data %>% plot_ppmc_or_heatmap()
# ----- odss ratio ci tests -----
or <- function(mat,corr=0) {
or <- (mat[,1]+corr)*(mat[,2]+corr)/((mat[,3]+corr)*(mat[,4]+corr))
}
or <- function(mat,corr=0) {
or <- (mat[,1])*(mat[,2])/((mat[,3]+corr)*(mat[,4]))
}
# {
# corrs <- unique(round(seq(1,500,1)/1000,10))
# m <- matrix(round(runif(40000, 0, 1000),0), ncol = 4)
# colnames(m) <- c('c00', 'c11', 'c01', 'c10')
# m_dat <- m %>% as_tibble() %>% mutate(odds = or(m,0))
# for (i in seq_along(corrs)) {
# odd <- paste0('odds_',round(corrs[i],5))
# m_dat <- m_dat %>% mutate({{odd}} := odds-or(m,corrs[i]))
# }
# # m_dat <- m_dat %>% filter(is.finite(odds))
# diff <- m_dat %>% select(starts_with('odds_')) %>% tidybayes::gather_variables() %>% group_by(.variable) %>%
# tidybayes::median_hdi(.width = .5)
# diff <- diff %>% separate(.variable, '_', into = c('prefix', 'num'), convert = TRUE) %>% rename(diff = .value, correction = num) %>% select(-prefix)
# diff %>% ggplot(aes(x= correction, y = diff)) + geom_ribbon(aes(x= correction, ymin = .lower, ymax = .upper), fill='grey70') + geom_line()
# }
# Haldane-Korrektur Simulation
# Je größer die Stichprobe (Anzahl Bearbeitungen <=> Werte in der Kontingenztabelle), desto kleiner der Effekt des Korrekturterms
# Der Fehler wird mit kleinerem Korrektur Term ebenfalls kleiner; aber alles auf einer sehr niedrigen Ebene (wenn die Bearbeitungszahl groß ist)
# hingegen wird die Differenz zum Wert wenn eine 0 eigentlich eine 1 sein würde für kleine Werte exponentiell größer
corrs <- unique(round(seq(1,5000,1)/4000,10))
m1 <- matrix(round(runif(40000, 0, 100),0), ncol = 4)
m <- m1
m[,3] <- 0
m2 <- m1
m2[,3] <- 1
odds_reference <- or(m2, 0)
odds_reference[is.infinite(odds_reference)] <- 1000000
{
c <- map(corrs, .f = ~or(mat = m, corr = .x))# %>% as.data.frame() %>% setNames(paste0('corr_',corrs))
c1 <- map(.x = c, .f = ~(abs(odds_reference - .x)))
c2 <- map(.x = c1, .f = ~median(.x)) %>% as.data.frame() %>% t() %>% `colnames<-`('diff') %>% as_tibble(rownames = 'corr')
c3 <- map(.x = c1, .f = ~HDInterval::hdi(.x, credMass = .89)) %>% as.data.frame() %>% t() %>% `colnames<-`(c('lower89', 'upper89'))
c3b <- map(.x = c1, .f = ~HDInterval::hdi(.x, credMass = .6)) %>% as.data.frame() %>% t() %>% `colnames<-`(c('lower60', 'upper60'))
c3c <- map(.x = c1, .f = ~HDInterval::hdi(.x, credMass = .75)) %>% as.data.frame() %>% t() %>% `colnames<-`(c('lower75', 'upper75'))
c4 <- cbind(c2, c3, c3b, c3c) %>% as_tibble(rownames = NULL)%>% mutate(corr = corrs)
c4 %>% ggplot(aes(x= corr, y = diff)) + geom_ribbon(aes(x= corr, ymin = lower89, ymax = upper89), fill='grey89') +
geom_ribbon(aes(x= corr, ymin = lower75, ymax = upper75), fill='grey75') +
geom_ribbon(aes(x= corr, ymin = lower60, ymax = upper60), fill='grey60') +
geom_line() + coord_cartesian(ylim = c(0,1000))
}
or2 <- function(mat,corr=0,ci=.89) {
z <- qnorm(ci+(1-ci)/2)
or <- (mat[,1]+corr)*(mat[,2]+corr)/((mat[,3]+corr)*(mat[,4]+corr))
se <- sqrt(1/(mat[,1]+corr)+1/(mat[,2]+corr)+1/(mat[,3]+corr)+1/(mat[,4]+corr))
upper <- exp(log(or)+z*se)
lower <- exp(log(or)-z*se)
return(list(or = or, lower = lower, upper = upper, ci = ci))
}
or_ci_uncond <- function(mat, ci = .89) {
y1 <- mat[,1] #n11
y2 <- mat[,4] #n01
n1 <- mat[,1] + mat[,3] #n11 + n10
n2 <- mat[,4] + mat[,2] #n01 + n00
return(PropCIs::orscoreci(y1, n1, y2, n2, ci))
}
# n11, n00, n10, n01
counts <- c(5,4,4,2)
# der Median des bayes mit Jeffrey prior entspricht eher dem OR des inferenzstat.
# Der Zugewinn von Bayes hauptsächlich in der 0 korrektur
# Sonst sind alle Werte sehr ähnlich
or_ci_uncond(matrix(counts, ncol = 4), .95)
# or_ci_uncond2(matrix(counts, ncol = 4), .95)
or_ci_bayes(matrix(counts, ncol = 4), .95, k = 0.5) # Jeffrey
or_ci_bayes(matrix(counts, ncol = 4), .95, k = 1) # uniform
or_ci_bayes_with_median(matrix(counts, ncol = 4), .95, k = 0.5)# Jeffrey
or_ci_bayes_with_median(matrix(counts, ncol = 4), .95, k = 1) # uniform
# vari <- matrix(NA, ncol = 5, nrow = 100) %>% as.data.frame()
# for (i in 1:100) {
# for (j in 1:5) {
# res <- or_ci_bayes_with_median(matrix(counts, ncol = 4), .95, k = 0.5, nsim = 100*(10^j))
# vari[i,j] <- res$median
# }
# }
#
# vari2 <- vari
# n11, n00, n10, n01
or2(matrix(counts, ncol = 4),0,.95)
or2(matrix(counts, ncol = 4),0.5,.95)
# n11, n00, n10, n01
ORtable<-matrix(c(counts[1],counts[3],counts[4],counts[2]),nrow = 2, ncol = 2)
ORtable
epitools::oddsratio.wald(ORtable, conf.level = .95)
epitools::oddsratio.small(ORtable, conf.level = .95)
# orscoreci2 <-
# function(x1,n1,x2,n2,conf.level){
# px = x1/n1
# py = x2/n2
# if(((x1==0) && (x2==0)) || ((x1==n1) && (x2==n2))){
# ul = 1/0
# ll = 0
# theta = NaN
# }
# else if((x1==0) || (x2==n2)){
# ll = 0
# theta = 0.01/n2
# ul = limit(x1,n1,x2,n2,conf.level,theta,1)
# }
# else if((x1==n1) || (x2==0)){
# ul = 1/0
# theta = 100*n1
# ll = limit(x1,n1,x2,n2,conf.level,theta,0)
# }
# else{
# theta = px/(1-px)/(py/(1-py))/1.1
# ll = limit(x1,n1,x2,n2,conf.level,theta,0)
# theta=px/(1-px)/(py/(1-py))*1.1
# ul = limit(x1,n1,x2,n2,conf.level,theta,1)
# theta = px/(1-px)/(py/(1-py))
# }
# cint <- c(ll, ul)
# attr(cint, "conf.level") <- conf.level
# rval <- list(conf.int = cint)
# class(rval) <- "htest"
# return(list(rval, theta))
# }
or_ci_uncond2 <- function(mat, ci = .89) {
x1 <- mat[,1] #n11
x2 <- mat[,4] #n01
n1 <- mat[,1] + mat[,3] #n11 + n10
n2 <- mat[,4] + mat[,2] #n01 + n00
return(orscoreci2(x1, n1, x2, n2, ci))
}
p = seq(0,1, length=100)
plot(p, dbeta(p, 100, 100), ylab="density", type ="l", col=4)
lines(p, dbeta(p, 0.5, 0.5), type ="l", col=3)
lines(p, dbeta(p, 2, 2), col=2)
lines(p, dbeta(p, 1, 1), col=1)
legend(0.7,8, c("Be(100,100)","Be(0.5,0.5)","Be(2,2)", "Be(1,1)"),lty=c(1,1,1,1),col=c(4,3,2,1))
limit <-
function(x,nx,y,ny,conflev,lim,t){
z = qchisq(conflev,1)
px = x/nx
score= 0
while ( score < z){
a = ny*(lim-1)
b = nx*lim+ny-(x+y)*(lim-1)
c = -(x+y)
p2d = (-b+sqrt(b^2-4*a*c))/(2*a)
p1d = p2d*lim/(1+p2d*(lim-1))
score = ((nx*(px-p1d))^2)*(1/(nx*p1d*(1-p1d))+1/(ny*p2d*(1-p2d)))*(nx+ny-1)/(nx+ny) ## added *(nx+ny-1)/(nx+ny)
ci = lim
if(t==0) { lim = ci/1.001 }
else{ lim = ci*1.001 }
}
return(ci)
}
or_ci_bayes <- function(mat, ci = .89, k) {
y1 <- mat[,1] #n11
y2 <- mat[,4] #n01
n1 <- mat[,1] + mat[,3] #n11 + n10
n2 <- mat[,4] + mat[,2] #n01 + n00
return(PropCIs::orci.bayes(y1, n1, y2, n2, k, k, k, k, ci))
}
or_ci_bayes_with_median <- function(mat, ci = .89, k, nsim = 10000000) {
y1 <- mat[,1] #n11
y2 <- mat[,4] #n01
n1 <- mat[,1] + mat[,3] #n11 + n10
n2 <- mat[,4] + mat[,2] #n01 + n00
return(orci.bayes_with_median(y1, n1, y2, n2, k, k, k, k, ci, nsim))
}
count_to_rate <- function(mat) {
y1 <- mat[1] #n11
y2 <- mat[4] #n01
n1 <- mat[1] + mat[3] #n11 + n10
n2 <- mat[4] + mat[2] #n01 + n00
return(c(y1, n1, y2, n2))
}
orci.bayes_with_median <- function(x1,n1,x2,n2,a,b,c,d,conf.level=0.95, nsim = 10000000)
{
or.app<- function(a1,b1,c1,d1,conf.level,nsim=nsim)
{
z1 <- rf(nsim, 2*a1,2*b1)
z2 <- rf(nsim, 2*c1,2*d1)
a <- (d1/c1)/(b1/a1)
z <- a*z1/z2
z <- sort(z)
return(z)
}
# Bayes tail interval with beta priors
fct.F<- function(x,t,a1,b1,a2,b2){
c <- (b2/a2)/(b1/a1)
df(x,2*a2,2*b2)*pf(x*t/c,2*a1,2*b1)
}
or.F <- function(t,a1,b1,a2,b2)
{
return(integrate(fct.F,0,Inf,t=t,a1=a1,b1=b1,a2=a2,b2=b2)$value)
}
or.fct <- function(ab,a1,b1,c1,d1,conf.level)
{
abs(or.F(ab[2],a1,b1,c1,d1) - (1 - (1-conf.level)/2))+
abs(or.F(ab[1],a1,b1,c1,d1) - (1-conf.level)/2)
}
# browser()
if(x2!=n2){
a1 <- a + x1
b1 <- b + n1 - x1
c1 <- c + x2
d1 <- d + n2 - x2
z <- or.app(a1,b1,c1,d1,conf.level,nsim)
med <- median(z)
lq <- nsim * (1-conf.level)/2
uq <- nsim * (1 - (1-conf.level)/2)
ci <- array(0,2)
ci[1] <- z[lq]
ci[2] <- z[uq]
start <- ci
tailci <- optim(start,or.fct,a1=a1,b1=b1,c1=c1,d1=d1,
conf.level=conf.level,control=list(maxit=20000))$par
if(tailci[1] < 0) tailci[1] <- 0
}
else{
a1 <- a + n1 - x1
b1 <- b + x1
c1 <- c + n2 - x2
d1 <- d + x2
z <- or.app(a1,b1,c1,d1,conf.level,nsim)
med <- median(1/z)
lq <- nsim * (1-conf.level)/2
uq <- nsim * (1 - (1-conf.level)/2)
ci <- array(0,2)
ci[1] <- z[lq]
ci[2] <- z[uq]
start <- ci
tailci1 <- optim(start,or.fct,a1=a1,b1=b1,c1=c1,d1=d1,
conf.level=conf.level,control=list(maxit=20000))$par
# if(tailci1[1] < 0) tailci1[1] <- ci[1]
tailci <- array(0,2)
tailci[1] <- 1/ tailci1[2]
tailci[2] <- 1/ tailci1[1]
}
return(list(ci = tailci, median = med#, ci2 = c(1/ci[[2]], 1/ci[[1]]), mode = 1/get_mode(z), mean = 1/mean(z)
)
)
}
# ------ Mode tests- -----
library(modeest)
# modeest::hsm() works the way a mode is assumed to work in my eyes
allmodes <- function(z) {
c(asselin(z),
#grenander(x),
#hrm(x),
hsm(z),
#lientz(z),
#meanshift(z),
shorth(z),
#naive(z),
#parzen(z),
#tsybakov(x),
venter(z),
#vieu(z)
get_mode(z)
)
}
# ------ tyrcatch ------
log_list <- list()
suppressWarnings(
withCallingHandlers({
for (i in 1:10) {
x <- or_data3 %>% filter(item1 == 1, item2 == 2) %>% unnest(or_dif_samples) %>% hsm_hdi(or_dif)
}
}, warning = function(w) {log_list <<- c(log_list, w)})
)
w <- log_list %>% unlist() %>% unique()
warning(w)
aggregate_warnings({
for (i in 1:10) {
x <- or_data3 %>% filter(item1 == 1, item2 == 2) %>% unnest(or_dif_samples) %>% hsm_hdi(or_dif)
}
y <- 2
})
# -------------- odds ratio IRT limits ------------
ai <- 1
aj <- 1
sig <- 1
beta <- 0
upper_limit <- exp(ai*aj*sig^2)
lower_limit <- exp(log(upper_limit)/(1+sig^2*(beta+(ai^2+aj^2)/4)))
or_lim_eval <- or_data %>% unnest(or_act_ci) %>% mutate(in_limits = ifelse(.lower > or_limits_irt(1,1,c(1,1.6))$lower_limit &
.upper < or_limits_irt(1,1,c(1,1.6))$upper_limit, TRUE, FALSE),
out_of_limits = ifelse(.lower > or_limits_irt(1,1,c(1,1.6))$upper_limit |
.upper < or_limits_irt(1,1,c(1,1.6))$lower_limit, TRUE, FALSE),
point_in_limits = ifelse(or_act > or_limits_irt(1,1,c(1,1.6))$lower_limit &
or_act < or_limits_irt(1,1,c(1,1.6))$upper_limit, TRUE, FALSE),
point_put_of_limits = ifelse(or_act > or_limits_irt(1,1,c(1,1.6))$upper_limit |
or_act < or_limits_irt(1,1,c(1,1.6))$lower_limit, TRUE, FALSE))
y <- make_responsedata_wider(fit_1d_1pl_spm1_full) %>% select(-dplyr::any_of(unlist(fit_1d_1pl_spm1$var_specs)))
counts <- count_for_itempair_or(y[,1], y[,6])
v <- contingency2successratio(counts)
z <- get_or_distribution(counts, k = .5, nsim = 4000)
sigma <- fit_1d_1pl_spm1_full %>% tidybayes::spread_draws(sd_person__Intercept) %>% pull(sd_person__Intercept)
lims <- or_limits_irt(sigma = sigma)
ll <- lims[[1]]
ul <- lims[[2]]
(z-ll) %>% log() %>% as_tibble() %>% plot_ppmc_ridges(value)
(z-ll) %>% log() %>% plot_ppmc_distribution(clean_data = TRUE)
(sample(z, 10000, replace = TRUE)-sample(ll, 10000, replace = TRUE)) %>% plot_ppmc_distribution()
(sample(ul, 10000, replace = TRUE)-sample(z, 10000, replace = TRUE)) %>% plot_ppmc_distribution()
(z-ll) %>% hsm_hdi()
(sort(z)-sort(ll)) %>% hsm_hdi()
# (sample(z, 10000, replace = TRUE)-sample(ll, 10000, replace = TRUE)) %>% as_tibble() %>%
# plot_ppmc_ridges(value, n = 1000000, custom_ci = .$value %>% ggdist::hdi(.width = .89), range = c(-3, 2)) + xlim(-5, 10)
# (sample(ul, 10000, replace = TRUE)-sample(z, 10000, replace = TRUE)) %>% as_tibble() %>%
# plot_ppmc_ridges(value, n = 1000000, custom_ci = .$value %>% ggdist::hdi(.width = .89), range = c(-80, 5)) + xlim(-150, 20)
or <- counts[[1]]*counts[[2]]/(counts[[3]]*counts[[4]])
(or-ll) %>% plot_ppmc_distribution()
(ul-or) %>% plot_ppmc_distribution()
(or-ll) %>% tidybayes::median_hdi()
or_data_qi <- or_data %>% tidyr::unnest(or_dif_samples, keep_empty = TRUE) %>% group_by(item1, item2) %>% tidybayes::mean_qi(or_dif) %>%
tidyr::nest(or_dif_qi = c(.lower, .upper, .width, .point, .interval)) %>% rename(or_dif_mean = or_dif)
or_data <- or_data %>% left_join(or_data_qi)
or_data_temp <- or_data %>% tidyr::unnest(or_dif_qi, keep_empty = TRUE) %>% dplyr::mutate(above_zero = .lower > 0, beneath_zero = .upper < 0)
for (i in -500:500) {
print(paste(i, brms::logit_scaled(brms::inv_logit_scaled(i))))
}
or_data %>% plot_or_heatmap( )
testdata <- birtms::compose_dataset(response_data = data_spm, response_columns = i1:i12)
formula_1d_1pl <- birtms::build_formula()
fit_1d_1pl_spm1_full <- birtms::birtm(data = testdata, formula = formula_1d_1pl,
file = 'models/fit_1d_1pl_spm1_full')
model_specs <- list(item_parameter_number = 2)
testdata <- birtms::compose_dataset(response_data = data_spm, response_columns = i1:i12)
formula_1d_2pl <- birtms::build_formula(model_specifications = model_specs)
prior_2pl_minimal <- brms::prior("normal(0, 2)", class = "b", nlpar = "skillintercept") +
brms::prior("normal(0, 1)", class = "b", nlpar = "logalpha") +
brms::prior("constant(1)", class = "sd", group = "person", nlpar = "theta")
fit_1d_2pl_spm_full <- birtms::birtm(data = testdata, formula = formula_1d_2pl, prior = prior_2pl_minimal,
model_specifications = model_specs,
file = 'models/fit_1d_2pl_spm_full')
fit_1d_1pl_spm1_itemnametest <- readRDS('models/fit_1d_1pl_spm1_itemnametest.rds')
# no posterior or_rep distribution falls inside the or_act CIs
or_dif_bonds <- or_data_1pl_full %>% tidyr::unnest(or_dif_ci, keep_empty = TRUE) %>% tidyr::unnest(or_act_ci, names_sep = "") %>%
dplyr::mutate(above_zero = .lower > 0, beneath_zero = .upper < 0,
above_rope = .lower > 0 + rope, beneath_rope = .upper < 0 - rope,
inside_or_act_ci = .lower > (or_act_ci.lower- or_act) & .upper < (or_act_ci.upper - or_act))
# --- beschränkte Dichten ----
x <- c(rnorm(100, -3, .5), runif(100, 1,2))
plot(density(x, bw = 'SJ'))
lines(density(-x+max(x)+min(x), bw = 'SJ'), col = 'red')
x2 <- c(exp(-1/runif(100,0,1)))
plot(density(x2, bw = 'SJ'))
(-x2) %>% plot_ppmc_distribution()
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.