simulation/test_sim_4_9.r

# rm(list = ls())
library(ESS)
library(foreach)
library(tidyverse)
library(readxl)
library(tidyverse)

data_list <- fs::dir_ls("simulation/data")

file_path <- data_list[1]
imprt_data <-
  file_path %>%
  excel_sheets() %>%
  map(read_excel, path = file_path) %>%
  set_names(., nm = c("setup", "panelist","rating","item_data","examinee_data"))

rating_data <- imprt_data[[3]]

(grades <- rating_data %>% pull(GCA) %>% unique())
pick_grade <- grades[1]
(levels <- rating_data %>% pull(ALD) %>% unique())
rating_data <- rating_data %>%
  mutate(ALD = case_when(ALD == "early" ~ 1, ALD == "mid" ~ 2, TRUE ~ 3))

rating_data %>% pull(User) %>% unique()
rating_data <- rating_data %>% filter(GCA == pick_grade)
item_data <- imprt_data[[4]] %>% filter(GCA == pick_grade) %>% rename("OOD" = "Order_of_Difficulty")

test_data <- rating_data %>% left_join(., item_data %>% select(Item_ID, OOD, Loc_RP50), by = c("Item_ID")) %>%
  select(Item_ID, Loc_RP50, OOD, ALD) %>%
  arrange(OOD)

hist(test_data$Loc_RP50, breaks = 100)
table(test_data$ALD)
cor(test_data$ALD, test_data$Loc_RP50)

# ddd -------------------------------------------------
test_data_WESS <- simEstCutScore(test_data, WESS = T, SQRT = T)
test_data_CESS <- simEstCutScore(test_data, WESS = F)

mkPlot(.data = test_data_WESS, WESS = T);
mkPlot(.data = test_data_CESS, WESS = F);

# simulate ALD ------------------------------------
existing_theta <- test_data$Loc_RP50
simALD <- function(existing_theta, cor_value, nlevel) {
  given_correlation <- matrix(c(
    1, cor_value,
    cor_value, 1 ), 2, 2 )
  given_means <- c(mean(existing_theta), 0)
  new_theta <- genTheta(theta = existing_theta, mean_vec = given_means, cor_mat = given_correlation)

  new_level <- cut(x = new_theta[,2], breaks = nlevel, labels = F)
  return(new_level)
}



new_ald <- simALD(test_data$Loc_RP50, .6, 3)
table(new_ald)
cor(new_ald, test_data$Loc_RP50)

test_data
new_data <- test_data %>% mutate(ALD = new_ald)

test_data_WESS <- simEstCutScore(new_data, WESS = T, SQRT = T)
test_data_CESS <- simEstCutScore(new_data, WESS = F)

mkPlot(.data = test_data_WESS, WESS = T);
mkPlot(.data = test_data_CESS, WESS = F)


table(OP=test_data_WESS$res$OPL, ALD=test_data_WESS$res$ALD)
# Even distribution of RP_i ----------------------------------
genLoc <- function(fun, ...){
  existing_theta <- match.fun(fun)(...)
  existing_theta <- sort(existing_theta)
  existing_theta
}

genFakeData <- function(fun, cor_value, nlevel, ...) {
  Loc_RP50 <- genLoc(fun, ...)
  new_ald <- simALD(Loc_RP50, cor_value, nlevel)

  fake_data <- bind_cols(Loc_RP50 = Loc_RP50, OOD = 1:length(Loc_RP50), ALD = new_ald)

  fake_data
}


# new_data<- genFakeData("rnorm", cor_value=0.2, nlevel=3, n = 1000, 300, 10)
new_data<- genFakeData("runif", cor_value=0.6, nlevel=3, n = 200, 100, 300)
input <- "runif"
new_data<- genFakeData(input, cor_value=0.6, nlevel=3, n = 100, 100, 300)

simEstCutScore(new_data, WESS = T, SQRT = F)
simEstCutScore(new_data, WESS = F, SQRT = F)
sooyongl/ESS documentation built on Dec. 23, 2021, 4:22 a.m.