simulation/simulation_test.r

rm(list = ls())

library(ESS)
library(tidyverse)

# set.seed(1000)

genSimData <- function(item_loc_range, loc_by, nlevel, nitem, correlation, type_seq) {
  data1 <- MASS::mvrnorm(
    n = nitem,
    mu = c(0,0),
    Sigma = matrix(c(1,correlation,
                     correlation, 1), ncol = 2),
    empirical = T)

  data1 <-
    data.frame(data1) %>%
    setNames(c("ALD","Operational_Lv")) %>%
    arrange(... = Operational_Lv)

  data2 <-
    apply(data1, 2,function(x) cut(x = x, breaks = nlevel, labels = F)) %>%
    data.frame()

  data2 <-
    data2 %>%
    mutate_all( ~paste0("Level", .) )

  if(type_seq == "same") {
    item_location <- seq(item_loc_range[1], item_loc_range[1] + (nitem-1)*loc_by, by = loc_by)
  } else if (type_seq == "random") {
    item_location <- sort(runif(nitem, item_loc_range[1], item_loc_range[2]))
  } else {
    item_location <- seq(item_loc_range[1], item_loc_range[2], length.out = nitem)
  }

  data3 <- data2 %>% mutate(
    GCA = "M3",
    Subject = "Math",
    Grade = 3,
    Round = 1,
    Table = 1,
    Panelist  = "M3.1",
    Item_ID = row_number(),
    OOD = row_number(),
    Loc_RP67 = item_location, .before = ALD)

  return(data3)
}

estCor <- function(.data) {

  ald_vector <- .data$ALD
  lv_vector <- sort(unique(ald_vector))

  operational_vector <- .data$Operational_Lv

  ald_num <- match(ald_vector, lv_vector)
  op_num <- match(operational_vector, lv_vector)

  cor(ald_num, op_num)
}

simEstCutScore <- function(true_data, WESS) {

  ald_vector <- true_data$ALD
  lv_vector <- sort(unique(ald_vector))
  nitem <- dim(true_data)[1]
  item_location <- true_data$Loc_RP67

  location <- tibble(
    GCA = "M3",
    Item_ID = 1:nitem,
    Loc_RP67 = item_location
  )

  cut_scores <-
    cal_cs(lv_vector, ald_vector, location, threshold = F)

  cut_scores <-
    cut_scores %>%
    mutate_at(vars(matches("_W$")), ~ .x /1) %>%
    mutate_all( round, 2)

  cutPoint <- cal_minp(cut_scores)
  selected_CP <- select_cp(cutPoint, cut_scores, WESS)

  inpData <- true_data %>% select(GCA, Subject, Grade, Round, Table, Panelist, Item_ID, ALD)
  data_in_use <- true_data %>% select(GCA:ALD)

  data_in_use <-
    cut_scores %>%
    bind_cols(data_in_use, .) %>%
    relocate(., OOD, Loc_RP67, .after = Item_ID)

  op_num <- rep(0, nrow(inpData))
  op_num[ selected_CP  ] <- 1
  Operational_name <- get_opname1(inpData, lv_vector, op_num)

  data_in_use <-
    data_in_use %>%
    mutate(
      Operational_Lv = Operational_name
    )

  return(data_in_use)
}

mkPlot <- function(.data) {

  .data <- .data %>% select(Loc_RP67, ends_with("_W"))

  .data <- .data %>%
    select(Loc_RP67, ends_with("_W")) %>%
    gather("Levels","weights", -Loc_RP67)

  ggplot(.data,  aes(Loc_RP67, weights, colour = Levels) ) +
    geom_point(aes(shape = Levels)) +
    stat_smooth(method = lm, formula = y ~ x + I(x^2)) +
    theme_bw()
}


getCookD <- function(.data, Level) {

  .data <- .data %>% select(Loc_RP67, ends_with("_W"))

  lm_formula <-
    as.formula(
      glue::glue("{Level} ~ Loc_RP67 + I(Loc_RP67^2)")
    )

  l2 <- lm(lm_formula, data = .data)

  cooks.distance(l2)
}


calMin <- function(.data, Level) {

  .data <- .data %>% select(Loc_RP67, L2_W, L3_W)

  lm_formula <-
    as.formula(
      glue::glue("{Level} ~ Loc_RP67 + I(Loc_RP67^2)")
    )

  l2 <- lm(lm_formula, data = .data)

  # Using Tayler's series
  # f(x) = b0 + b1x + b2x^2
  # a_x = - b1 / (2*b2)
  # a_y = b0 - ((b1^2)/(4*b2)) = b0 - b2*(a_x)^2

  b <- unname(l2$coefficients)

  a_x = - b[2] / (2*b[3])
  a_y = b[1] - (b[3]*(a_x)^2)

  c(minimizer = a_x, miminum = a_y)
}

# generate true levels
true_data <- genSimData(item_loc_range = c(1, 200), loc_by = 1, nitem = 30, nlevel = 3, correlation = 0.5, type_seq = "same")

# test
test_data <- simEstCutScore(true_data, WESS = T)

estCor(true_data)
estCor(test_data)

mkPlot(test_data)
getCookD(test_data, "L2_W")

calMin(test_data, Level = "L2_W")
calMin(test_data, Level = "L3_W")


#
true_test <- true_data %>% mutate(ALD = Operational_Lv)
test_data <- simEstCutScore(true_test, WESS = T)

estCor(true_test)
estCor(test_data)

bind_cols(true_test$Operational_Lv,test_data$Operational_Lv) %>% print(n=50)

mkPlot(test_data)
getCookD(test_data, "L2_W")


lm_formula <-
  as.formula(
    glue::glue("L3_W ~ Loc_RP67 + I(Loc_RP67^2)")
  )

l2 <- lm(lm_formula, data = test_data)

l2 <- lm(lm_formula, data = test_data)

calMin(test_data, Level = "L2_W")
calMin(test_data, Level = "L3_W")
sooyongl/ESS documentation built on Dec. 23, 2021, 4:22 a.m.