knitr::opts_chunk$set(echo = TRUE, cols.print = 15, rows.print = 30)

library(ESS)
library(foreach)
library(tidyverse)
select <- dplyr::select
# example_data <- ESS::read_data("example_data.xlsx")
# data_list <- ESS::data_ready(example_data)

Generate a fake data set

The ALD and true_level is generated based on the correlation. First, generate the random data based on the correlation. Second, categorize the random numbers based on the number of levels.

The distance between locations are identical (type_seq = same).

# generate true levels
true_data <- genSimData(item_loc_range = c(200, 500), loc_by = 1, nitem = 100, nlevel = 3, correlation = 0.1, type_seq = "random")

true_data <- true_data %>% rename("true_level"="Operational_Lv")
true_data

Estimate the cut scores using ESS-weights method {.tabset}

Results

# test
test_data <- simEstCutScore(true_data, WESS = T)
# estCor(true_data)
# estCor(test_data)
test_data %>% relocate(ALD, .after = L3_W) %>% select(-Grade, -Round, -Table, -Panelist) %>% mutate(true_level = true_data$true_level)

plots by each level

lm_formula <-
  as.formula(
    glue::glue("L2_W ~ Loc_RP67 + I(Loc_RP67^2)")
  )

l2 <- lm(lm_formula, data = test_data)
coef(l2)
mkPlot(test_data)

Cook's distance

Determine influential outliers.

cutoff: - $D_i > 0.5$ : - $D_i > 1.0$ :

round(getCookD(test_data, "L2_W"), 3)
round(getCookD(test_data, "L3_W"), 3)

plot(getCookD(test_data, "L2_W"), main="Level 2")
plot(getCookD(test_data, "L3_W"), main="Level 3")

Calculate minimizer and minimum

$$y = b_0 + b_1x + b_2x^2 $$

Minimizer:

$$\alpha_x = - \frac{b_1}{2b_2}$$ Minimum:

$$\alpha_y = b_0 - \frac{b_1^2}{4b_2} = b_0 - b_2\alpha_x^2$$

calMin(test_data, Level = "L2_W")
calMin(test_data, Level = "L3_W")

Different distance between locations

set.seed(10001)
# generate true levels
true_data <- genSimData(item_loc_range = c(1, 200), loc_by = 1, nitem = 30, nlevel = 3, correlation = 0.1, type_seq = "random")

test_data <- simEstCutScore(true_data, WESS = T)
mkPlot(test_data)

ALD same as operational level {.tabset}

When ALD is perfectly aligned.

Results

set.seed(1001)
true_data <- genSimData(item_loc_range = c(1, 200), loc_by = 1, nitem = 20, nlevel = 3, correlation = 0.1, type_seq = "same")

true_test <- true_data %>% mutate(ALD = Operational_Lv)
test_data <- simEstCutScore(true_test, WESS = T) %>% relocate(ALD, .after = L3_W)

test_data %>% select(-Grade, -Round, -Table, -Panelist)

Plots

mkPlot(test_data)

Fit a quadratic function

lm_formula <-
  as.formula(
    glue::glue("L2_W ~ Loc_RP67 + I(Loc_RP67^2)")
  )

l2 <- lm(lm_formula, data = test_data)
coef(l2)

The estimated function is

$$L2_W = 0.53Loc^2 - 6.31Loc + 20$$

The function always follows

$$y = \frac{n(n+1)}{2}$$,

where n is a location value. In this case,

$$y = \frac{(n-6)((n-6)+1)}{2}$$

$$y = 0.5n^2 - 5.5n + 15$$

This is because of the way of calculating weights.

calMin(test_data, Level = "L2_W")


sooyongl/ESS documentation built on Dec. 23, 2021, 4:22 a.m.