R/simulation_function.r

Defines functions genSimData

#' @include import.r
NULL

genSimData <- function(item_loc_range, loc_by, nlevel, nitem, correlation, type_seq) {
  data1 <- MASS::mvrnorm(
    n = nitem,
    mu = c(0,0),
    Sigma = matrix(c(1,correlation,
                     correlation, 1), ncol = 2),
    empirical = T)

  data1 <-
    data.frame(data1) %>%
    setNames(c("ALD","Operational_Lv")) %>%
    arrange(... = Operational_Lv)

  data2 <-
    apply(data1, 2,function(x) cut(x = x, breaks = nlevel, labels = F)) %>%
    data.frame()

  data2 <-
    data2 %>%
    mutate_all( ~paste0("Level", .) )

  if(type_seq == "same") {
    item_location <- seq(item_loc_range[1], item_loc_range[1] + (nitem-1)*loc_by, by = loc_by)
  } else if (type_seq == "random") {
    item_location <- sort(runif(nitem, item_loc_range[1], item_loc_range[2]))
  } else {
    item_location <- seq(item_loc_range[1], item_loc_range[2], length.out = nitem)
  }

  data3 <- data2 %>% mutate(
    GCA = "M3",
    Subject = "Math",
    Grade = 3,
    Round = 1,
    Table = 1,
    Panelist  = "M3.1",
    Item_ID = row_number(),
    OOD = row_number(),
    Loc_RP50 = item_location, .before = ALD)

  return(data3)
}

estCor <- function(.data) {

  ald_vector <- .data$ALD
  lv_vector <- sort(unique(ald_vector))

  operational_vector <- .data$Operational_Lv

  ald_num <- match(ald_vector, lv_vector)
  op_num <- match(operational_vector, lv_vector)

  cor(ald_num, op_num)
}

simEstCutScore <- function(true_data, WESS = F, SQRT = F) {
  # true_data = test_data; WESS = F

  true_data$OOD <- 1:dim(true_data)[1]

  ald_vector <- true_data$ALD
  lv_vector <- sort(unique(ald_vector))
  nitem <- dim(true_data)[1]
  item_location <- true_data %>% select(starts_with("Loc")) %>% pull()

  location <- tibble(
    GCA = "M3",
    Item_ID = 1:nitem,
    Loc_RP50 = item_location
  )

  cut_scores <-
    cal_cs(lv_vector, ald_vector, location, threshold = F)

  if(SQRT) {
    cut_scores <-
      cut_scores %>%
      mutate_at(vars(matches("_W$")), ~ sqrt(.x))

  }
  cut_scores <-cut_scores %>% mutate_all( round, 2)

  cutPoint <- cal_minp(cut_scores)
  selected_CP <- select_cp(cutPoint, cut_scores, WESS)

  op_num <- rep(0, nrow(true_data))
  op_num[ selected_CP  ] <- 1
  Operational_name <- get_opname1(true_data, lv_vector, op_num)

  data_in_use <-
    true_data %>%
    bind_cols(., OPL = Operational_name, cut_scores)

  return(list(res = data_in_use, selected_CP = selected_CP))
}

mkPlot <- function(.data, WESS, fit = T, font_size = 12) {
  cut_scores <- .data$selected_CP
  cut_char <- paste(paste0("Level",2:(length(cut_scores)+1)),cut_scores, sep = "=", collapse = "\n")

  .data <- .data$res

  cut_weight <- .data$Loc_RP50[cut_scores]

  correlations <- round(cor(.data[ c("Loc_RP50", "ALD", "OPL")]),3)

  correlations <- correlations[lower.tri(correlations)]

  correlations <- paste0("\ncor bw loc and ALD = ",correlations[1],"\n",
         "cor bw loc and OPL = ",correlations[2],"\n",
         "cor bw OPL and ALD = ",correlations[3],"\n")

  correlations <- paste0(cut_char, correlations)
  x_axis <- .data$Loc_RP50

  # steps<- round(length(x_axis)/8,0)
  # steps<-sapply(1:8, function(i) {1 + steps * (i)})

  # r_sample <- unique(sort(c(1, 5, 20, cut_scores, length(x_axis)-20, length(x_axis)-5, length(x_axis))))
  r_sample <- cut_scores
  x_breaks <- round(x_axis[r_sample],1)

  # c(min(x_axis),cut_scores,max(x_axis))

  if(WESS){
    .data <- .data %>%
      select(starts_with("Loc_"), ends_with("_W")) %>%
      gather("Levels","weights", -starts_with("Loc_"))
    names(.data)[1] <- c("Location")
    x_var <- "Location"
  } else {
    # .data <- .data %>% select(Loc_RP67, L2, L3)
    .data <- .data %>% select(starts_with("OOD"), matches("^L[0-9]$"))

    x_axis <- 1:dim(.data)[1]

    .data <- .data %>% gather("Levels","counts", -starts_with("OOD"))
    names(.data)[1] <- c("OOD")
    x_var <- "OOD"

    r_sample <- unique(c(1,cut_scores, length(x_axis)))
    # x_breaks <- r_sample
    x_breaks <- cut_scores
    cut_weight <- cut_scores

  }

  y_axis <- .data[,3]

  p1 <- ggplot(.data,  aes_string(x_var, names(.data)[3], colour = "Levels") ) +
    geom_point(aes(shape = Levels), alpha = 0.5) +
    geom_vline(xintercept = cut_weight, colour = "blue", size = 1.5, linetype = "dotted", alpha = 0.5) +
    annotate("text", x = mean(x_axis), y = max(y_axis)-50, label = correlations) +
    scale_x_continuous(breaks = x_breaks) +
    theme_bw(base_size = font_size)


  if(fit){
    p1<- p1 + stat_smooth(method = lm, formula = y ~ x + I(x^2), alpha = 0.4)
  }
  p1
}



getCookD <- function(.data, Level) {

  .data <- .data %>% select(Loc_RP67, ends_with("_W"))

  lm_formula <-
    as.formula(
      glue::glue("{Level} ~ Loc_RP67 + I(Loc_RP67^2)")
    )

  l2 <- lm(lm_formula, data = .data)

  cooks.distance(l2)
}


calMin <- function(.data, Level) {

  .data <- .data %>% select(Loc_RP67, L2_W, L3_W)

  lm_formula <-
    as.formula(
      glue::glue("{Level} ~ Loc_RP67 + I(Loc_RP67^2)")
    )

  l2 <- lm(lm_formula, data = .data)

  # Using Tayler's series
  # f(x) = b0 + b1x + b2x^2
  # a_x = - b1 / (2*b2)
  # a_y = b0 - ((b1^2)/(4*b2)) = b0 - b2*(a_x)^2

  b <- unname(l2$coefficients)

  a_x = - b[2] / (2*b[3])
  a_y = b[1] - (b[3]*(a_x)^2)

  c(minimizer = a_x, miminum = a_y)
}


shiftMean <- function(theta_mat, mean_vec){

  col_means <- colMeans(theta_mat)
  mean_centered <- theta_mat - matrix(rep(col_means, dim(theta_mat)[1]), nrow = dim(theta_mat)[1], byrow = T)

  theta_mat <- mean_centered + matrix(rep(mean_vec, dim(theta_mat)[1]), nrow = dim(theta_mat)[1], byrow = T)

  return(theta_mat)
}

#
genTheta <- function(theta, mean_vec, cor_mat) {

  stopifnot( length(mean_vec) == dim(cor_mat)[2] )
  stopifnot( eigen( cor_mat )$values > 0 )

  n_col <- ifelse(is.null(dim(theta)), 1, dim(theta)[2])

  n <- ifelse(is.null(dim(theta)), length(theta), dim(theta)[1])
  k <- ncol(cor_mat)
  x <- matrix( rnorm(n*k), nc=k )
  x[,seq_len(n_col)] <- theta

  y <- x %*% solve(chol(var(x))) %*% chol(cor_mat) # cor(y)
  y[,seq_len(n_col)] <- theta  #

  y <- shiftMean(y, mean_vec)

  return(y)
}

simALD <- function(existing_theta, cor_value, nlevel) {
  given_correlation <- matrix(c(
    1, cor_value,
    cor_value, 1 ), 2, 2 )
  given_means <- c(mean(existing_theta), 0)
  new_theta <- genTheta(theta = existing_theta, mean_vec = given_means, cor_mat = given_correlation)

  new_level <- cut(x = new_theta[,2], breaks = nlevel, labels = F)
  return(new_level)
}

genLoc <- function(fun, ...){
  existing_theta <- match.fun(fun)(...)
  existing_theta <- sort(existing_theta)
  existing_theta
}

genFakeData <- function(fun, cor_value, nlevel, ...) {
  Loc_RP50 <- genLoc(fun, ...)
  new_ald <- simALD(Loc_RP50, cor_value, nlevel)

  fake_data <- bind_cols(Loc_RP50 = Loc_RP50, OOD = 1:length(Loc_RP50), ALD = new_ald)

  fake_data
}
sooyongl/ESS documentation built on Dec. 23, 2021, 4:22 a.m.