R/bootstrap_function.r

Defines functions cutBootstrap boot_fun estCutScore

Documented in estCutScore

# bootstrapping

estCutScore <- function(use.data, cut_level = 2) { # 
  
  cut_candi <- use.data$Loc_RP50
  cut_info <- map_df(cut_candi, ~ evalInconsistency(use.data, d_alpha = 1, cut_level, .x)$cut_info)
  
  model_c <- gam(counts ~ s(cut_score), data = cut_info, family = "gaussian")
  model_w <- gam(weights ~ s(cut_score), data = cut_info, family = "gaussian")
  
  model_pred_c <- predict_gam(model_c,length_out = 10000)
  model_pred_w <- predict_gam(model_w,length_out = 10000)
  
  min_point <- which.min(model_pred_c$fit)
  y_point_c <- model_pred_c$fit[min_point]
  x_point_c <- model_pred_c$cut_score[min_point]
  
  min_point <- which.min(model_pred_w$fit)
  y_point_w <- model_pred_w$fit[min_point]
  x_point_w <- model_pred_w$cut_score[min_point]
  
  res <- list(
    model_c = model_c,
    model_w = model_w,
    cut_info=cut_info, 
    model_pred_c = model_pred_c, 
    model_pred_w = model_pred_w,
    cut_point_c = c(x_point_c, y_point_c),
    cut_point_w = c(x_point_w, y_point_w))
  
  return(res)
}

boot_fun <- function(cut_info, b_prop) {
  inp_data <-
    cut_info %>%
    sample_frac(., size = b_prop) %>%
    arrange(cut_score)
  
  model_c <- gam(counts ~ s(cut_score), data = inp_data, family = "gaussian")
  model_w <- gam(weights ~ s(cut_score), data = inp_data, family = "gaussian")
  
  model_pred_c <- predict_gam(model_c,length_out = 10000)
  model_pred_w <- predict_gam(model_w,length_out = 10000)
  
  min_point <- which.min(model_pred_c$fit)
  y_point_c <- model_pred_c$fit[min_point]
  x_point_c <- model_pred_c$cut_score[min_point]
  
  min_point <- which.min(model_pred_w$fit)
  y_point_w <- model_pred_w$fit[min_point]
  x_point_w <- model_pred_w$cut_score[min_point]
  
  res <- list(
    cut_info = inp_data,
    model_pred_c = model_pred_c, 
    model_pred_w = model_pred_w,
    cut_point_c = c(x_point_c, y_point_c),
    cut_point_w = c(x_point_w, y_point_w))
  return(res)
}

cutBootstrap <- function(cut_data, b_prop = 0.75, n_rep = 100) {

  # cut_data <- cut_lv2
  
  cut_info <- cut_data$cut_info
  
  res_boot <-
    rerun(n_rep, boot_fun(cut_info, b_prop))

    res_cut_score <-
      map(res_boot, ~ c(.x$cut_point_c, .x$cut_point_w)) %>%
      do.call("rbind", .) %>%
      data.frame() %>%
      setNames(c("cs_counts_x","cs_counts_y","cs_weights_x","cs_weights_y")) %>% 
      mutate(boot_rep = row_number())

    res_model_pred <-
      map(res_boot, 
          ~ bind_rows(.x$model_pred_c,.x$model_pred_w, .id = "type") %>%
            mutate(type = case_when(type == 1 ~ "counts", TRUE ~ "weights")))  %>%
      bind_rows(.id = "boot_rep")

    res <- list(cut_info = cut_info, bs_cut_score = res_cut_score, bs_model_pred = res_model_pred)

  return(res)

}

# cutBootstrap <- function(use.data, cut_level = 2,
#                          b_prop = 0.75, n_rep = 100) {
#   
#   cut_lv <- estCutScore(use.data, cut_level)
#   boot_fun <- function(use.data, cut_level, b_prop) {
#     inp_data <-
#       use.data %>% 
#       sample_frac(., size = b_prop) %>% 
#       arrange(Item_ID)
#     
#     cut_data <- estCutScore(inp_data, cut_level)
# 
#     cut_score_c <- cut_data$cut_point_c[1]
#     cut_score_w <- cut_data$cut_point_w[1]
# 
#     model_pred_c <- cut_data$model_pred_c
#     model_pred_w <- cut_data$model_pred_w
#     
#     # }
#     model_pred <- list(counts = model_pred_c, weights = model_pred_w)
#     cut_score <- c(cut_score_c, cut_score_w)
#     
#     list(model_pred = model_pred, cut_score = cut_score)
#   }
#   
#   res_boot <- 
#     rerun(n_rep, boot_fun(use.data, cut_level, b_prop))
#   
#   res_cut_score <- 
#     map(res_boot, ~ .x$cut_score) %>% 
#     do.call("rbind", .) %>% 
#     data.frame() %>% 
#     setNames(c("cs_counts","cs_weights"))
#   
#   res_model_pred <- 
#     map(res_boot, ~ .x$model_pred %>% 
#           bind_rows(.id = "type")) %>% 
#     bind_rows(.id = "boot_rep")
#   
#   res <- list(cut_lv = cut_lv, bs_cut_score = res_cut_score, bs_model_pred = res_model_pred)
#   return(res)
# }

cutPlot <- function(cut_data, counts = F) {

  cut_info <- cut_data$cut_info
  
  if(counts) {
    model_pred <- cut_data$model_pred_c
    cut_point <- cut_data$cut_point_c
    type <- "counts"
  } else {
    model_pred <- cut_data$model_pred_w
    cut_point <- cut_data$cut_point_w
    type <- "weights"
  }
  
  model_pred %>%
    ggplot(aes(x = cut_score, y = fit)) +
    geom_smooth_ci() +
    annotate(geom = "point", x = cut_point[1], y = cut_point[2], colour = "orange", size = 5) +
    annotate(geom = "point", x = cut_point[1], y = cut_point[2],
             colour = "white") + 
    annotate(geom = "label", x = cut_point[1], y = cut_point[2], 
             label = round(cut_point[1], 2), 
             # hjust = "left",
             vjust = -1) + # https://ggplot2-book.org/annotations.html
    geom_point(data = cut_info,
               aes_string(x = "cut_score", y = type), alpha = 0.5)
  
}

# bsPlot <- function(bs_data, counts = T) {
#   # bs_data = boot_sample_lv2
#   cut_info <- bs_data$cut_lv$cut_info
#   cut_score <- bs_data$bs_cut_score
#   
#   if(counts) {
#     type <- "counts"
#     model_pred <- bs_data$bs_model_pred %>% filter(type == "counts")
#     
#     cut_score <- cut_score %>% 
#       select(ends_with(type)) %>% 
#       mutate(boot_rep = row_number()) %>% 
#       group_split(boot_rep)
#     
#     model_pred0 <-
#       model_pred %>% 
#       group_split(boot_rep)
#       
#     cut_point <-
#       map2(model_pred0, cut_score, 
#            ~ .x %>% filter(cut_score == .y[[1]])) %>% 
#       bind_rows()
#     
#     final_cut <- c(mean(cut_point$cut_score), mean(cut_point$fit))
#     
#   } else {
#     type = "weights"
#     model_pred <- bs_data$bs_model_pred %>% filter(type == "weights")
#     
#     cut_score <- cut_score %>% 
#       select(ends_with(type)) %>% 
#       mutate(boot_rep = row_number()) %>% 
#       group_split(boot_rep)
#     
#     model_pred0 <-
#       model_pred %>% 
#       group_split(boot_rep)
#     
#     cut_point <-
#       map2(model_pred0, cut_score, 
#            ~ .x %>% filter(cut_score == .y[[1]])) %>% 
#       bind_rows()
#     
#     final_cut <- c(mean(cut_point$cut_score), mean(cut_point$fit))
#     
#   }
#   
#   p1 <- ggplot() +
#     geom_point(data = cut_info,
#              aes_string(x = "cut_score", y = type), alpha = 0.5)
#   p2 <- 
#     p1 +
#     geom_line(data = model_pred, 
#               aes(x = cut_score, y = fit, group = boot_rep), alpha=0.05)
#   p3 <- 
#     p2 +
#     geom_point(data = cut_point,
#                aes(x = cut_score, y = fit), alpha = 0.2, colour = "blue")
#     
#   p3 +
#   annotate(geom = "point", x = final_cut[1], y = final_cut[2], 
#            colour = "orange", size = 5) +
#   annotate(geom = "point", x = final_cut[1], y = final_cut[2]) + 
#   annotate(geom = "text", x = final_cut[1], y = final_cut[2], 
#            label = round(final_cut[1], 2), 
#            # hjust = "left",
#            vjust = -1) # https://ggplot2-book.org/annotations.html
# }

bsPlot <- function(bs_data, counts = T) {
  # bs_data = boot_sample_lv2
  
  type <- ifelse(counts, "counts", "weights") 
  
  cut_info <- bs_data$cut_info
  cut_score <- bs_data$bs_cut_score
  model_pred <- bs_data$bs_model_pred
  
  model_pred <- model_pred %>% filter(type == !!type) # get("type", envir = .env)
  cut_score <- cut_score %>% select(matches(!!type)) %>% set_names(c("cut_score","fit"))
  final_cut <- colMeans(cut_score)
  
  p1 <- ggplot() +
    geom_point(data = cut_info,
               aes_string(x = "cut_score", y = type), alpha = 0.5)
  p2 <- 
    p1 +
    geom_line(data = model_pred, 
              aes(x = cut_score, y = fit, group = boot_rep), alpha=0.01)
  p3 <- 
    p2 +
    geom_point(data = cut_score,
               aes(x = cut_score, y = fit), alpha = 0.2, colour = "blue")
  
  p3 +
    annotate(geom = "point", x = final_cut[1], y = final_cut[2], 
             colour = "orange", size = 5) +
    annotate(geom = "point", x = final_cut[1], y = final_cut[2],
             colour = "white") + 
    annotate(geom = "label", x = final_cut[1], y = final_cut[2], 
             label = round(final_cut[1], 2), 
             # hjust = "left",
             vjust = -1) # https://ggplot2-book.org/annotations.html
}
sooyongl/ESS documentation built on Dec. 23, 2021, 4:22 a.m.