#' @include import.r
NULL
genSimData <- function(item_loc_range, loc_by, nlevel, nitem, correlation, type_seq) {
data1 <- MASS::mvrnorm(
n = nitem,
mu = c(0,0),
Sigma = matrix(c(1,correlation,
correlation, 1), ncol = 2),
empirical = T)
data1 <-
data.frame(data1) %>%
setNames(c("ALD","Operational_Lv")) %>%
arrange(... = Operational_Lv)
data2 <-
apply(data1, 2,function(x) cut(x = x, breaks = nlevel, labels = F)) %>%
data.frame()
data2 <-
data2 %>%
mutate_all( ~paste0("Level", .) )
if(type_seq == "same") {
item_location <- seq(item_loc_range[1], item_loc_range[1] + (nitem-1)*loc_by, by = loc_by)
} else if (type_seq == "random") {
item_location <- sort(runif(nitem, item_loc_range[1], item_loc_range[2]))
} else {
item_location <- seq(item_loc_range[1], item_loc_range[2], length.out = nitem)
}
data3 <- data2 %>% mutate(
GCA = "M3",
Subject = "Math",
Grade = 3,
Round = 1,
Table = 1,
Panelist = "M3.1",
Item_ID = row_number(),
OOD = row_number(),
Loc_RP50 = item_location, .before = ALD)
return(data3)
}
estCor <- function(.data) {
ald_vector <- .data$ALD
lv_vector <- sort(unique(ald_vector))
operational_vector <- .data$Operational_Lv
ald_num <- match(ald_vector, lv_vector)
op_num <- match(operational_vector, lv_vector)
cor(ald_num, op_num)
}
simEstCutScore <- function(true_data, WESS = F, SQRT = F) {
# true_data = test_data; WESS = F
true_data$OOD <- 1:dim(true_data)[1]
ald_vector <- true_data$ALD
lv_vector <- sort(unique(ald_vector))
nitem <- dim(true_data)[1]
item_location <- true_data %>% select(starts_with("Loc")) %>% pull()
location <- tibble(
GCA = "M3",
Item_ID = 1:nitem,
Loc_RP50 = item_location
)
cut_scores <-
cal_cs(lv_vector, ald_vector, location, threshold = F)
if(SQRT) {
cut_scores <-
cut_scores %>%
mutate_at(vars(matches("_W$")), ~ sqrt(.x))
}
cut_scores <-cut_scores %>% mutate_all( round, 2)
cutPoint <- cal_minp(cut_scores)
selected_CP <- select_cp(cutPoint, cut_scores, WESS)
op_num <- rep(0, nrow(true_data))
op_num[ selected_CP ] <- 1
Operational_name <- get_opname1(true_data, lv_vector, op_num)
data_in_use <-
true_data %>%
bind_cols(., OPL = Operational_name, cut_scores)
return(list(res = data_in_use, selected_CP = selected_CP))
}
mkPlot <- function(.data, WESS, fit = T, font_size = 12) {
cut_scores <- .data$selected_CP
cut_char <- paste(paste0("Level",2:(length(cut_scores)+1)),cut_scores, sep = "=", collapse = "\n")
.data <- .data$res
cut_weight <- .data$Loc_RP50[cut_scores]
correlations <- round(cor(.data[ c("Loc_RP50", "ALD", "OPL")]),3)
correlations <- correlations[lower.tri(correlations)]
correlations <- paste0("\ncor bw loc and ALD = ",correlations[1],"\n",
"cor bw loc and OPL = ",correlations[2],"\n",
"cor bw OPL and ALD = ",correlations[3],"\n")
correlations <- paste0(cut_char, correlations)
x_axis <- .data$Loc_RP50
# steps<- round(length(x_axis)/8,0)
# steps<-sapply(1:8, function(i) {1 + steps * (i)})
# r_sample <- unique(sort(c(1, 5, 20, cut_scores, length(x_axis)-20, length(x_axis)-5, length(x_axis))))
r_sample <- cut_scores
x_breaks <- round(x_axis[r_sample],1)
# c(min(x_axis),cut_scores,max(x_axis))
if(WESS){
.data <- .data %>%
select(starts_with("Loc_"), ends_with("_W")) %>%
gather("Levels","weights", -starts_with("Loc_"))
names(.data)[1] <- c("Location")
x_var <- "Location"
} else {
# .data <- .data %>% select(Loc_RP67, L2, L3)
.data <- .data %>% select(starts_with("OOD"), matches("^L[0-9]$"))
x_axis <- 1:dim(.data)[1]
.data <- .data %>% gather("Levels","counts", -starts_with("OOD"))
names(.data)[1] <- c("OOD")
x_var <- "OOD"
r_sample <- unique(c(1,cut_scores, length(x_axis)))
# x_breaks <- r_sample
x_breaks <- cut_scores
cut_weight <- cut_scores
}
y_axis <- .data[,3]
p1 <- ggplot(.data, aes_string(x_var, names(.data)[3], colour = "Levels") ) +
geom_point(aes(shape = Levels), alpha = 0.5) +
geom_vline(xintercept = cut_weight, colour = "blue", size = 1.5, linetype = "dotted", alpha = 0.5) +
annotate("text", x = mean(x_axis), y = max(y_axis)-50, label = correlations) +
scale_x_continuous(breaks = x_breaks) +
theme_bw(base_size = font_size)
if(fit){
p1<- p1 + stat_smooth(method = lm, formula = y ~ x + I(x^2), alpha = 0.4)
}
p1
}
getCookD <- function(.data, Level) {
.data <- .data %>% select(Loc_RP67, ends_with("_W"))
lm_formula <-
as.formula(
glue::glue("{Level} ~ Loc_RP67 + I(Loc_RP67^2)")
)
l2 <- lm(lm_formula, data = .data)
cooks.distance(l2)
}
calMin <- function(.data, Level) {
.data <- .data %>% select(Loc_RP67, L2_W, L3_W)
lm_formula <-
as.formula(
glue::glue("{Level} ~ Loc_RP67 + I(Loc_RP67^2)")
)
l2 <- lm(lm_formula, data = .data)
# Using Tayler's series
# f(x) = b0 + b1x + b2x^2
# a_x = - b1 / (2*b2)
# a_y = b0 - ((b1^2)/(4*b2)) = b0 - b2*(a_x)^2
b <- unname(l2$coefficients)
a_x = - b[2] / (2*b[3])
a_y = b[1] - (b[3]*(a_x)^2)
c(minimizer = a_x, miminum = a_y)
}
shiftMean <- function(theta_mat, mean_vec){
col_means <- colMeans(theta_mat)
mean_centered <- theta_mat - matrix(rep(col_means, dim(theta_mat)[1]), nrow = dim(theta_mat)[1], byrow = T)
theta_mat <- mean_centered + matrix(rep(mean_vec, dim(theta_mat)[1]), nrow = dim(theta_mat)[1], byrow = T)
return(theta_mat)
}
#
genTheta <- function(theta, mean_vec, cor_mat) {
stopifnot( length(mean_vec) == dim(cor_mat)[2] )
stopifnot( eigen( cor_mat )$values > 0 )
n_col <- ifelse(is.null(dim(theta)), 1, dim(theta)[2])
n <- ifelse(is.null(dim(theta)), length(theta), dim(theta)[1])
k <- ncol(cor_mat)
x <- matrix( rnorm(n*k), nc=k )
x[,seq_len(n_col)] <- theta
y <- x %*% solve(chol(var(x))) %*% chol(cor_mat) # cor(y)
y[,seq_len(n_col)] <- theta #
y <- shiftMean(y, mean_vec)
return(y)
}
simALD <- function(existing_theta, cor_value, nlevel) {
given_correlation <- matrix(c(
1, cor_value,
cor_value, 1 ), 2, 2 )
given_means <- c(mean(existing_theta), 0)
new_theta <- genTheta(theta = existing_theta, mean_vec = given_means, cor_mat = given_correlation)
new_level <- cut(x = new_theta[,2], breaks = nlevel, labels = F)
return(new_level)
}
genLoc <- function(fun, ...){
existing_theta <- match.fun(fun)(...)
existing_theta <- sort(existing_theta)
existing_theta
}
genFakeData <- function(fun, cor_value, nlevel, ...) {
Loc_RP50 <- genLoc(fun, ...)
new_ald <- simALD(Loc_RP50, cor_value, nlevel)
fake_data <- bind_cols(Loc_RP50 = Loc_RP50, OOD = 1:length(Loc_RP50), ALD = new_ald)
fake_data
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.