Nothing
#' Prepare BILOG-MG data file
#'
#' @description Prepare \code{".dat"} data file for BILOG-MG.
#'
#' @param x Either a \\code{data.frame}, \code{matrix} or
#' \code{\link{Response_set-class}} object. Each row should represent a
#' subject (examinee) and each column represent a variable (usually an item).
#' The response values should be either 0, 1 or \code{NA}.
#' @param items A vector of column names or numbers of the \code{x} that
#' represents the responses.
#' @param examinee_id_var The column name or number that contains individual
#' subject IDs. If none is provided (i.e. \code{examinee_id_var = NULL}), the
#' program will check whether the data provided has row names.
#' @param group_var The column name or number that contains group membership
#' information if multi-group calibration is desired. Ideally, it grouping
#' variable is represented by single digit integers. If other type of data
#' provided, an integer value will automatically assigned to the variables.
#' The default value is \code{NULL}, where no multi-group analysis is
#' performed.
#' @param reference_group Represent which group's ability distribution will be
#' set to mean = 0 and standard deviation = 1. For example, if the value is 1,
#' then the group whose code is 1 will have ability distribution with mean 0
#' and standard deviation 1. When groups are assumed to coming from a single
#' population, set this value to 0.
#'
#' The default value is \code{NULL}.
#'
#' This value will be represented in BILOG-MG control file as:
#' \code{REFERENCE = reference_group}.
#' @param target_path The location where the BILOG-MG data file fill be saved.
#' For example: \code{file.path(getwd(), "data", "mydata.dat")}.
#' @param create_np_key_file If \code{TRUE}, a file that contains
#' the non-presented key will be created. This key is the same format as
#' the corresponding response data file.
#' @param overwrite If TRUE and there is already a BILOG-MG data file in the
#' target path with the same name, the file will be overwritten.
#'
#' @author Emre Gonulates
#'
#' @noRd
#'
#' @examples
#' \dontrun{
#' resp <- sim_resp(generate_ip(n = 15), rnorm(200), prop_missing = .2)
#' bilog_create_datafile(x = resp)
#' bilog_create_datafile(x = resp, items = paste0("Item-", 1:7))
#' }
#'
bilog_create_datafile <- function(
x,
items = NULL,
examinee_id_var = NULL,
group_var = NULL,
reference_group = NULL,
target_path = file.path(getwd(), "bilog_data.dat"),
create_np_key_file = FALSE,
overwrite = FALSE) {
if (!(inherits(x, c("matrix", "data.frame", "Response_set"))))
stop("Invalid data file. Data file should be either a matrix, data.frame ",
"or a 'Response_set' object.", call. = FALSE)
# Check path
target_path <- normalizePath(target_path, mustWork = FALSE)
target_dir <- dirname(target_path)
if (!grepl("(.*)\\.dat$", target_path))
stop("Invalid 'target_path' argument. Please provide a valid file name ",
"with '.dat' extension as a target_path", call. = FALSE)
# Make sure the directory exists
if (!dir.exists(target_dir))
dir.create(path = target_dir, recursive = TRUE)
if (!dir.exists(target_dir))
stop(paste0("The directory for BILOG-MG data file cannot be created at: \n",
target_dir, "\nPlease create directory manually.",
call. = FALSE))
# If x is Response_set, convert it to a data.frame and add examinee_id_var and
# group_var
if (is(x, "Response_set")) {
x_matrix <- as.matrix(x, output = "score")
if (!is.null(group_var) && is_atomic_vector(group_var)) {
if (length(group_var) == 1 && is.character(group_var) &&
is_atomic_vector(do.call("$", list(x = x, name = group_var)))) {
x_matrix <- cbind(do.call("$", list(x = x, name = group_var)),
data.frame(x_matrix))
colnames(x_matrix)[1] <- group_var
} else if (length(group_var) == nrow(x_matrix)) {
x_matrix <- cbind(group_var, data.frame(x_matrix))
group_var <- "grouping_variable"
colnames(x_matrix)[1] <- group_var
} else {
stop("Invalid 'group_var' value. For 'response_set' objects, please ",
"attach the group information to the response set by: ",
"'x$<group_var> <- values'. Please see the examples.")
}
}
x <- x_matrix
}
### Examinee ID ###
# Create the examinee_id field, if it is NULL, automatically the row names
# of the data will be the subject ids.
examinee_id <- rownames(x)
# Check whether examinee_id_var is valid
if (!is.null(examinee_id_var) && (
length(examinee_id_var) != 1 ||
!(examinee_id_var %in% 1:ncol(x) ||
(!is.null(colnames(x)) && examinee_id_var %in% colnames(x)))
))
stop("Invalid 'examinee_id_var' argument. 'examinee_id_var' value should ",
"be a column number or a column name.", call. = FALSE)
if (!is.null(examinee_id_var)) {
examinee_id <- paste0(x[, examinee_id_var, drop = TRUE])
# # Remove 'examinee_id_var" from the response data
# if (examinee_id_var %in% colnames(x)) {
# x <- x[, which(colnames(x) != examinee_id_var)]
# } else {
# x <- x[, -examinee_id_var]
# }
}
if (is.null(examinee_id)) examinee_id <- paste0(1:nrow(x))
# find the maximum number or characters in an ID
num_id_char <- max(nchar(examinee_id))
if (!num_id_char %in% 1:30)
stop("Invalid IDs. Number of characters alloted to IDs cannot be more ",
"than 30 characters.", call. = FALSE)
examinee_id <- sprintf(paste0("%", num_id_char, "s"), examinee_id)
### Item IDs ###
# If items are NULL, then use all of the items.
if (is.null(items)) {
# Check if the column names are NULL
if (is.null(colnames(x)))
colnames(x) <- paste0("Item-", 1:ncol(x))
items <- colnames(x)
# Make sure to remove examinee_id_var
if (!is.null(examinee_id_var)) {
if (examinee_id_var %in% items) {
items <- items[items != examinee_id_var]
} else {
items <- items[-examinee_id_var]
}
}
# Make sure to remove group_var
if (!is.null(group_var)) {
if (group_var %in% items) {
items <- items[items != group_var]
} else {
items <- items[-group_var]
}
}
}
# Make sure items are valid
if (!is.null(items) && (
any(duplicated(items)) ||
!(all(items %in% colnames(x)) || all(items %in% 1:ncol(x)) )
))
stop("Invalid 'items' argument. The elements of 'items' argument should ",
"be a character vector of column names of the 'x'. All elements ",
"should be unique.", call. = FALSE)
### Prepare responses ###
resp <- apply(x[, items], 2, as.character)
# Check if the responses are valid:
if (!all(sapply(unique(as.vector(resp)), function(x) is.na(x) ||
x %in% c("0", "1"))))
stop("Invalid response data. The response values should be either 0, 1 ",
"or missing (i.e. NA).", call. = FALSE)
# Convert missing data to "."
resp <- ifelse(is.na(resp), ".", resp)
num_of_items <- ncol(resp)
resp <- apply(resp, 1, paste0, collapse = "")
### Group ###
# group: In BILOG-MG, "the group identifier has to be a single digit
# (integer), starting with 1."
group_info <- bilog_create_group_info(x = x, group_var = group_var,
reference_group = reference_group)
# # group: In BILOG-MG, "the group identifier has to be a single digit
# # (integer), starting with 1."
# group <- NULL
# groups <- NULL
# num_of_groups <- 0
# # Check whether group_var is valid
# if (!is.null(group_var) && (
# length(group_var) != 1 || !(group_var %in% 1:ncol(x) ||
# (!is.null(colnames(x)) &&
# group_var %in% colnames(x)))
# ))
# stop("Invalid 'group_var' argument. 'group_var' value should be ",
# "a column number or a column name.", call. = FALSE)
# if (!is.null(group_var)) {
# group_names <- as.character(x[, group_var, drop = TRUE])
# unique_groups <- unique(group_names)
# if (any(is.na(unique_groups))) {
# stop("Grouping variable cannot have missing (NA) values.",
# call. = FALSE)
# }
# group_sizes <- data.frame(table(x[, group_var]))
# if (any(nchar(unique_groups) > 8)) {
# unique_groups <- substr(unique_groups, 1, 8)
# if (length(unique(unique_groups)) != length(unique_groups)) {
# stop("Invalid group values. Values that designate group membership ",
# "should be up to eight (8) characters and be unique.",
# call. = FALSE)
# } else
# message(paste0("Some of the group names are larger than eight ",
# "characters. Group names are shortened to fit ",
# "within eight characters:\n",
# paste0("\"", unique_groups, "\"", collapse = ", ")))
# }
# num_of_groups <- length(unique_groups)
# # Make sure group variable is integers from 1:9
# if (!all(unique_groups %in% paste0(1:num_of_groups))) {
# group <- as.character(as.integer(factor(group_names,
# levels = unique_groups)))
# } else group <- group_names
# groups <- data.frame(name = unique_groups, code = unique(group),
# n = group_sizes$Freq)
# }
### Prepare the data file and formal statement ###
# Add ID
data_text <- paste0(examinee_id, " ")
formal_statement <- paste0("(", num_id_char, "A1", ",", "1X")
# Add group if there is
if (!is.null(group_info$group)) {
data_text <- paste0(data_text, group_info$group, " ")
formal_statement <- paste0(formal_statement, ",I1", ",1X")
}
# Create not-presented key file:
if (create_np_key_file) {
np_text <- paste0(
# Add examinee_id
paste0(rep(" ", num_id_char), collapse = ""), " ",
# Add groups
ifelse(group_info$num_of_groups == 0, "", " "),
# Add items
paste0(rep(".", num_of_items), collapse = ""))
np_key_file_path <- file.path(
target_dir,
paste0(gsub("\\.(.*)$", "", basename(target_path)), ".NFN"))
if (overwrite || !file.exists(np_key_file_path))
writeLines(text = np_text, con = np_key_file_path)
} else
np_key_file_path <- NULL
# Add response data:
data_text <- paste0(data_text, resp)
formal_statement <- paste0(formal_statement, ",", num_of_items, "A1)")
if (overwrite || !file.exists(target_path)) writeLines(data_text, target_path)
return(list(x = x,
formal_statement = formal_statement,
data_file_path = target_path,
num_of_items = num_of_items,
num_of_groups = group_info$num_of_groups,
group_info = group_info$group_info,
reference_group = group_info$reference_group,
np_key_file_path = np_key_file_path,
num_id_char = num_id_char))
}
#' Create group information
#'
#' @param x Either a \\code{data.frame}, \code{matrix} or
#' \code{\link{Response_set-class}} object. Each row should represent a
#' subject (examinee) and each column represent a variable (usually an item).
#' The response values should be either 0, 1 or \code{NA}.
#' @param group_var The column name or number that contains group membership
#' information if multi-group calibration is desired. Ideally, it grouping
#' variable is represented by single digit integers. If other type of data
#' provided, an integer value will automatically assigned to the variables.
#' The default value is \code{NULL}, where no multi-group analysis is
#' performed.
#'
#' @author Emre Gonulates
#'
#' @noRd
bilog_create_group_info <- function(x = NULL, group_var = NULL,
reference_group = NULL) {
group <- NULL
group_info <- NULL
num_of_groups <- 0
if (is.null(x) || is(x, "Response_set")) {
return(list(group = group, group_info = group_info,
num_of_groups = num_of_groups))
}
# Check whether group_var is valid
if (!is.null(group_var) && (
length(group_var) != 1 || !(group_var %in% 1:ncol(x) ||
(!is.null(colnames(x)) &&
group_var %in% colnames(x)))
))
stop("Invalid 'group_var' argument. 'group_var' value should be a column ",
"number or a column name.", call. = FALSE)
if (!is.null(group_var)) {
# Check whether reference_group is NULL, if yes, assign a default
# reference group:
group_info <- data.frame(table(name = x[, group_var]))
colnames(group_info) <- c("name", "n")
group_info$name <- as.character(group_info$name)
if (is.null(reference_group)) {
reference_group <- group_info$name[1]
} else if (!reference_group %in% group_info$name) {
stop(paste0("'reference_group' argument should be one of the following",
" group names:\n",
paste0("\"", group_info$name, "\"", collapse = ",")),
call. = FALSE)
}
group_info$ref <- group_info$name == reference_group
# put the reference group to the top
group_info <- group_info[order(group_info$ref, decreasing = TRUE), ]
group_info$code <- as.character(1:nrow(group_info))
group_names <- as.character(x[, group_var, drop = TRUE])
# unique_groups <- group_info$name
# unique_groups <- unique(group_names)
if (any(is.na(group_info$name))) {
stop("Grouping variable cannot have missing (NA) values.", call. = FALSE)
}
if (any(nchar(group_info$name) > 8)) {
group_info$name <- substr(group_info$name, 1, 8)
if (length(unique(group_info$name)) != length(group_info$name)) {
stop("Invalid group values. Values that designate group membership ",
"should be up to eight (8) characters and be unique.",
call. = FALSE)
} else
message(paste0("Some of the group names are larger than eight ",
"characters. Group names are shortened to fit ",
"within eight characters:\n",
paste0("\"", group_info$name, "\"", collapse = ", ")))
}
num_of_groups <- nrow(group_info)
group <- group_info$code[match(group_names, group_info$name)]
# # Make sure group variable is integers from 1:9
# if (!all(group_info$name %in% paste0(1:num_of_groups))) {
#
# group <- as.character(as.integer(factor(group_names,
# levels = group_info$name)))
# } else group <- group_names
# group_info <- data.frame(name = group_info$name, code = unique(group),
# n = group_info$Freq)
}
return(list(group = group, group_info = group_info,
num_of_groups = num_of_groups,
reference_group = reference_group))
}
#' Read the parameters of BILOG-MG Calibration
#'
#' @param par_file Path for a BILOG-MG parameter file with '.PAR' extension.
#' @param items The names of the items. This will be given assigned as the
#' item_id's of the items. The default value is \code{NULL}, where the item
#' item_id's assigned by BILOG-MG will be assigned as item parameters.
#' @param model The model of the items. The value is one of the
#' following:
#' One-parameter logistic model (\code{"1PL"}),
#' Two-parameter logistic model (\code{"2PL"}),
#' Three-parameter logistic model (\code{"3PL"}).
#'
#' The default value is \code{"3PL"}.
#' @param D Scaling constant. The default value is \code{1}. If the item
#' parameters needs to be converted to the commonly used normal scale where
#' \code{D = 1.7} or \code{D = 1.702}, change this value accordingly.
#'
#' @noRd
#'
bilog_read_pars <- function(
par_file,
ph1_file,
items = NULL,
model = "3PL",
D = 1
) {
result <- list(ip = NULL,
failed_items = NULL)
if (is.null(D)) D <- 1.7
# Wait three seconds for the parameter file and
counter <- 1
while (!file.exists(par_file) && counter < 4) {
message(paste0("Waiting for the parameter file... (", counter, ")\n"))
Sys.sleep(1)
counter <- counter + 1
}
if (!file.exists(par_file))
stop(paste0("Parameter file cannot be found at: \n\"", par_file, "\"\n"),
call. = FALSE)
# Locate the parameter file and read it
# In BILOG-MG's terms:
# "a" = slope
# "b" = threshold
# "c" = lower asymptote
pars <- utils::read.fwf(
file = par_file,
widths = c(8, 8, rep(10, 13), 4, 1, 1), skip = 4, header = FALSE,
col.names = c("item_id", "subtest_name", "intercept", "intercept_se",
"a", "a_se", "b", "b_se", "dispersion", "dispersion_se",
"c", "c_se", "drift", "drift_se", "unused", "item_location",
"answer_key", "dummy_values"
))
if (model == "3PL") {
ipdf <- pars[, c("item_id", "a", "b", "c")]
ipdf_se <- pars[, c("a_se", "b_se", "c_se")]
} else if (model == "2PL") {
ipdf <- pars[, c("item_id", "a", "b")]
ipdf_se <- pars[, c("a_se", "b_se")]
} else if (model == "1PL") {
ipdf <- pars[, c("item_id", "a", "b"), drop = FALSE]
ipdf_se <- pars[, c("a_se", "b_se"), drop = FALSE]
model <- "2PL"
} else if (model == "Rasch") {
ipdf <- pars[, c("item_id", "b")]
ipdf_se <- pars[, "b_se", drop = FALSE]
}
if (!is.null(items)) ipdf$item_id <- items
# Check whether all item parameter values are valid:
ph1_content <- readLines(con = ph1_file)
invalid_items <- ph1_content[grepl("WILLNOT BE CALIBRATED", ph1_content)]
if (any(sapply(ipdf[, -1], class) != "numeric") ||
any(sapply(ipdf_se[, -1], class) != "numeric") ||
length(invalid_items) > 0) {
# Get the invalid items in the form:
# "**** ITEM 6 WITH BISERIAL R LESS THAN -0.15 WILLNOT BE CALIBRATED."
invalid_items <- gsub("(.*)\\*\\*\\*\\* ITEM[ ]+", "", invalid_items)
invalid_items <- 1:nrow(ipdf) %in% as.numeric(gsub(" (.*)", "",
invalid_items))
# Get invalid items that does not have numeric values
invalid_items <- invalid_items | !apply(
sapply(ipdf[, -1], function(x) grepl(
"[-]?[0-9]+[.]?[0-9]*|[-]?[0-9]+[l]?|[-]?[0-9]+[.]?[0-9]*[ee][0-9]+",
x)), 1, all)
invalid_items <- invalid_items | !apply(sapply(ipdf_se[, -1], function(x)
grepl(
"[-]?[0-9]+[.]?[0-9]*|[-]?[0-9]+[l]?|[-]?[0-9]+[.]?[0-9]*[ee][0-9]+",
x)), 1, all)
warning(paste0("\nEstimation failed for following item(s):\n\n",
paste0(utils::capture.output(print(ipdf[invalid_items, ])),
collapse = "\n"),
"\n\nThese items will be remove from the output."))
result$failed_items <- ipdf[invalid_items, ]
# Remove invalid items
ipdf <- ipdf[!invalid_items, ]
ipdf_se <- ipdf_se[!invalid_items, ]
}
# Convert the cleaned data frame to numeric.
for (i in which(sapply(ipdf[, -1], class) != "numeric"))
ipdf[, i + 1] <- as.numeric(ipdf[, i + 1])
for (i in which(sapply(ipdf_se[, -1], class) != "numeric"))
ipdf_se[, i + 1] <- as.numeric(ipdf_se[, i + 1])
# if ("a" %in% colnames(ipdf)) ipdf$a <- ipdf$a / D
ipdf$item_id <- gsub("^\\s+|\\s+$", "", ipdf$item_id)
result$ip <- itempool(ipdf[, which(colnames(ipdf) %in% c("a", "b", "c")),
drop = FALSE],
item_id = ipdf$item_id, model = model, D = D,
se_parameters = ipdf_se)
return(result)
}
#' This function reads the next CTT table from the row given
#' @param text BILOG-MG text grabbed
#' @param row The row number to start reading from 'text'
#'
#' @noRd
bilog_read_ctt_table <- function(text, row) {
sub_text <- text[row:length(text)]
sub_text <- sub_text[6:(which(grepl(paste0(
"^ ", paste0(rep("-", 73), collapse = "")), sub_text))[2] - 1)]
pars <- utils::read.fwf(textConnection(sub_text),
widths = c(5, 11, 9, 10, 8, 9, 10, 9),
col.names = c("order", "name", "tried", "right", "pvalue",
"logit", "pbis", "bis"),
header = FALSE,
colClasses = c("integer", "character", "numeric",
"numeric", "numeric", "numeric", "numeric",
"numeric")
)
if (any(pars$pvalue > 1))
pars$pvalue <- pars$pvalue/100
pars$name <- gsub("^\\s+|\\s+$", "", pars$name)
return(pars)
}
#' Read the CTT parameters of BILOG-MG Calibration
#'
#' @param ph1_file Path for a BILOG-MG ".PH1" file which holds CTT statistics.
#' @return A list of CTT parameters. If there are groups, then the CTT
#' statistics for groups can be found in \code{$group$GROUP-NAME}.
#' Overall statistics for the whole group is at \code{$overall}.
#'
#' @noRd
bilog_read_ctt <- function(ph1_file) {
text <- readLines(ph1_file)
result <- list(overall = NULL)
# Check whether there is a group
if (any(grepl("ITEM STATISTICS FOR MULTIPLE GROUPS", text))) {
### Multi-group ###
result$group <- NULL
# Read the CTT statistics for groups
for (i in which(grepl("ITEM STATISTICS FOR GROUP:", text))) {
group_name <- sub("( )*$", "", text[i])
group_name <- sub("^(.*) ", "", group_name)
result$group[[group_name]] <- bilog_read_ctt_table(text, i)
}
# Read the CTT statistics for the overall test
i <- which(grepl("ITEM STATISTICS FOR MULTIPLE GROUPS", text))
result$overall <- bilog_read_ctt_table(text, i)
} else if (any(grepl("ITEM STATISTICS FOR SUBTEST", text))) {
### Single-Group ###
i <- which(grepl("ITEM STATISTICS FOR SUBTEST", text))
result$overall <- bilog_read_ctt_table(text, i)
}
if (is.null(result$group)) return(result$overall)
return(result)
}
#' Read posterior distribution points and weights
#'
#' @param ph2_file Path for a BILOG-MG ".PH2" file which holds group means.
#' @param group_info A data frame holding information about group names and
#' codes
#'
#' @return A data frame for posterior points and weights
#'
#' @noRd
bilog_read_posterior_dist <- function(ph2_file, group_info = NULL) {
main_text <- readLines(ph2_file, skipNul = TRUE)
row <- which(grepl(
pattern = "QUADRATURE POINTS, POSTERIOR WEIGHTS, MEAN AND S\\.D\\.:",
main_text))
read_pw <- function(text) {
temp_pattern <- "^ POINT "
points <- sapply(unlist(strsplit(gsub(
temp_pattern, "", text[grepl(temp_pattern, text)]), " ")), as.numeric)
points <- points[!is.na(points)]
temp_pattern <- "^ POSTERIOR "
posterior <- sapply(unlist(strsplit(gsub(
temp_pattern, "", text[grepl(temp_pattern, text)]), " ")), as.numeric)
posterior <- posterior[!is.na(posterior)]
return(data.frame(point = points, weight = posterior))
}
if (length(row) == 1) { # Single group calibration
subtext <- main_text[row:length(main_text)]
return(read_pw(subtext))
} else if (length(row) > 1) { # Multi-group calibration
result <- vector("list", length(row))
for (i in 1:length(row)) {
subtext <- main_text[row[i]:ifelse(i == length(row), length(main_text),
row[i + 1] - 1)]
result[[i]] <- read_pw(subtext)
}
if (!is.null(group_info)) names(result) <- group_info$name
return(result)
}
return(NULL)
}
#' Read the group means of multigroup BILOG-MG Calibration
#'
#' @param ph3_file Path for a BILOG-MG ".PH3" file which holds group means.
#' @param group_info A data frame holding information about group names and
#' codes
#' @return Group means
#'
#' @noRd
bilog_read_group_means <- function(ph3_file, group_info) {
main_text <- readLines(ph3_file, skipNul = TRUE)
result <- group_info
row <- which(grepl(pattern = "PRIOR MEANS AND STANDARD DEVIATIONS",
main_text))
if (length(row) == 1) {
text <- main_text[row:length(main_text)]
row <- which(grepl("^ ---------------------------$", text))[1:2]
text <- text[(row[1] + 1):(row[2] - 1)]
pars <- utils::read.fwf(textConnection(text), widths = c(3, 13, 10),
col.names = c("code", "prior_mean", "prior_sd"))
if (is.null(result)) return(pars)
result <- merge(x = result, pars, by = "code")
}
row <- which(grepl("SUMMARY STATISTICS FOR SCORE ESTIMATES BY GROUP",
main_text))
if (length(row) > 0) {
text <- main_text[row:length(main_text)]
pars <- group_info[, "code", drop = FALSE]
pars <- pars[order(pars$code), , drop = FALSE]
temp_pattern <- "^ MEAN:[[:blank:]]*(.*)$"
pars$mean <- as.numeric(gsub(temp_pattern, "\\1",
text[grepl(temp_pattern, text)]))
temp_pattern <- "^ S\\.D\\.:[[:blank:]]*(.*)$"
pars$sd <- as.numeric(gsub(temp_pattern, "\\1",
text[grepl(temp_pattern, text)]))
result <- merge(x = result, pars, by = "code")
}
return(result)
}
#' Read the scale scores from BILOG-MG Calibration
#'
#' @param score_file Path for a BILOG-MG ".SCO" file which holds estimated
#' scale scores.
#' @param x the data file
#' @param examinee_id_var The column name of x that is holding the
#' examinee_id variable.
#' @param group_var The column name of x that is holding the examinee group
#' variable
#'
#' @return A data frame consist of scores.
#'
#' @noRd
bilog_read_scores <- function(score_file, x, examinee_id_var = NULL,
group_var = NULL) {
text <- readLines(score_file)
# Remove first two lines which does not contain any information
text <- text[-c(1:2)]
if (nrow(x) == length(text)) {
scores <- utils::read.delim(textConnection(text), sep = "|", header = FALSE)
scores <- utils::read.fwf(
textConnection(scores[, 2]),
widths = c(6 + 1, 11, 3, 5, 10, 12, 12, 11, 10),
col.names = c("weight", "test", "tried", "right", "percent",
"ability", "se", "prob", "unknown1")
)
} else if (nrow(x) == (length(text) / 2)) {
# Get the width of the first line that contains group number and item_id
# widths <- c(3, nchar(text[1])-2, 6, 11, 3, 5, 10, 12, 12, 11, 10)
n <- 1:(length(text)/2)
# text <- paste(text[2*n-1], text[2*n])
text <- text[2*n]
scores <- utils::read.fwf(
textConnection(text),
widths = c(6, 11, 3, 5, 10, 12, 12, 11, 10),
col.names = c("weight", "test", "tried", "right", "percent",
"ability", "se", "prob", "unknown1")
)
}
scores <- scores[, c("tried", "right", "ability", "se", "prob")]
# Add group info
if (!is.null(group_var) && group_var %in% names(x)) {
scores <- cbind(group = x[[group_var]], scores)
}
# Add examinee_id info
if (!is.null(examinee_id_var) && examinee_id_var %in% names(x)) {
scores <- cbind(examinee_id = x[[examinee_id_var]], scores)
} else if (!is.null(rownames(x))) {
scores <- cbind(examinee_id = rownames(x), scores)
} else if (is(x, "Response_set") && length(x$examinee_id) == length(x)) {
scores <- cbind(examinee_id = x$examinee_id, scores)
}
if (requireNamespace("tibble")) {
return(tibble::as_tibble(scores))
} else return(scores)
}
#' This function determines whether the calibration is converged or not.
#'
#' The method in this function is not tested extensively and based on
#' limited experience.
#'
#' @param par_file The parameter file
#' @param ph2_file The file with ".PH2" extension
#' @param ph3_file The file with ".PH3" extension
#'
#' @return TRUE if convergence reached, FALSE if not.
#' @noRd
check_bilog_convergence <- function(par_file, ph2_file, ph3_file) {
converged <- FALSE
if (file.exists(ph2_file)) {
ph2_content <- readLines(con = ph2_file)
converged <- any(grepl("BYTES OF NUMERICAL WORKSPACE USED",
x = ph2_content)) &&
!any(grepl("NOTE: CONVERGENCE HAS NOT BEEN REACHED TO CRITERION",
x = ph2_content))
}
# Check whether b parameters have standard error values
if (file.exists(par_file)) {
pars <- utils::read.fwf(
file = par_file,
widths = c(8, 8, rep(10, 13), 4, 1, 1), skip = 4, header = FALSE,
col.names = c("item_id", "subtest_name", "intercept", "intercept_se",
"a", "a_se", "b", "b_se", "dispersion", "dispersion_se",
"c", "c_se", "drift", "drift_se", "unused", "item_location",
"answer_key", "dummy_values"
))
converged <- converged || any(pars$b_se > 0)
}
return(converged)
}
#' Wrap text
#'
#' @description This functions wraps text within 80 characters. If it overflows,
#' the remaining text will be flowed the next line beginning column 1.
#'
#' @param t text
#' @param width total width of the line
#' @param tab whether to add 'tab' string to the beginning of each text, usual
#' 'tab = " "'.
#' @param skip_tab do not put tab to these lines
#'
#' @noRd
wrap_text <- function(t, width = 80, tab = NULL, skip_tab = 0) {
# Add tab to all lines except to the first line.
if (!is.null(tab)) {
r <- strwrap(t, width = width)
r <- c(r[skip_tab], paste0(
tab, strwrap(paste0(r[setdiff(1:length(r), skip_tab)],
collapse = "\n"),
width = width - nchar(tab) - 1)))
return(paste0(r, collapse = "\n"))
}
r <- gsub(paste0("(.{", width, "})"), "\\1\n", t)
# do not allow the first character in a line to be a comma ",":
if (grepl(pattern = "\n,", x = r, fixed = TRUE)) {
r <- gsub(paste0("(.{", width - 1, "})"), "\\1\n", t)
r <- gsub(pattern = "\n,", replacement = ",\n", r, fixed = TRUE)
}
return(r)
}
#' Item Calibration via BILOG-MG
#'
#' @description \code{est_bilog} runs BILOG-MG in batch mode or reads BILOG-MG
#' output generated by BILOG-MG program. In the first case, this function
#' requires BILOG-MG already installed on your computer under
#' \code{bilog_exe_folder} directory.
#'
#' In the latter case, where appropriate BILOG-MG files are present (i.e.
#' \code{"<analysis_name>.PAR"}, \code{"<analysis_name>.PH1"},
#' \code{"<analysis_name>.PH2"} and \code{"<analysis_name>.PH3"} files exist)
#' and \code{overwrite = FALSE}, there is no need for BILOG-MG program. This
#' function can read BILOG-MG output without BILOG-MG program.
#'
#' @param x Either a \code{data.frame}, \code{matrix} or
#' \code{\link{Response_set-class}} object. When the data is not necessary,
#' i.e. user only wants to read the BILOG-MG output from the
#' \code{target_dir}, then this can be set to \code{NULL}.
#' @param model The model of the items. The value is one of the
#' following:
#' \describe{
#' \item{\code{"1PL"}}{One-parameter logistic model.}
#' \item{\code{"2PL"}}{Two-parameter logistic model.}
#' \item{\code{"3PL"}}{Three-parameter logistic model.}
#' \item{\code{"CTT"}}{Return only Classical Test theory statistics such as
#' p-values, point-biserial and biserial correlations.}
#' }
#'
#' The default value is \code{"3PL"}.
#' @param target_dir The directory/folder where the BILOG-MG analysis and data
#' files will be saved. The default value is the current working directory,
#' i.e. \code{get_wd()}.
#' @param analysis_name A short file name that will be used for the data files
#' created for the analysis.
#' @param items A vector of column names or numbers of the \code{x} that
#' represents the responses. If, in the syntax file, no entry for item
#' names are desired, then, simply write \code{items = "none"}.
#' @param examinee_id_var The column name or number that contains individual
#' subject IDs. If none is provided (i.e. \code{examinee_id_var = NULL}), the
#' program will check whether the data provided has row names.
#' @param group_var The column name or number that contains group membership
#' information if multi-group calibration is desired. Ideally, it grouping
#' variable is represented by single digit integers. If other type of data
#' provided, an integer value will automatically assigned to the variables.
#' The default value is \code{NULL}, where no multi-group analysis will be
#' performed.
#' @param logistic A logical value. If \code{TRUE}, \code{LOGISTIC} keyword will
#' be added to the BILOG-MG command file which means the calibration will
#' assume the natural metric of the logistic response function in all
#' calculations. If \code{FALSE}, the logit is multiplied by D = 1.7 to obtain
#' the metric of the normal-ogive model. The default value is \code{TRUE}.
#' @param num_of_alternatives An integer specifying the maximum number of
#' response alternatives in the raw data. \code{1/num_of_alternatives} is
#' used by the analysis as automatic starting value for estimating the
#' pseudo-guessing parameters.
#'
#' The default value is \code{NULL}. In this case, for 3PL, 5 will be used
#' and for 1PL and 2PL, 1000 will be used.
#'
#' This value will be represented in BILOG-MG control file as:
#' \code{NALT = num_of_alternatives}.
#'
#' @param criterion Convergence criterion for EM and Newton iterations. The
#' default value is 0.01.
#' @param num_of_quadrature The number of quadrature points in MML estimation.
#' The default value is 81. This value will be represented in BILOG-MG control
#' file as: \code{NQPT = num_of_quadrature}. The BILOG-MG default value is
#' 20 if there are more than one group, 10 otherwise.
#' @param max_em_cycles An integer (0, 1, ...) representing the maximum number
#' of EM cycles. This value will be represented in BILOG-MG control file as:
#' \code{CYCLES = max_em_cycles}.
#' The default value is 100.
#' @param newton An integer (0, 1, ...) representing the number of Gauss-Newton
#' iterations following EM cycles. This value will be represented in BILOG-MG
#' control file as: \code{NEWTON = newton}.
#' @param reference_group Represent which group's ability distribution will be
#' set to mean = 0 and standard deviation = 1. For example, if the value is 1,
#' then the group whose code is 1 will have ability distribution with mean 0
#' and standard deviation 1. When groups are assumed to coming from a single
#' population, set this value to 0.
#'
#' The default value is \code{NULL}.
#'
#' This value will be represented in BILOG-MG control file as:
#' \code{REFERENCE = reference_group}.
#' @param fix This arguments helps to specify whether the parameters of
#' specific items are free to be estimated or are to be held fixed at
#' their starting values. This argument accepts a \code{data.frame} with
#' an \code{item_id} column in which items for which the item parameters
#' will be held fixed; \code{a}, \code{b}, \code{c} parameter values. See,
#' examples section for a demonstration.
#'
#' @param scoring_options A string vector of keywords/options that will be added
#' to the \code{SCORE} section in BILOG-MG syntax. Set the value of
#' \code{scoring_options} to \code{NULL} if scoring of individual examinees is
#' not necessary.
#'
#' The default value is \code{c("METHOD=1", "NOPRINT")} where scale scores
#' will be estimated using Maximum Likelihood estimation and the scoring
#' process will not be printed to the R console (if
#' \code{show_output_on_console = TRUE}).
#'
#' The main option to be added to this vector is \code{"METHOD=n"}.
#' Following options are available:
#'
#' \describe{
#' \item{"METHOD=1"}{Maximum Likelihood (ML)}
#' \item{"METHOD=2"}{Expected a Posteriori (EAP)}
#' \item{"METHOD=3"}{Maximum a Posteriori (MAP)}
#' }
#'
#' In addition to \code{"METHOD=n"} keyword, following keywords can be added:
#'
#' \code{"NOPRINT"}: Suppresses the display of the scores on the R console.
#'
#' \code{"FIT"}: likelihood ratio chi-square goodness-of-fit statistic for
#' each response pattern will be computed.
#'
#' \code{"NQPT=(list)"}, \code{"IDIST=n"}, \code{"PMN=(list)"},
#' \code{"PSD=(list)"}, \code{"RSCTYPE=n"}, \code{"LOCATION=(list)"},
#' \code{"SCALE=(list)"}, \code{"INFO=n"}, \code{"BIWEIGHT"},
#' \code{"YCOMMON"}, \code{"POP"}, \code{"MOMENTS"},
#' \code{"FILE"}, \code{"READF"}, \code{"REFERENCE=n"}, \code{"NFORMS=n"}
#'
#' See BILOG-MG manual for more details about these keywords/options.
#' @param calib_options A string vector of keywords/options that will be added
#' to the \code{CALIB} section in BILOG-MG syntax in addition to the keywords
#' \code{NQPT}, \code{CYCLES}, \code{NEWTON}, \code{CRIT}, \code{REFERENCE}.
#'
#' The default value is \code{c("NORMAL")}.
#'
#' When \code{"NORMAL"} is included in \code{calib_options}, the prior
#' distributions of ability in the population is assumed to have normal
#' distribution.
#'
#' When \code{"COMMON"} is included in \code{calib_options}, a common value
#' for the lower asymptote for all items in the 3PL model will be estimated.
#'
#' If items will be calibrated using \code{"RASCH"} model, set
#' \code{model = "Rasch"}, instead of adding \code{"RASCH"} keyword to
#' \code{calib_options}.
#'
#' Following keywords/options can be added to \code{calib_options}:
#'
#' \code{"PRINT=n"}, \code{"IDIST=n"}, \code{"PLOT=n"}, \code{"DIAGNOSIS=n"},
#' \code{"REFERENCE=n"}, \code{"SELECT=(list)"}, \code{"RIDGE=(a,b,c)"},
#' \code{"ACCEL=n"}, \code{"NSD=n"}, \code{"COMMON"}, \code{"EMPIRICAL"},
#' \code{"NORMAL"}, \code{"FIXED"}, \code{"TPRIOR"}, \code{"SPRIOR"},
#' \code{"GPRIOR"}, \code{"NOTPRIOR"}, \code{"NOSPRIOR"}, \code{"NOGPRIOR"},
#' \code{"READPRIOR"}, \code{"NOFLOAT"}, \code{"FLOAT"}, \code{"NOADJUST"},
#' \code{"GROUP-PLOT"}, \code{"NFULL"}, \code{"CHI=(a,b)"}.
#'
#' See BILOG-MG manual for more details about these keywords/options.
#'
#' NOTE: Do not add any of the following keywords to \code{calib_options}
#' since they will already be included:
#'
#' \code{NQPT}, \code{CYCLES}, \code{NEWTON}, \code{CRIT}, \code{REFERENCE}
#'
#' @param prior_ability Prior ability is the quadrature points and weights of
#' the discrete finite representations of the prior distribution for the
#' groups. It should be a list in the following form:
#'
#' \code{list(<GROUP-NAME-1> = list(points = ...., weights = ...),
#' <GROUP-NAME-2> = list(points = ...., weights = ...),
#' ...
#' )}
#'
#' \code{GROUP-NAME-1} is the name of the first group, \code{GROUP-NAME-2} is
#' the name of the second group, etc.
#'
#' See examples section for an example implementation.
#'
#' @param prior_ip Specify priors distributions for item parameters. The
#' default value is \code{NULL}, where BILOG-MG defaults will be used. In
#' order to specify priors, a list of one or more of the following elements
#' needs to be provided:
#' \describe{
#' \item{\code{"ALPHA"}}{"'alpha' parameters for the beta prior distribution
#' of lower asymptote (guessing) parameters"}
#' \item{\code{"BETA"}}{"'beta' parameters for the beta prior distribution
#' of lower asymptote (guessing) parameters."}
#' \item{\code{"SMU"}}{prior means for slope parameters}
#' \item{\code{"SSIGMA"}}{prior standard deviations for slope parameters}
#' \item{\code{"TMU"}}{prior means for threshold parameters}
#' \item{\code{"TSIGMA"}}{prior standard deviations for threshold
#' parameters}
#' }
#' Quoted descriptions were taken from BILOG-MG manual.
#'
#' Here are couple examples:
#' \code{list(ALPHA = 4, BETA = 3, SMU = 1, SSIGMA = 1.648, TMU = 0,
#' TSIGMA = 2)}
#'
#' A very strong prior for guessing which almost fixes all guessing
#' parameters at 0.2:
#'
#' \code{list(ALPHA = 1000000, BETA = 4000000)}
#'
#' Fix guessing at 0.25:
#' \code{list(ALPHA = 1000000, BETA = 3000000)}
#'
#' More generally, one can play with the alpha and beta parameters to obtain
#' desired number considering the mode of beta distribution is:
#'
#' \deqn{mode = \frac{\alpha - 1}{\alpha + \beta - 2}}
#'
#' Also, one can set \code{SSIGMA} or \code{TSIGMA} to a very small value to
#' effectively fix the item parameters, for example set \code{TSIGMA = 0.005}
#' or \code{SSIGMA = 0.001} to effectively fix those item parameters. Note
#' that there might be convergence issues with these restrictions.
#'
#' Note that a non-null \code{prior_ip} value will automatically add
#' \code{READPRIOR} option to \code{CALIB} section.
#'
#' @param overwrite If \code{TRUE} and there are already a BILOG-MG analysis
#' files in the target path with the same name, these file will be
#' overwritten.
#' @param show_output_on_console logical (not NA), indicates whether to capture
#' the output of the command and show it on the R console. The default value
#' is \code{TRUE}.
#' @param bilog_exe_folder The location of the \code{"blm1.exe"},
#' \code{"blm2.exe"} and \code{"blm3.exe"} files. The default location is
#' \code{file.path("C:/Program Files/BILOGMG")}.
#'
#' @return A list of following objects:
#' \describe{
#' \item{"ip"}{An \code{\link{Itempool-class}} object holding the item
#' parameters. Please check whether model converged (using
#' \code{...$converged}) before interpreting/using \code{ip}.
#' This element will not be created when \code{model = "CTT"}.}
#' \item{"score"}{A data frame object that holds the number of item
#' examinee has attempted (\code{tried}), the number of item examinee got
#' right (\code{right}), the estimated scores of examinees
#' (\code{ability}), the standard errors of ability estimates (\code{se}),
#' and the probability of the response string (\code{prob}). This element
#' will not be created when \code{model = "CTT"}.}
#' \item{"ctt"}{The Classical Test Theory (CTT) stats such as p-value,
#' biserial, point-biserial estimated by BILOG-MG. If there are groups,
#' then the CTT statistics for groups can be found in
#' \code{ctt$group$GROUP-NAME}. Overall statistics for the whole group is
#' at \code{ctt$overall}.
#' }
#' \item{"failed_items"}{A data frame consist of items that cannot be
#' estimated.}
#' \item{"syntax"}{The syntax file.}
#' \item{"converged"}{A logical value indicating whether a model has been
#' converged or not. If the value is \code{TRUE}, model has been
#' converged. This element will not be created when \code{model = "CTT"}.}
#' \item{"cycle"}{Number of cycles run before calibration converge or fail
#' to converge.}
#' \item{"largest_change"}{Largest change between the last two cycles.}
#' \item{"neg_2_log_likelihood"}{-2 Log Likelihood value. This value is
#' \code{NULL}, when model does not converge. This element will not be
#' created when \code{model = "CTT"}.}
#' \item{"input"}{A list object that stores the arguments that are passed
#' to the function.}
#' }
#'
#' @export
#'
#' @author Emre Gonulates
#'
#' @examples
#' \dontrun{
#' #############################################
#' ############## Example 1 - 2PL ##############
#' #############################################
#' # IRT Two-parameter Logistic Model Calibration
#'
#' # Create responses to be used in BILOG-MG estimation
#' true_theta <- rnorm(4000)
#' true_ip <- generate_ip(n = 30, model = "2PL")
#' resp <- sim_resp(true_ip, true_theta)
#'
#' # The following line will run BILOG-MG, estimate 2PL model and put the
#' # analysis results under the target directory:
#' bilog_calib <- est_bilog(x = resp, model = "2PL",
#' target_dir = "C:/Temp/Analysis",
#' overwrite = TRUE)
#' # Check whether the calibration converged
#' bilog_calib$converged
#'
#' # Get the estimated item pool
#' bilog_calib$ip
#'
#' # See the BILOG-MG syntax
#' cat(bilog_calib$syntax)
#'
#' # See the classical test theory statistics estimated by BILOG-MG:
#' bilog_calib$ctt
#'
#' # Get -2LogLikelihood for the model (mainly for model comparison purposes):
#' bilog_calib$neg_2_log_likelihood
#'
#' # Get estimated scores
#' head(bilog_calib$score)
#'
#' # Compare true and estimated abilities
#' plot(true_theta, bilog_calib$score$ability, xlab = "True Theta",
#' ylab = "Estimated theta")
#' abline(a = 0, b = 1, col = "red", lty = 2)
#'
#' # Compare true item parameters
#' plot(true_ip$a, bilog_calib$ip$a, xlab = "True 'a'", ylab = "Estimated 'a'")
#' abline(a = 0, b = 1, col = "red", lty = 2)
#'
#' plot(true_ip$b, bilog_calib$ip$b, xlab = "True 'b'", ylab = "Estimated 'b'")
#' abline(a = 0, b = 1, col = "red", lty = 2)
#'
#' # Note that Bilog-MG centers the ability at mean 0.
#' mean(bilog_calib$score$ability)
#'
#' # Quadrature points and posterior weights:
#' head(bilog_calib$posterior_dist)
#'
#' #############################################
#' ############## Example 2 - EAP ##############
#' #############################################
#' # Getting Expected-a-posteriori theta scores
#' result <- est_bilog(x = resp, model = "2PL",
#' scoring_options = c("METHOD=2", "NOPRINT"),
#' target_dir = "C:/Temp/Analysis",
#' overwrite = TRUE)
#' head(result$score)
#'
#'
#' ###############################################
#' ############## Example 3 - Rasch ##############
#' ###############################################
#' # Rasch Model Calibration
#' true_theta <- rnorm(400)
#' true_ip <- generate_ip(n = 30, model = "Rasch")
#' resp <- sim_resp(true_ip, true_theta)
#'
#' # Run calibration
#' bilog_calib <- est_bilog(x = resp, model = "Rasch",
#' target_dir = "C:/Temp/Analysis",
#' overwrite = TRUE)
#' bilog_calib$ip
#'
#' plot(true_ip$b, bilog_calib$ip$b, xlab = "True 'b'", ylab = "Estimated 'b'")
#' abline(a = 0, b = 1, col = "red", lty = 2)
#'
#' # Note that the 'b' parameters are rescaled so that their arithmetic mean
#' # equals 0.0.
#' mean(bilog_calib$ip$b)
#'
#'
#' #############################################
#' ############## Example 4 - 3PL ##############
#' #############################################
#' # IRT Three-parameter Logistic Model Calibration
#'
#' # Create responses to be used in BILOG-MG estimation
#' true_theta <- rnorm(4000)
#' true_ip <- generate_ip(n = 30, model = "3PL")
#' resp <- sim_resp(true_ip, true_theta)
#'
#' # The following line will run BILOG-MG, estimate 2PL model and put the
#' # analysis results under the target directory:
#' bilog_calib <- est_bilog(x = resp, model = "3PL",
#' target_dir = "C:/Temp/Analysis",
#' overwrite = TRUE)
#'
#' bilog_calib$ip
#'
#' #############################################
#' ############## Example 5 - 1PL ##############
#' #############################################
#' # One-Parameter Logistic Model Calibration
#' true_theta <- rnorm(800)
#' true_ip <- generate_ip(n = 30, model = "2PL")
#' # Set 'a' parameters to a fixed number
#' true_ip$a <- 1.5
#' resp <- sim_resp(true_ip, true_theta)
#'
#' # Run calibration
#' bilog_calib <- est_bilog(x = resp, model = "1PL",
#' target_dir = "C:/Temp/Analysis",
#' overwrite = TRUE)
#' # Note that all 'a' parameter values and all 'se_a' values are the same:
#' bilog_calib$ip
#'
#' plot(true_ip$b, bilog_calib$ip$b, xlab = "True 'b'", ylab = "Estimated 'b'")
#' abline(a = 0, b = 1, col = "red", lty = 2)
#'
#'
#' #############################################################
#' ############## Example 6.1 - Multi-group - 3PL ##############
#' #############################################################
#' # Multi-group IRT calibration - 3PL
#'
#' ## Generate Data ##
#' ip <- generate_ip(n = 35, model = "3PL", D = 1.7)
#' n_upper <- sample(1200:3000, 1)
#' n_lower <- sample(1900:2800, 1)
#' theta_upper <- rnorm(n_upper, 1.5, .25)
#' theta_lower <- rnorm(n_lower)
#' resp <- sim_resp(ip = ip, theta = c(theta_lower, theta_upper))
#' # Create response data where first column group information
#' dt <- data.frame(level = c(rep("Lower", n_lower), rep("Upper", n_upper)),
#' resp)
#'
#' ## Run Calibration ##
#' mg_calib <- est_bilog(x = dt, model = "3PL",
#' group_var = "level",
#' reference_group = "Lower",
#' items = 2:ncol(dt), # Exclude the 'group' column
#' num_of_alternatives = 5,
#' # Use MAP ability estimation.
#' # "FIT": calculate GOF for response patterns
#' scoring_options = c("METHOD=3", "NOPRINT", "FIT"),
#' target_dir = "C:/Temp/Analysis", overwrite = TRUE,
#' show_output_on_console = FALSE)
#' # Estimated item pool
#' mg_calib$ip
#' # Print group means
#' mg_calib$group_info
#' # Check Convergence
#' mg_calib$converged
#' # Print estimated scores of first five examinees
#' head(mg_calib$score)
#'
#' # Posterior distributions of 'Lower' (in red) and 'Upper' group
#' plot(mg_calib$posterior_dist$Upper$point,
#' mg_calib$posterior_dist$Upper$weight)
#' points(mg_calib$posterior_dist$Lower$point,
#' mg_calib$posterior_dist$Lower$weight, col = "red")
#'
#'
#' #############################################################
#' ############## Example 6.2 - Multi-group - Response_set #####
#' #############################################################
#' # Multi-group IRT calibration - Response_set 2PL
#'
#' ## Generate Data ##
#' ip <- generate_ip(n = 35, model = "2PL", D = 1.7)
#' n_upper <- sample(1000:2000, 1)
#' n_lower <- sample(1000:2000, 1)
#' resp_set <- generate_resp_set(
#' ip = ip, theta = c(rnorm(n_lower), rnorm(n_upper, 1.5, .25)))
#' # Attach the group information
#' resp_set$mygroup <- c(rep("Lower", n_lower), rep("Upper", n_upper))
#'
#' ## Run Calibration ##
#' mg_calib <- est_bilog(x = resp_set,
#' model = "2PL",
#' group_var = "mygroup",
#' reference_group = "Lower",
#' target_dir = "C:/Temp/Analysis",
#' overwrite = TRUE,
#' show_output_on_console = FALSE)
#' # Estimated item pool
#' mg_calib$ip
#' # Print group means
#' mg_calib$group_info
#'
#' ###############################################################
#' ############## Example 6.3 - Multi-group - 1PL ################
#' ###############################################################
#' # Multi-group IRT calibration - 1PL
#'
#' ## Generate Data ##
#' n_item <- sample(30:40, 1)
#' ip <- generate_ip(n = n_item, model = "2PL", D = 1.7)
#' ip$a <- 1.25
#' n_upper <- sample(700:1000, 1)
#' n_lower <- sample(1200:1800, 1)
#' theta_upper <- rnorm(n_upper, 1.5, .25)
#' theta_lower <- rnorm(n_lower)
#' resp <- sim_resp(ip = ip, theta = c(theta_lower, theta_upper))
#' # Create response data where first column group information
#' dt <- data.frame(level = c(rep("Lower", n_lower), rep("Upper", n_upper)),
#' resp)
#'
#' ## Run Calibration ##
#' mg_calib <- est_bilog(x = dt,
#' model = "1PL",
#' group_var = "level",
#' reference_group = "Lower",
#' items = 2:ncol(dt), # Exclude the 'group' column
#' target_dir = "C:/Temp/Analysis",
#' overwrite = TRUE,
#' show_output_on_console = FALSE)
#' # Estimated item pool
#' mg_calib$ip
#' # Print group means
#' mg_calib$group_info
#' # Check Convergence
#' mg_calib$converged
#' # Print estimated scores of first five examinees
#' head(mg_calib$score)
#'
#'
#' ###############################################################
#' ############## Example 6.4 - Multi-group - Prior Ability ######
#' ###############################################################
#' # Multi-group IRT calibration - 3PL with user supplied prior ability
#' # parameters
#' n_item <- sample(40:70, 1)
#' ip <- generate_ip(n = n_item, model = "3PL", D = 1.7)
#' n_upper <- sample(2000:4000, 1)
#' n_lower <- sample(3000:5000, 1)
#' theta_upper <- rgamma(n_upper, shape = 2, rate = 2)
#' # hist(theta_upper)
#' theta_lower <- rnorm(n_lower)
#' true_theta <- c(theta_lower, theta_upper)
#' resp <- sim_resp(ip = ip, theta = true_theta, prop_missing = .2)
#' # Create response data where first column group information
#' dt <- data.frame(level = c(rep("Lower", n_lower), rep("Upper", n_upper)),
#' resp)
#'
#' # Set prior ability parameters
#' points <- seq(-4, 4, .1)
#' prior_ability = list(
#' Lower = list(points = points, weights = dnorm(points)),
#' # Also try misspecified prior:
#' # Upper = list(points = points, weights = dnorm(points, 1, .25))
#' Upper = list(points = points, weights = dgamma(points, 2, 2))
#' )
#' mg_calib <- est_bilog(x = dt,
#' model = "3PL",
#' group_var = "level",
#' reference_group = "Lower",
#' items = 2:ncol(dt), # Exclude the 'group' column
#' calib_options = c("IDIST = 2"),
#' prior_ability = prior_ability,
#' # Use MAP ability estimation.
#' scoring_options = c("METHOD=3"),
#' target_dir = target_dir,
#' overwrite = TRUE,
#' show_output_on_console = FALSE)
#'
#' # Check whether model has convergence
#' mg_calib$converged
#'
#' # Group information
#' mg_calib$group_info
#'
#' # Quadrature points and posterior weights:
#' head(mg_calib$posterior_dist$Lower)
#'
#' plot(mg_calib$posterior_dist$Lower$point,
#' mg_calib$posterior_dist$Lower$weight,
#' xlab = "Quadrature Points",
#' ylab = "Weights",
#' xlim = c(min(c(mg_calib$posterior_dist$Lower$point,
#' mg_calib$posterior_dist$Upper$point)),
#' max(c(mg_calib$posterior_dist$Lower$point,
#' mg_calib$posterior_dist$Upper$point))),
#' ylim = c(min(c(mg_calib$posterior_dist$Lower$weight,
#' mg_calib$posterior_dist$Upper$weight)),
#' max(c(mg_calib$posterior_dist$Lower$weight,
#' mg_calib$posterior_dist$Upper$weight))))
#' points(mg_calib$posterior_dist$Upper$point,
#' mg_calib$posterior_dist$Upper$weight, col = "red")
#'
#' # Comparison of true and estimated item parameters
#' plot(ip$a, mg_calib$ip$a, xlab = "True 'a'", ylab = "Estimated 'a'")
#' plot(ip$b, mg_calib$ip$b, xlab = "True 'b'", ylab = "Estimated 'b'")
#' plot(ip$c, mg_calib$ip$c, xlab = "True 'c'", ylab = "Estimated 'c'")
#'
#' # Ability parameters
#' plot(true_theta, mg_calib$score$ability,
#' xlab = "True Theta",
#' ylab = "Estimated Theta")
#' abline(a = 0, b = 1, col = "red")
#'
#'
#'
#' ####################################################################
#' ############## Example 7 - Read Pars without BILOG-MG ##############
#' ####################################################################
#' # When user wants to read BILOG-MG output saved in the directory "Analysis/"
#' # with file names "my_analysis.PH1", "my_analysis.PH2", etc.,
#' use the following syntax to read Bilog output files without running the
#' calibration:
#' # (The following code does not require an installed BILOG-MG program on the
#' # computer.)
#' result <- est_bilog(target_dir = file.path("Analysis/"), model = "3PL",
#' analysis_name = "my_analysis", overwrite = FALSE)
#'
#'
#'
#' ####################################################################
#' ############## Example 8 - Fixed Item Parameters ###################
#' ####################################################################
#' # The idea is to fix individual item parameters to certain values.
#' # If all of values of a certain item parameter(s) need to be fixed,
#' # then, strong priors can also be used. See the documentation for
#' # "prior_ip" argument.
#'
#' # Create responses to be used in BILOG-MG estimation
#' true_theta <- rnorm(3000)
#' true_ip <- generate_ip(n = 30, model = "3PL")
#' resp <- sim_resp(true_ip, true_theta)
#'
#' # Setup the data frame that will hold 'item_id's to be fixed, and the
#' # item parameters to be fixed.
#' fix_pars <- data.frame(item_id = c("Item_5", "Item_4", "Item_10"),
#' a = c(1, 1.5, 1.75),
#' b = c(-1, 0.25, 0.75),
#' c = c(.15, .25, .35))
#'
#' fixed_calib <- est_bilog(x = resp, fix = fix_pars,
#' target_dir = "C:/Temp/Analysis", overwrite = TRUE)
#' # Check item parameters for Item_4, Item_5, Item_10:
#' fixed_calib$ip
#'
#' ######### #########
#' # If only some of the parameters are supplied, the defaults will be used
#' # for the missing parameters. For example, for the example below, the
#' # default 'a' parameter value is 1, and the default 'c' parameter value is
#' # (1/num_of_alternatives) = (1/5) = 0.2.
#' fix_pars2 <- data.frame(item_id = c("Item_1", "Item_2", "Item_3"),
#' b = c(-1, 0.25, 0.75))
#'
#' fixed_calib2 <- est_bilog(x = resp, fix = fix_pars2,
#' target_dir = "C:/Temp/Analysis", overwrite = TRUE)
#' # Check item parameters for Item_4, Item_5, Item_10:
#' fixed_calib2$ip
#'
#'
#' ##################################################################
#' ############## Example 9 - 3PL with Common Guessing ##############
#' ##################################################################
#' # IRT Three-parameter Logistic Model Calibration with Common Guessing
#'
#' # Create responses to be used in BILOG-MG estimation
#' true_theta <- rnorm(4000)
#' true_ip <- generate_ip(n = 30, model = "3PL")
#' resp <- sim_resp(true_ip, true_theta)
#'
#' # Run calibration:
#' bilog_calib <- est_bilog(x = resp, model = "3PL",
#' target_dir = "C:/Temp/Analysis",
#' calib_options = c("NORMAL", "COMMON"),
#' overwrite = TRUE)
#'
#' # Note the 'c' parameters
#' bilog_calib$ip
#'
#' ##################################################################
#' ############## Example 10 - 3PL with Fixed Guessing ##############
#' ##################################################################
#' # IRT Three-parameter Logistic Model Calibration with Fixed Guessing
#' # The aim is to fix guessing parameters of all items to a fixed
#' # number like 0.25
#' true_theta <- rnorm(3000)
#' true_ip <- generate_ip(n = 30, model = "3PL")
#' true_ip$c <- 0.25
#' resp <- sim_resp(true_ip, true_theta)
#' prc1 <- est_bilog(x = resp, model = "3PL", target_dir = "C:/Temp/Analysis",
#' prior_ip = list(ALPHA = 10000000, BETA = 30000000),
#' overwrite = TRUE)
#'
#'
#' } # end dontrun
est_bilog <- function(
x = NULL,
model = "3PL",
target_dir = getwd(),
analysis_name = "bilog_calibration",
items = NULL,
examinee_id_var = NULL,
group_var = NULL,
logistic = TRUE,
num_of_alternatives = NULL,
criterion = 0.01,
num_of_quadrature = 81,
max_em_cycles = 100,
newton = 20,
reference_group = NULL,
fix = NULL,
scoring_options = c("METHOD=1", "NOPRINT"),
calib_options = c("NORMAL"),
prior_ability = NULL,
prior_ip = NULL,
overwrite = FALSE,
show_output_on_console = TRUE,
bilog_exe_folder = file.path("C:/Program Files/BILOGMG")
) {
result <- list(ip = NULL,
score = NULL,
ctt = NULL,
failed_items = NULL,
syntax = NULL,
input = c(as.list(environment()), call = match.call())
)
result$input$x <- NULL
# set the input list
# result$input <- list(
# model = model,
# target_dir = target_dir,
# analysis_name = analysis_name,
# items = items,
# examinee_id_var = examinee_id_var,
# group_var = group_var,
# logistic = logistic,
# num_of_alternatives = num_of_alternatives,
# criterion = criterion,
# num_of_quadrature = num_of_quadrature,
# max_em_cycles = max_em_cycles,
# newton = newton,
# reference_group = reference_group,
# fix = fix,
# scoring_options = scoring_options,
# calib_options = calib_options,
# prior_ability = prior_ability,
# bilog_exe_folder = bilog_exe_folder
# )
if (!model %in% c("CTT", "Rasch", "1PL", "2PL", "3PL"))
stop("Invalid 'model' argument. 'model' should be either '1PL', '2PL' ",
"'3PL', 'Rasch' or 'CTT'.", call. = FALSE)
par_file <- file.path(target_dir, paste0(analysis_name, ".PAR"))
ph1_file <- file.path(target_dir, paste0(analysis_name, ".PH1"))
ph2_file <- file.path(target_dir, paste0(analysis_name, ".PH2"))
ph3_file <- file.path(target_dir, paste0(analysis_name, ".PH3"))
# If fix is not NULL, this is where all fixed item parameters will be saved.
fix_file <- file.path(target_dir, paste0(analysis_name, "_fix.PAR"))
score_file <- file.path(target_dir, paste0(analysis_name, ".SCO"))
cov_file <- file.path(target_dir, paste0(analysis_name, ".COV"))
D <- ifelse(logistic, 1, 1.7)
target_path <- file.path(target_dir, paste0(analysis_name, ".blm"))
# RDS file where the output of the calibration will be saved.
result_fn <- file.path(target_dir, paste0(analysis_name, ".rds"))
if (!overwrite && file.exists(result_fn)) {
temp_result <- readRDS(result_fn)
# The names in input that can differ but not affect calibration results
temp <- c("overwrite", "call", "show_output_on_console")
if (identical(temp_result$input[!names(temp_result$input) %in% temp],
result$input[!names(temp_result$input) %in% temp]))
return(temp_result)
}
if (overwrite || any(!file.exists(c(ph1_file, ph2_file, ph3_file,
par_file)))) {
# Check whether the filename is less than 128 characters:
if (nchar(target_path) > 128)
stop(paste0("\nThe following path for control file is ",
nchar(target_path), " character long, i.e. longer than the ",
"maximum length of 128 characters:\n\"", target_path,
"\"\nPlease choose a shorter 'analysis_name' or change the ",
"'target_dir'."), call. = FALSE)
# Determines the tab size, space added to lines following main analysis
# commands
tab <- paste0(rep(" ", nchar("GLOBAL") + 2), collapse = "")
text <- paste0(analysis_name, "\n\n")
# If overwrite is TRUE, delete important output files:
if (overwrite) {
extensions <- c("blm", "dat", "PH1", "PH2", "PH3", "COV", "NFN", "PAR",
"SCO")
suppressWarnings(file.remove(
file.path(target_dir, paste0(analysis_name, ".", extensions))))
}
# Create the data file:
data_output <- bilog_create_datafile(
x = x,
items = items,
examinee_id_var = examinee_id_var,
group_var = group_var,
reference_group = reference_group,
target_path = file.path(target_dir, paste0(analysis_name, ".dat")),
create_np_key_file = TRUE,
overwrite = overwrite)
# for non-multigroup data num_of_groups is 0.
num_of_groups <- data_output$num_of_groups
group_info <- data_output$group_info
reference_group <- data_output$reference_group
# in case x is Response_set, make sure x is matrix after this point.
x <- data_output$x
#########################################@###
################### GLOBAL ##############@###
#########################################@###
temp_text <- wrap_text(paste0(
">GLOBAL DFNAME='", data_output$data_file_path, "',\n"))
# # Add the file name where parameters values that should be fixed saved:
# if (!is.null(fix)) {
# temp_text <- paste0(temp_text, wrap_text(paste0(
# tab, "PRNAME='", normalizePath(fix_file, mustWork = FALSE), "',\n")))
# }
# Add model
num_of_pars <- as.integer(gsub("(.*)PL", "\\1", ifelse(
model %in% c("CTT", "Rasch"), "1PL", model)))
temp_text <- paste0(temp_text, tab, "NPARM = ", num_of_pars , ",\n", tab)
# Add Logistic: when added the natural metric of the logistic response
# function is assumed in all calculations.
if (logistic) temp_text <- paste0(temp_text, "LOGISTIC,\n", tab)
# Add SAVE statement
# This indicates that the SAVE command will follow the GLOBAL command.
temp_text <- paste0(temp_text, "SAVE;\n")
text <- c(text, temp_text)
########################################@###
################### SAVE ###############@###
########################################@###
temp_text <- paste0(">SAVE ")
tab <- paste0(rep(" ", nchar("SAVE") + 2), collapse = "")
# Add PARM
temp_text <- wrap_text(paste0(
temp_text, "PARM='", normalizePath(par_file, mustWork = FALSE), "',\n"))
# Add SCORE
temp_text <- paste0(temp_text, wrap_text(paste0(
tab, "SCORE='", normalizePath(score_file, mustWork = FALSE), "',\n")))
# Add COVARIANCE
temp_text <- paste0(temp_text, wrap_text(paste0(
tab, "COVARIANCE='", normalizePath(cov_file, mustWork = FALSE), "';\n")))
# # Add TSTAT
# temp_text <- paste0(
# temp_text, "',\n", tab, "TSTAT = '",
# normalizePath(file.path(target_dir, paste0(analysis_name, ".tst")),
# mustWork = FALSE))
# # Add PDISTRIB
# temp_text <- paste0(
# temp_text, "',\n", tab, "PDISTRIB = '",
# normalizePath(file.path(target_dir, paste0(analysis_name, ".dst")),
# mustWork = FALSE))
# temp_text <- paste0(temp_text, "';\n")
text <- c(text, temp_text)
##########################################@###
################### LENGTH ###############@###
##########################################@###
temp_text <- paste0(">LENGTH NITEMS = (", data_output$num_of_items, ");\n")
text <- c(text, temp_text)
#########################################@###
################### INPUT ###############@###
#########################################@###
# This section provides information that describes the raw data file.
temp_text <- paste0(">INPUT NTOTAL = ", data_output$num_of_items)
tab <- paste0(rep(" ", nchar("INPUT") + 2), collapse = "")
# Add number of alternatives
if (!is.null(num_of_alternatives) && is.numeric(num_of_alternatives))
temp_text <- paste0(temp_text, ",\n", tab, "NALT = ", num_of_alternatives)
# Add NFNAME:
temp_text <- paste0(
temp_text, ",\n", wrap_text(paste0(tab, "NFNAME = '",
normalizePath(data_output$np_key_file_path), "',\n")))
# Add NIDCHAR
temp_text <- paste0(temp_text, tab,
"NIDCHAR = ", data_output$num_id_char)
# Add number of groups NGROUP:
if (num_of_groups > 1)
temp_text <- paste0(temp_text, ",\n", tab,
"NGROUP = ", num_of_groups)
temp_text <- paste0(temp_text, ";\n")
text <- c(text, temp_text)
#########################################@###
################### ITEMS ###############@###
#########################################@###
# Setup the items to be printed.
if (is.null(items)) {
items <- colnames(x)
# Make sure 'items' is valid, i.e. each item is 8 character or less:
if (!is.null(items) && any(nchar(items) > 8)) {
stop("Invalid item IDs. Bilog-MG do not accept item IDs that are ",
"longer than eight characters.")
}
# Make sure to remove examinee_id_var
if (!is.null(examinee_id_var)) {
if (examinee_id_var %in% items) {
items <- items[items != examinee_id_var]
} else {
items <- items[-examinee_id_var]
}
}
# Make sure to remove group_var
if (!is.null(group_var)) {
if (group_var %in% items) {
items <- items[items != group_var]
} else {
items <- items[-group_var]
}
}
} else if (length(items) == 1 && items == "none") {
items <- NULL
}
if (is.null(items)) {
text <- c(text, paste0(">ITEMS ;\n"))
} else {
if (any(nchar(items) > 8)) {
stop("Invalid item IDs. Bilog-MG do not accept item IDs that are ",
"longer than eight characters.")
}
# temp_text <- wrap_text(paste0(
# ">ITEMS INAMES=(", paste0("'", items, "'", collapse = ","), ");\n"))
temp_text <- paste0(wrap_text(paste0(
">ITEMS INAMES=(", paste0("'", items, "'", collapse = ", "), ");"),
tab = paste0(tab, " "), skip_tab = 1), "\n")
text <- c(text, temp_text)
}
########################################@###
################### TEST ###############@###
########################################@###
# This section provides information that describes the raw data file.
temp_text <- paste0(">TEST1 TNAME = 'TEST01'")
tab <- paste0(rep(" ", nchar("TEST1") + 2), collapse = "")
temp_text <- paste0(temp_text, ",\n", tab, "INUMBER = (1(1)",
data_output$num_of_items, ")")
# Add item parameters that are fixed:
if (!is.null(fix)) {
# check whether fix is a data frame with relevant item_id's and
# item parameters
if (!is(fix, "data.frame") ||
!"item_id" %in% colnames(fix) ||
# ncol(fix) != num_of_pars + 1 ||
!any(c("a", "b", "c") %in% colnames(fix))
)
stop(paste0("Invalid 'fix' argument. 'fix' should be a data.frame with",
" 'item_id' column and ", num_of_pars, " for item ",
"parameters that needs to be fixed. "), call. = FALSE)
if (model == "CTT") {
stop("Invalid 'model' argument. 'fix' cannot be used when 'model' is ",
"'CTT'.", call. = FALSE)
}
if (!all(fix$item_id %in% items))
stop("Invalid 'fix' argument. All of the 'item_id' in the 'fix' ",
"data.frame should correspond to item_id's of the responses.",
call. = FALSE)
### Setup the "FIX = " part; fix_p="fix_pattern" ###
fix_p <- rle(1 * (items %in% fix$item_id))
fix_p <- paste0(fix_p$values, "(0)", fix_p$lengths, collapse = ",")
temp_text <- paste0(temp_text, ",\n",
wrap_text(paste0(tab, "FIX = (", fix_p, ")")))
### Write the PRNAME file: ###
temp_par <- data.frame(par = c("a", "b", "c"),
bilog_name = c("SLOPE", "THRESHLD", "GUESS"),
default = c(1, 0, 0))
for (i in 1:nrow(temp_par)) {
if (temp_par$par[i] %in% colnames(fix)) {
temp <- rep(temp_par$default[i], length(items))
for (j in 1:nrow(fix)) {
temp[fix$item_id[j] == items] <- fix[j, temp_par$par[i]]
}
temp_text <- paste0(
temp_text, ",\n", wrap_text(paste0(
tab, temp_par$bilog_name[i], " = (",
paste0(temp, collapse = ","), ")")))
# temp_text <- paste0(
# temp_text, ",\n", wrap_text(paste0(
# tab, temp_par$bilog_name[i], " = (",
# paste0(fix[, temp_par$par[i]], collapse = ","), ")")))
}
}
# fix_copy <- fix
# fix_copy$item_id <- which(items %in% fix$item_id)
# fix_copy[] <- lapply(fix_copy, as.character)
# temp_pattern <- paste0("%", apply(sapply(fix_copy, nchar), 2, max), "s",
# collapse = " ")
#
# # see p.222-223 of BILOT-MG Manual for FIX parameter
# fix_content <- c(as.character(nrow(fix_copy)),
# do.call(sprintf, c(fmt = temp_pattern, fix_copy)))
# writeLines(fix_content, fix_file)
}
temp_text <- paste0(temp_text, ";\n")
text <- c(text, temp_text)
##########################################@###
################### GROUPS ###############@###
##########################################@###
temp_text <- ""
for (g in seq_len(num_of_groups)) {
tab <- paste0(rep(" ", nchar("GROUP1") + 2), collapse = "")
temp_text <- paste0(temp_text,
">GROUP", g, " GNAME = '", group_info$name[g], "', ",
"LENGTH = ", data_output$num_of_items, ",\n", tab,
"INUMBER = (1(1)", data_output$num_of_items, ")")
temp_text <- paste0(temp_text, ";\n")
}
text <- c(text, temp_text)
### Formal Statement ###
text <- c(text, paste0(data_output$formal_statement, "\n"))
#########################################@###
################### CALIB ###############@###
#########################################@###
temp_text <- paste0(">CALIB ")
tab <- paste0(rep(" ", nchar("CALIB") + 2), collapse = "")
# Add NQPT
temp_text <- paste0(temp_text, "NQPT = ", num_of_quadrature)
# Add CYCLES
temp_text <- paste0(temp_text, ",\n", tab, "CYCLES = ", max_em_cycles)
# Add NEWTON
temp_text <- paste0(temp_text, ",\n", tab, "NEWTON = ", newton)
# Add CRIT
temp_text <- paste0(temp_text, ",\n", tab, "CRIT = ", criterion)
# If model is Rasch, add "RASCH" keyword to calib_options, also make sure
# "RASCH" keyword appears in calib_options only once.
if (model == "Rasch") {
calib_options <- c(calib_options["rasch" != tolower(calib_options)],
"RASCH")
}
# Add IDIST
# If prior_ability is not NULL and group_var is not NULL, and "IDIST = 2" is
# not in the calib_options, then add it.
if (!is.null(prior_ability) &&
!is.null(group_var) &&
!any(grepl("idist[ ]?=", tolower(calib_options))))
calib_options <- c(calib_options, "IDIST = 2")
# Add READPRIOR to calib_options if prior_ip is not NULL and valid:
prior_ip_names <- c("ALPHA", "BETA", "SMU", "SSIGMA", "TMU", "TSIGMA")
if (!is.null(prior_ip) &&
is.list(prior_ip) &&
!is.null(names(prior_ip)) &&
any(toupper(names(prior_ip)) %in% prior_ip_names)
) {
calib_options <- c(calib_options, "READPRI")
# if "READPRIOR" is not already in calib_options, add it
if (!any(grepl("*readpri", tolower(calib_options))))
calib_options <- c(calib_options, "READPRI")
}
# Add calib_options
if (length(calib_options) > 0)
for (i in calib_options) temp_text <- paste0(temp_text, ",\n", tab, i)
# Add REFERENCE
if (!is.null(reference_group) && num_of_groups > 1) {
if (length(reference_group) == 1) {
temp_text <- paste0(
temp_text, ",\n", tab, "REFERENCE = ",
group_info$code[group_info$name == reference_group])
}
}
temp_text <- paste0(temp_text, ";\n")
text <- c(text, temp_text)
#########################################@###
################### QUADS (for CALIB) ###@###
#########################################@###
if (!is.null(prior_ability) &&
!is.null(group_var) &&
any(grepl("idist[ ]?=[ ]?[12]", tolower(calib_options)))) {
# Check whether prior_ability list is valid. It should be:
# (1) a list
# (2) it's length should be equal to the number of groups.
# (3) Each element should have two numeric elements named "points" and
# "weights".
# (4) Ideally quadrature points are from lowest to the highest
# (5) Weights should be positive fractions and summing up to 1.
if (
!inherits(prior_ability, "list") ||
length(prior_ability) != num_of_groups ||
!all(sapply(prior_ability, inherits, "list")) ||
# Name of the each list element should be equal to the group names
!all(names(lapply(prior_ability, names)) %in% group_info$name) ||
# Each list element should have two named numeric vectors: weights and
# points
!all(sapply(prior_ability, function(i) all(c("points", "weights") %in%
names(i)))) ||
# Both weights and points should be numeric vectors
!all(sapply(prior_ability, function(i) all(sapply(i, is.numeric) &
!sapply(i, is.matrix))))
)
stop(paste0(
"Invalid 'prior_ability' argument. 'prior_ability' should satisfy ",
"the following conditions: \n",
"(1) it should be a list with ", num_of_groups, " elements;\n",
"(2) Each element should be a list object; \n",
"(3) Name of the each list element should be equal to the ",
"group names: ", paste0(group_info$name, collapse = ", "), ";\n",
"(4) Each list element should have two named numeric vectors: ",
"'list(points = ..., weights = ...)'."
))
for (g in seq_len(num_of_groups)) {
tab <- paste0(rep(" ", nchar("QUAD1") + 2), collapse = "")
temp_g <- prior_ability[[group_info$name[g]]]
temp_g$points <- gsub("e", "E", format(
temp_g$points / sum(temp_g$weights), scientific = TRUE))
temp_g$weights <- gsub("e", "E", format(
temp_g$weights / sum(temp_g$weights), scientific = TRUE))
# temp_text <- wrap_text(paste0(
# temp_text, ">QUAD", g,
# " POINTS = (", paste0(temp_g$points, collapse = " "), "),\n", tab,
# "WEIGHTS = (", paste0(temp_g$weights, collapse = " "), ")"),
# tab = tab)
temp_text <- wrap_text(paste0(
">QUAD", g,
" POINTS = (", paste0(temp_g$points, collapse = " "), "),"),
tab = tab, skip_tab = 1)
text <- c(text, temp_text)
temp_text <- wrap_text(paste0(
"WEIGHTS = (", paste0(temp_g$weights, collapse = " "), ")"),
tab = tab, skip_tab = 0)
temp_text <- paste0("\n", temp_text, ";\n")
# cat(temp_text)
text <- c(text, temp_text)
}
}
#########################################@###
################### PRIORS ##############@###
#########################################@###
if (!is.null(prior_ip)) {
if (!is.list(prior_ip) || is.null(names(prior_ip)) ||
!any(toupper(names(prior_ip)) %in% prior_ip_names)) {
stop("Invalid 'prior_ip'. Please provide a valid named list object. ",
"See the examples at '?est_bilog'.", call. = FALSE)
}
tab <- paste0(rep(" ", nchar("PRIORS") + 2), collapse = "")
names(prior_ip) <- toupper(names(prior_ip))
# prior_ip <- c(prior_ip, B = 12, SMU = 1)
# prior_ip <- list(aLPHA = 1, BETA = 2, B = 12, SMU = 1)
# prior_ip <- list(aLPHA = 1)
prior_ip <- prior_ip[names(prior_ip) %in% prior_ip_names]
temp_text <- paste0(">PRIORS ")
for (i in names(prior_ip)) {
if (is_single_value(prior_ip[[i]], class = c("numeric", "integer"))) {
temp <- paste0(i, " = (", prior_ip[[i]], "(0)",
data_output$num_of_items, ")")
if (i == names(prior_ip)[1]) {
temp_text <- paste0(temp_text, temp, ",\n")
} else {
temp_text <- paste0(temp_text, tab, temp, ",\n")
}
} else {
stop("Invalid 'prior_ip'. All elements of the list should be a ",
"single numeric value. ",
"See the examples at '?est_bilog'.", call. = FALSE)
}
}
text <- c(text, gsub(",\\n$", ";\n", temp_text))
}
#########################################@###
################### SCORE ###############@###
#########################################@###
if (!is.null(scoring_options) && length(scoring_options) > 0) {
temp_text <- paste0(">SCORE ")
tab <- paste0(rep(" ", nchar("SCORE") + 2), collapse = "")
for (i in 1:length(scoring_options)) {
temp_text <- paste0(temp_text, ifelse(i == 1, "", paste0(",\n", tab)),
scoring_options[i])
}
temp_text <- paste0(temp_text, ";\n")
text <- c(text, temp_text)
}
########################################################################@###
################### RUN CALIBRATION ####################################@###
########################################################################@###
result$syntax <- paste0(text, collapse = "\n")
command <- paste0(
"cd ", normalizePath(target_dir), " && \"",
file.path(bilog_exe_folder, "blm1.exe"), "\" ", analysis_name ,
" NUM=8900 CHAR=2000"
)
if (model != "CTT") {
command <- paste0(
command, " && \"",
file.path(bilog_exe_folder, "blm2.exe"), "\" ", analysis_name ,
" NUM=8900 CHAR=2000 && \"",
file.path(bilog_exe_folder, "blm3.exe"), "\" ", analysis_name,
" NUM=8900 CHAR=2000"
)
}
if (overwrite || !file.exists(par_file)) {
if (!all(file.exists(file.path(bilog_exe_folder,
paste0("blm", 1:3, ".exe"))))) {
stop(paste0("The required BILOG-MG executable files does not exist at ",
"\"", bilog_exe_folder, "\"."),
call. = FALSE)
}
writeLines(text = text, con = target_path, sep = "")
system("cmd.exe", input = command, wait = TRUE,
show.output.on.console = show_output_on_console)
if (show_output_on_console) cat("\n")
}
} else if (file.exists(target_path)) {
group_info <- bilog_create_group_info(x = x, group_var = group_var)
num_of_groups <- group_info$num_of_groups
group_info <- group_info$group_info
result$syntax <- paste0(readLines(con = target_path, skipNul = TRUE),
collapse = "\n")
}
# Determine whether the model converged or not:
result$converged <- check_bilog_convergence(par_file = par_file,
ph2_file = ph2_file,
ph3_file = ph3_file)
if (file.exists(ph2_file)) {
ph2_content <- readLines(con = ph2_file)
temp_pattern <- paste0(
"^ CYCLE([[:blank:]]*)([[:digit:]]*);([[:blank:]]*)",
"LARGEST CHANGE=([[:blank:]]+)([0-9]*?\\.?[0-9]*?)(.*)$")
temp <- utils::tail(
ph2_content[grepl(pattern = temp_pattern, ph2_content)], 1)
result$cycle <- as.integer(gsub("^ CYCLE (.*);(.*) LARGEST CHANGE= (.*)$",
replacement = "\\1", temp))
result$largest_change <- as.numeric(gsub(temp_pattern,
replacement = "\\6", temp))
# If the result converged get the -2 Log Likelihood
if (result$converged) {
result$neg_2_log_likelihood <- as.numeric(
gsub("^ -2 LOG LIKELIHOOD = ", "",
utils::tail(ph2_content[grepl(pattern = "^ -2 LOG LIKELIHOOD = ",
ph2_content)],1)))
# Get posterior quadrature points and weights:
result$posterior_dist <- bilog_read_posterior_dist(
ph2_file = ph2_file, group_info = group_info)
} else result$neg_2_log_likelihood <- NULL
}
# Read item parameters
if (model == "CTT") {
result <- result[-which(names(result) %in%
c("ip", "converged", "failed_items", "score"))]
} else {
par_results <- bilog_read_pars(
par_file = par_file,
ph1_file = ph1_file,
items = items,
model = ifelse("COMMON" %in% calib_options, "3PL", model),
D = D)
result$ip <- par_results$ip
result$failed_items <- par_results$failed_items
}
result$ctt <- bilog_read_ctt(ph1_file)
# Check if there is valid group means
if (model != "CTT" && file.exists(ph3_file) && result$converged) {
if (num_of_groups > 1) {
result$group_info <- bilog_read_group_means(
ph3_file, group_info = group_info)
# In case reading directly from the Bilog output and data is not available
} else if (!is.null(result$ctt$group)) {
result$group_info <- bilog_read_group_means(
ph3_file, group_info = NULL)
}
}
if (model != "CTT" && !is.null(scoring_options) &&
(length(scoring_options) > 0) && result$converged) {
result$score <- bilog_read_scores(score_file, x = x,
examinee_id_var = examinee_id_var,
group_var = group_var)
}
attr(result, "class") <- "bilog_output"
saveRDS(object = result, file = result_fn)
return(result)
}
###############################################################################@
############################# print.bilog_output ###############################
###############################################################################@
#' Prints \code{bilog_output} objects
#'
#' @param x A \code{bilog_output} object.
#' @param ... further arguments passed to or from other methods.
#'
#' @keywords internal
#'
#' @export
#'
#' @importFrom utils head str
#'
#' @author Emre Gonulates
#'
#' @method print bilog_output
#'
print.bilog_output <- function(x, ...) {
y <- x
cat(paste0("Use `cat(..$syntax)` to view the syntax.\n"))
# Print Input
cat(paste0("\n$input\n"))
cat(str(x$input, give.attr = FALSE, no.list = TRUE, give.head = FALSE,
comp.str = "$"))
# Other arguments
class(y) <- NULL
y$ip <- NULL
y$group_info <- NULL
y$score <- NULL
y$input <- NULL
y$ctt <- NULL
y$syntax <- NULL
y$converged <- NULL
cat(paste0("\nAdditional output:\n"))
cat(str(y, give.attr = FALSE, no.list = TRUE, give.head = FALSE,
comp.str = "$"))
# CTT
if (is.data.frame(x$ctt)) {
cat(paste0("\n$ctt\n"))
print(head(x$ctt, 5))
cat(paste0("# ... \n"))
} else {
cat(paste0("\n$ctt\n"))
cat(str(x$ctt, no.list = TRUE, give.head = FALSE, give.attr = FALSE,
max.level = 1, comp.str = "$"))
}
if (x$converged) {
# Score
if (!is.null(x$score)) {
cat(paste0("\n$score\n"))
print(head(x$score, 5))
cat(paste0("# ... with ", nrow(x$score) - 5, " more examinees\n"))
}
# group_info
if (!is.null(x$group_info)) {
cat(paste0("\n$group_info\n"))
print(x$group_info)
}
# IP
cat(paste0("\n$ip\n"))
print(x$ip, n = min(x$ip$n$items, 5))
}
cat(paste0("\nConverged: ", x$converged, "\n"))
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.