#' Fit latent-class MPT models
#'
#' This function estimates one or more latent-class MPT models.
#' Starting from a one-class solution, it increments the number of latent classes
#' until a criterion is met.
#'
#' @param model_file Character. File path to an .eqn file
#' @param data_file Character. File path to a data file
#' @param classes,max_classes The number of classes for a single model or the
#' maximum number of classes. To estimate a single model, specify \code{classes};
#' to estimate multiple models with increasing number of classes, specify \code{max_classes}.
#' @param runs Integer. Number of optimization runs.
#' @param fisher_information The type of Fisher Information to be computed.
#' Can be either \code{expected}, \code{montecarlo}, \code{observed}, or \code{none}.
#' Defaults to \code{expected}. However, if expected Fisher Information cannot be
#' computed, the Monte Carlo method is used.
#' @param montecarlo_samples The number of simulations to be used for Monte Carlo
#' Fisher Information. Defaults to 1e5.
#' @param crit If multiple models with an increasing number of classes are to be estimated,
#' use this criterion to stop estimating more complex models. Can be one of \code{"AIC"},
#' \code{"BIC"}, or \code{"BIC2"}.
#' @param verbose Logical. Indicating whether an announcement is printed on the
#' console when fitting of a new model starts.
#' @param keep_files Logical. Should temporary files be retained?
#'
#' @references
#' Stahl, C., & Klauer, K.C. (2007). HMMTree: A computer program for hierarchical
#' multinomial processing tree models. \emph{Behavior Research Methods}, \emph{39}, 267-273.
#'
#' @export
lc <- function(
model_file
, data_file
, classes = NULL
, max_classes = 20
, runs = 20
, fisher_information = "expected"
, montecarlo_samples = 1e5
, crit = "AIC"
, verbose = FALSE
, keep_files = FALSE
){
# Check if running on Windows
if(!Sys.info()[["sysname"]]=="Windows") {
stop("Sorry, but model estimation only works on Windows machines. We know this must be disappointing.")
}
# Input validation ----
if (!(crit %in% c("AIC","BIC", "BIC2"))) {
stop("Parameter crit must be one of 'AIC', 'BIC', or 'BIC2'.")
}
# Create legacy eqn file
eqn_file <- paste0("HMMTreeR-tmpfile-", basename(model_file))
eqn_conv_out <- simplify_eqn(model_file = model_file, eqn_file = eqn_file)
# Create HMMTree data file (tab-separated, first column is dataset name)
dataset_name <- gsub(basename(data_file), pattern = ".dat|.csv|.txt| ", replacement = "")
dat_file <- paste0("HMMTreeR-tmpfile-", dataset_name, ".dat")
if(grepl(x = data_file, pattern = ".csv")) {
tmp_dat <- utils::read.csv(file = data_file)
if(any(grepl(x = tmp_dat[, 1], pattern = ";"))) {
tmp_dat <- utils::read.csv2(file = data_file, check.names = FALSE)
# print(tmp_dat)
}
} else {
if(grepl(x = data_file, pattern = ".dat")) {
tmp_dat <- utils::read.delim(file = data_file, check.names = FALSE)
}
}
# print(tmp_dat)
if(!all(eqn_conv_out$cat_order %in% colnames(tmp_dat))) {
stop("Some categories defined in the .eqn file are missing from data.")
}
# try_conv <- suppressWarnings(as.integer(eqn_conv_out$cat_order))
# print(eqn_conv_out$cat_order)
# print(colnames(tmp_dat))
# if(any(is.na(try_conv))) {
tmp_dat <- tmp_dat[, eqn_conv_out$cat_order]
tmp_dat <- cbind(
data.frame(
id = dataset_name
)
, tmp_dat
)
# }
utils::write.table(
x = tmp_dat
, file = dat_file
, sep = "\t"
, row.names = FALSE
, col.names = TRUE
, quote = FALSE
)
nsubj <- nrow(tmp_dat)
res <- list()
# test fit: is (co)variance accounted for by single-class model?
#pval <- pchisq(q=as.numeric(res[[nclass]]$S1), df=as.numeric(res[[nclass]]$S1_df), lower.tail=FALSE)
#if ((length(pval)>0) || (pval < .05)){ # aggregate does not fit, estimate latent-class models
if(!is.null(classes)) {
# fit only one solution
res <- model_object(
hmmtreec(
model_file = eqn_file
, data_file = dat_file
, nsubj = nsubj
, nclass = classes
, nruns = runs
, fi = switch(fisher_information, "expected" = 3L, "montecarlo" = 2L, "observed" = 1L, "none" = 0L)
, mc = montecarlo_samples
, keep_files = keep_files
)
)
} else {
# Estimate more complex models until criterion is reached
n_classes <- 1L
repeat {
if(verbose) {
if(n_classes == 1L) {
message("Fitting a latent-class MPT with one class (as a baseline).")
} else {
message("Fitting a latent-class MPT with ", n_classes, " classes.")
}
}
tmp_hmmtreec <- try(
hmmtreec(
model_file = eqn_file
, data_file = dat_file
, nsubj = nsubj
, nclass = n_classes
, nruns = runs
, fi = switch(fisher_information, "expected" = 3, "montecarlo" = 2, "observed" = 1, "none" = 0)
, mc = montecarlo_samples
, keep_files = keep_files
)
)
if(class(tmp_hmmtreec) == "try-error") {
break
}
tmp <- model_object(tmp_hmmtreec)
# Stop estimating more complex models if failcodes occur
if(tmp$description$failcode!=0) {
break
}
# Stop if negative degrees of freedom occur (seems to be a byproduct of ill-conditioned FI)
if(any(tmp$fit_statistics[c("df_M1", "df_M2", "df_S1", "df_S2")]<0)) {
break
}
# Stop estimating more complex models if criterion (i.e., AIC or BIC) increased
if(n_classes > 1) {
crit_new <- tmp$fit_statistics[[crit]]
crit_old <- res[[n_classes-1]]$fit_statistics[[crit]]
if(isTRUE(crit_new > crit_old)) break
}
res[[n_classes]] <- tmp
n_classes <- n_classes + 1
if(n_classes > max_classes) {
break
}
}
class(res) <- c("lc_mpt_list", class(res))
}
# remove temporary files
if(!keep_files) {
file.remove(eqn_file, dat_file)
}
# return
res
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.