#' @title function to estimate generalized ordered choice model
#'
#' @author X.PAN
#'
#' @description estimate ordered choice model
#'
#' @export X.order
#'
#' @importFrom rlang :=
#'
#' @param data A tibble, input data.
#'
#' @param choice A character, name of column indicating individuals' rate.
#'
#' @param rate A vector of characters, the scale that used ot rate an
#' alternative.
#'
#' @param attrs A list contains only one slice, whose elements have to be positive
#' integers. The only one slice is a tibble, named "attrs_alts",
#' indicating the all attributes (including context variables). The column
#' names in the tibbles represent the name of attributes. All have
#' same numbers of rows (elements) with the length of argument "rate" minus
#' one. The element in the tibble indicates if the attribute is
#' alternative-specific: if some elements in one column have a same value,
#' then the corresponding segments share a generic parameter in terms of this
#' attribute.
#'
#' @param attr_coding A vector of character, names of categorical attributes.
#' Default = NULL, which means all attributes are continuous.
#'
#' @param attr_level A list of vector of character, representing the levels of
#' all categorical attributes. The first element of a vector is set as base
#' level, others are set as 1, 2, 3...in sequence. This argument is only
#' relevant when argument "attr_coding" is not NULL. Default = NULL.
#'
#' @param interact A vector of character, name of attributes' interaction,
#' connected by "*". Default = NULL.
#'
#' @param type A character, indicating which type of ordered model is used,
#' either "gologit" or "goprobit". Default = "gologit".
#'
#' @param method A character, passed to the function maxLik() in "maxLik"
#' package. It indicates the method used in maximum likelihood estimation.
#' Default = "BFGS".
#'
#' @param estimator A argument in the function maxLik() from "maxLik"
#' package. Here this argument could only be either TRUE or "BHHH"/"bhhh".
#' Default = TRUE.
#'
#' @param param_fixed A vector of characters, passed to the function maxLik() in
#' "maxLik" package. It indicates which parameters are fixed. Default = NULL.
#'
#' @param param_start A vector of numbers, passed to the function maxLik() in
#' "maxLik" package. It indicages the initial values of parameters.
#' Default = NULL.
#'
X.order <- function(data, choice, rate, attrs, attr_coding = NULL,
attr_level = NULL, interact = NULL, type = "gologit",
method = "BFGS", estimator = TRUE,
param_fixed = NULL, param_start = NULL){
# data preparation --------------------------------------------------------
# change the column of choice to character
data <- dplyr::mutate_at(data, .vars = dplyr::vars(choice), .funs = as.character)
# change the column of choice to ordered factor
data <- dplyr::mutate_at(data, .vars = dplyr::vars(choice),
.funs = ~factor(., levels = rate, ordered = TRUE))
# change the column of choice to numeric starting with 1
data <- dplyr::mutate_at(data, .vars = dplyr::vars(choice), .funs = as.numeric)
# change the start number to 0
data[choice] <- data[choice] - 1
# update the argument "rate", which starts with 0
rate <- as.character(0:(length(rate) - 1))
# to add an extra row for each choice task
rate <- c(rate, "pos.inf")
# data preparation and return the data set can be used and the utility formula
process_data <- L.data(data = data, choice = choice, alts = rate,
attrs = attrs, attr_coding = attr_coding,
attr_level = attr_level, interact = interact,
avi = NULL, flag = "order")
# get the data set
data <- process_data[[1]]
# get the utility formula
utility <- process_data[[2]]
# thresholds setting
process_data <- L.order(data = data, rate = rate, choice = choice)
# update the data set by adding the thresholds
data <- process_data[[1]]
# to get the threshold columns
threshold <- process_data[[2]]
# get the data that needs to model estimation
df <- stats::model.frame(utility, data)
# get respondents' choice
y <- df[[1]]
# get the attributes columns
x <- as.matrix(df[, -1])
# get the names of attributes
name_param <- names(df[, -1])
# get the number of paramters/attributes to be estimated
Nparam <- length(name_param)
# set the initial values of parameters as 0
beta <- rep(0, Nparam)
# name the parameters
names(beta) <- name_param
# get the threshold set
df <- stats::model.frame(threshold, data)
# get the threshold column
x_thd <- as.matrix(df[, -1])
# get the names of thresholds
name_param <- names(df[, -1])
# get the number of thresholds
Nparam_thd <- length(name_param)
# set the initial values of thresholds
beta_thd <- seq(1, Nparam_thd)
# name the threshold parameters
names(beta_thd) <- name_param
# get all fixed parameters involving the -Inf and Inf thresholds
param_fixed_all <- c("thd.0", name_param[length(name_param)], param_fixed,
names(beta)[stringr::str_detect(names(beta), "pos.inf")])
# update the parameter to be estimated by adding the thresholds
beta <- c(beta, beta_thd)
beta["thd.0"] <- 1
beta[name_param[length(name_param)]] <- 1
# get the length of rating
Nalt <- length(rate) - 1
# get the number of observations
Nobs <- nrow(df) / length(rate)
# get the post and previous choice
choice_pre <- df[ ,1]
choice_post <- dplyr::lag(choice_pre)
choice_post[1] <- FALSE
# get the model type and the name of distributions to be used
if(type == "gologit"){
fun <- stats::plogis
model_name <- "generalized ordered logit"
} else if(type == "goprobit"){
fun <- stats::pnorm
model_name <- "generalized ordered probit"
} else stop("Undefined model type!")
# model estimation --------------------------------------------------------
# re-set the initial values of certain parameters
beta[names(param_start)] <- param_start
start_time <- Sys.time()
cat(as.character(start_time), "- model estimation starts\n")
res <- maxLik::maxLik(logLik = logLik.order,
start = beta,
method = method,
fixed = param_fixed_all,
finalHessian = estimator,
control = list(iterlim = 1000),
attr = x, attr_thd = x_thd,
choice_pre = choice_pre, choice_post = choice_post,
chid = data$obs.id, fun = fun,
Nparam = Nparam, Nparam_all = length(beta))
end_time <- Sys.time()
cat(as.character(end_time), "- model estimation ends\n")
# goodness of fit and return it -------------------------------------------
L.gof(res = res, Nalt = Nalt, Nobs = Nobs,
Nparam = length(beta) - length(param_fixed_all),
param_fixed = param_fixed,
param_fixed_all = param_fixed_all,
name = model_name,
flag = "order",
start_time = start_time, end_time = end_time, estimator = estimator)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.