Nothing
#' @title The sequential logistic regression model for multi-classification
#' problem under the ordinal case.
#'
#' @description \code{seq_ord_model} chooses the subjects sequentially by the
#' logistic regression model for ordinal case
#'
#' @details The \code{\link{seq_ord_model}} function and
#' \code{\link{seq_cat_model}} function are very similar. seq_ord_model is
#' also a multinomial logistic regression model but under the ordinal case
#' that estimate the coefficient variables and determines the samples given
#' the fixed size confidence set. seq_ord_model selects the sample in the same
#' way as seq_cat_model: both are two methods. The details about the selecting
#' methoed in seq_ord_model please refer to the \code{\link{seq_cat_model}}
#' function.
#' @param labeled_ids A numeric vector for the unique identification of the
#' labeled dataset
#' @param unlabeled_ids A numeric vector for the unique identification of the
#' unlabeled dataset
#' @param splitted A list containing the datasets which we will use in the
#' cordinl case. Note that the element of the data_split is the samples from
#' Classes k-1and Classes k
#' @param newY A numeric number denotes the value of the labels from 0 to K
#' which is the number of categories
#' @param train A matrix for the labeled samples. Note that the indices of the
#' samples in the train dataset is the same as the labeled_ids
#' @param data A matrix denotes all the data including the labeled samples and
#' the unlabeled samples. Note that the first column of the dataset is the
#' response variable, that's the labels and the rest is the explanatory
#' variables.
#' @param d A numeric number specifying the length of the fixed size confidence
#' set for our model. The default value is 0.8.
#' @param adaptive A character string that determines the sample selection
#' criterion to be used, matching one of 'random' or 'A_optimal'. The default
#' value is 'random'.
#' @export
#' @return a list containing the following components
#' \item{d}{the length of the fixed size confidence set that we specify}
#' \item{n}{the current sample size when the stopping criterion is satisfied}
#' \item{is_stopped}{the label of sequential iterations stop or not. When the
#' value of is_stopped is TRUE, it means the iteration stops}
#' \item{beta_est}{the estimated coeffificent when the criterion is safisfied}
#' \item{cov}{the covariance matrix between the estimated parameters}
#' \item{adaptive}{the sample selection criterion we used }
#'
#' @references {
#' Li, J., Chen, Z., Wang, Z., & Chang, Y. I. (2020). Active learning in
#' multiple-class classification problems via individualized binary models.
#' \emph{Computational Statistics & Data Analysis}, 145, 106911.
#' doi:10.1016/j.csda.2020.106911
#' }
#'
#' @seealso{
#' \code{\link{seq_cat_model}} for categorical case
#'
#' \code{\link{seq_bin_model}} for binary classification case
#'
#' \code{\link{seq_GEE_model}} for generalized estimating equations case.
#'
#'}
#'
#'
#' @examples
#' # generate the toy example
#' beta <- matrix(c(1,2,1,-1,1,2), ncol=2)
#' res <- gen_multi_data(beta, N = 10000, type = 'ord', test_ratio = 0.3)
#' train_id <- res$train_id
#' train <- res$train
#' test <- res$test
#' res <- init_multi_data(train_id, train, init_N = 300, type = 'ord')
#' splitted <- res$splitted
#' train <- res$train
#' newY <- res$newY
#' labeled_ids <- res$labeled_ids
#' unlabeled_ids <- res$unlabeled_ids
#' data <- res$data
#'
#' # use seq_ord_model to multi-classification problem under the ordinal case.
#' # You can remove '#' to run the command.
#' # start_time <- Sys.time()
#' # logitA_ord <- seq_ord_model(labeled_ids, unlabeled_ids, splitted, newY,
#' # train, data, d = 0.5, adaptive = "A_optimal")
#' # logitA_ord$time <- as.numeric(Sys.time() - start_time, units = "mins")
#' # print(logitA_ord)
seq_ord_model <- function(labeled_ids, unlabeled_ids, splitted, newY, train, data, d = 0.8, adaptive="random") {
i <- length(labeled_ids)
model <- function(df) glm(Y ~ . -1, data = df, family = "binomial")
models <- lapply(splitted, model)
beta_list <- lapply(models, `[[`, "coefficients")
p <- length(beta_list[[1]])
beta_mat <- matrix(unlist(beta_list), nrow = p)
while (TRUE) {
beta_mat <- logit_model_ord(splitted, newY, beta_mat)
cov <- getWH_ord(train, beta_mat)
eigen_min <- cov$eigen_min
K <- dim(beta_mat)[2]
p <- dim(beta_mat)[1]
Kp <- K * p
d <- d
is_stopped <- (eigen_min > qchisq(df=Kp, 0.95) / d^2)
N <- nrow(data)
if (!is_stopped && i < N) {
if (adaptive == "random") {
ind <- sample(1:length(unlabeled_ids), 1)
}
if (adaptive == "A_optimal") {
ind <- A_optimal_ord(data[,-1], beta_mat,
cov$W, unlabeled_ids)
}
res <- update_data_ord(ind, splitted, data, train, labeled_ids, unlabeled_ids)
splitted <- res$splitted
newY <- res$newY
train <- res$train
labeled_ids <- res$labeled_ids
unlabeled_ids <- res$unlabeled_ids
i <- i + 1
} else {
results <- list(d = d,
n = i,
is_stopped = is_stopped,
beta_est = beta_mat,
cov = cov$inv_sigma,
adaptive = adaptive)
class(results) <- c('seqmulti','list')
return(results)
}
}
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.