Nothing
#' Random k-labelsets for multilabel classification
#'
#' Create a RAkEL model for multilabel classification.
#'
#' RAndom k labELsets is an ensemble of LP models where each classifier is
#' trained with a small set of labels, called labelset. Two different strategies
#' for constructing the labelsets are the disjoint and overlapping labelsets.
#'
#' @family Transformation methods
#' @family Powerset
#' @param mdata A mldr dataset used to train the binary models.
#' @param base.algorithm A string with the name of the base algorithm. (Default:
#' \code{options("utiml.base.algorithm", "SVM")})
#' @param k The number of labels used in each labelset. (Default: \code{3})
#' @param m The number of LP models. Used when overlapping is TRUE, otherwise it
#' is ignored. (Default: \code{2 * length(labels)})
#' @param overlapping Logical value, that defines if the method must overlapping
#' the labelsets. If FALSE the method uses disjoint labelsets.
#' (Default: \code{TRUE})
#' @param ... Others arguments passed to the base algorithm for all subproblems.
#' @param cores The number of cores to parallelize the training. Values higher
#' than 1 require the \pkg{parallel} package. (Default:
#' \code{options("utiml.cores", 1)})
#' @param seed An optional integer used to set the seed. This is useful when
#' the method is running in parallel. (Default:
#' \code{options("utiml.seed", NA)})
#' @return An object of class \code{RAkELmodel} containing the set of fitted
#' models, including:
#' \describe{
#' \item{labels}{A vector with the label names.}
#' \item{labelsets}{A list with the labelsets used to build the LP models.}
#' \item{model}{A list of the generated models, named by the label names.}
#' }
#' @references
#' Tsoumakas, G., Katakis, I., & Vlahavas, I. (2011). Random k-labelsets for
#' multilabel classification. IEEE Transactions on Knowledge and Data
#' Engineering, 23(7), 1079-1089.
#' @export
#'
#' @examples
#' model <- rakel(toyml, "RANDOM")
#' pred <- predict(model, toyml)
#' \donttest{
#' ## SVM using k = 4 and m = 100
#' model <- rakel(toyml, "SVM", k=4, m=100)
#'
#' ## Random Forest using disjoint labelsets
#' model <- rakel(toyml, "RF", overlapping=FALSE)
#' }
rakel <- function (mdata,
base.algorithm = getOption("utiml.base.algorithm", "SVM"),
k = 3, m = 2 * mdata$measures$num.labels, overlapping = TRUE,
..., cores = getOption("utiml.cores", 1),
seed = getOption("utiml.seed", NA)) {
# Validations
if (!is(mdata, "mldr")) {
stop("First argument must be an mldr object")
}
# RAkEL Model class
rkmodel <- list(
labels = rownames(mdata$labels),
overlapping = overlapping,
k = k,
m = ifelse(overlapping, m, ceiling(mdata$measures$num.labels / k)),
labelsets = list(),
call = match.call()
)
if (!anyNA(seed)) {
set.seed(seed)
}
if (overlapping) {
#RAkEL overllaping
rkmodel$labelsets <- lapply(seq(rkmodel$m), function(i) {
sample(rkmodel$labels, k)
})
#TODO validate if all labels are used
} else {
#RAkEL disjoint
labels <- rkmodel$labels
for (i in seq(rkmodel$m)) {
labelset <- sample(labels, min(k, length(labels)))
rkmodel$labelsets[[length(rkmodel$labelsets) + 1]] <- labelset
labels <- setdiff(labels, labelset)
}
}
lbl.index <- mdata$measures$num.inputs
rkmodel$models <- utiml_lapply(rkmodel$labelsets, function (labels) {
data <- mldr::mldr_from_dataframe(
cbind(mdata$dataset[mdata$attributesIndexes], mdata$dataset[labels]),
seq(lbl.index + 1, lbl.index + length(labels)),
name = mdata$name
)
lp(data, base.algorithm = base.algorithm, ...)
}, cores, seed)
class(rkmodel) <- "RAkELmodel"
rkmodel
}
#' Predict Method for RAkEL
#'
#' This function predicts values based upon a model trained by
#' \code{\link{rakel}}.
#'
#' @param object Object of class '\code{RAkELmodel}'.
#' @param newdata An object containing the new input data. This must be a
#' matrix, data.frame or a mldr object.
#' @param probability Logical indicating whether class probabilities should be
#' returned. (Default: \code{getOption("utiml.use.probs", TRUE)})
#' @param ... Others arguments passed to the base algorithm prediction for all
#' subproblems.
#' @param cores The number of cores to parallelize the prediction. Values higher
#' than 1 require the \pkg{parallel} package. (Default:
#' \code{options("utiml.cores", 1)})
#' @param seed An optional integer used to set the seed. This is useful when
#' the method is run in parallel. (Default: \code{options("utiml.seed", NA)})
#' @return An object of type mlresult, based on the parameter probability.
#' @seealso \code{\link[=rakel]{Random k Labelsets (RAkEL)}}
#' @export
#'
#' @examples
#' model <- rakel(toyml, "RANDOM")
#' pred <- predict(model, toyml)
predict.RAkELmodel <- function(object, newdata,
probability = getOption("utiml.use.probs", TRUE),
..., cores = getOption("utiml.cores", 1),
seed = getOption("utiml.seed", NA)) {
# Validations
if (!is(object, "RAkELmodel")) {
stop("First argument must be a RAkELmodel object")
}
previous.value <- getOption("utiml.empty.prediction")
options(utiml.empty.prediction = TRUE)
newdata <- utiml_newdata(newdata)
results <- utiml_lapply(object$models, function (lpmodel){
predict.LPmodel(lpmodel, newdata)
}, cores, seed)
if (object$overlapping) {
nvotes <- as.numeric(table(unlist(object$labelsets))[object$labels])
votes <- matrix(0, nrow=nrow(newdata), ncol=length(nvotes),
dimnames = list(rownames(newdata), object$labels))
for (result in results) {
votes[, colnames(result)] <- votes[, colnames(result)] +
as.bipartition(result)
}
prediction <- as.mlresult(t(t(votes) / nvotes), probability, threshold=0.5)
rm(votes, nvotes)
} else {
prediction <- multilabel_prediction(
do.call(cbind, lapply(results, as.bipartition))[,object$labels],
do.call(cbind, lapply(results, as.probability))[,object$labels],
probability
)
}
rm(results)
options(utiml.empty.prediction = previous.value)
prediction
}
#' Print RAkEL model
#' @param x The rakel model
#' @param ... ignored
#'
#' @return No return value, called for print model's detail
#'
#' @export
print.RAkELmodel <- function(x, ...) {
cat("RAkEL",ifelse(x$overlapping, "Overlapping", "Disjoint"), "Model")
cat("\n\nCall:\n")
print(x$call)
cat("\nLabelsets size:",x$k,"\n")
cat(x$m, "LP Models. Labelsets:\n")
print(do.call(rbind, lapply(x$labelsets, function (v) {
length(v) <- x$k
v
})))
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.