#' @title Get a summarizing task description.
#'
#' @description
#' S3 generic that returns the task description for \code{x}.
#' Methods for the default case and for \code{TaskDesc} objects are provided.
#' @template arg_task_or_desc
#' @template ret_taskdesc
#' @export
#' @family task
getTaskDesc = function(x) {
# S3 generic: the method is selected by the class of 'x'
UseMethod("getTaskDesc")
}
#' @export
getTaskDesc.default = function(x) {
  # Fallback method: read the description from the 'task.desc' element of 'x'.
  # FIXME: would be much cleaner to specialize here
  desc = x$task.desc
  desc
}
#' @export
getTaskDesc.TaskDesc = function(x) {
  # 'x' already is a task description, so it is handed back unchanged
  return(x)
}
#' Deprecated, use \code{\link{getTaskDesc}} instead.
#' @inheritParams getTaskDesc
#' @export
getTaskDescription = function(x) {
  # emit the deprecation warning, then delegate to the replacement function
  .Deprecated(new = "getTaskDesc")
  desc = getTaskDesc(x)
  desc
}
#' @title Get the type of the task.
#'
#' @description See title.
#' @template arg_task_or_desc
#' @return [\code{character(1)}].
#' @export
#' @family task
getTaskType = function(x) {
  # the type is read from the 'type' element of the task description
  desc = getTaskDesc(x)
  desc$type
}
#' @title Get the id of the task.
#'
#' @description See title.
#' @template arg_task_or_desc
#' @return [\code{character(1)}].
#' @export
#' @family task
getTaskId = function(x) {
  # the id is read from the 'id' element of the task description
  desc = getTaskDesc(x)
  desc$id
}
#' @title Get the name(s) of the target column(s).
#'
#' @description
#' NB: For multilabel, \code{\link{getTaskTargetNames}} and \code{\link{getTaskClassLevels}}
#' actually return the same thing.
#'
#' @template arg_task_or_desc
#' @return [\code{character}].
#' @export
#' @family task
getTaskTargetNames = function(x) {
# S3 generic: the method is selected by the class of 'x'
UseMethod("getTaskTargetNames")
}
#' @export
getTaskTargetNames.Task = function(x) {
  # resolve the task to its description and dispatch again on that
  desc = getTaskDesc(x)
  getTaskTargetNames(desc)
}
#' @export
getTaskTargetNames.SupervisedTaskDesc = function(x) {
  # the target column name(s) are kept in the 'target' element of the description
  target.names = x$target
  target.names
}
#' @export
getTaskTargetNames.UnsupervisedTaskDesc = function(x) {
  # no target columns here: always an empty character vector
  character()
}
#' @title Get the class levels for classification and multilabel tasks.
#'
#' @description
#' NB: For multilabel, \code{\link{getTaskTargetNames}} and \code{\link{getTaskClassLevels}}
#' actually return the same thing.
#'
#' @template arg_task_or_desc
#' @return [\code{character}].
#' @export
#' @family task
getTaskClassLevels = function(x) {
# S3 generic: the method is selected by the class of 'x'
UseMethod("getTaskClassLevels")
}
#' @export
getTaskClassLevels.ClassifTask = function(x) {
  # resolve the task to its description and dispatch again on that
  desc = getTaskDesc(x)
  getTaskClassLevels(desc)
}
#' @export
getTaskClassLevels.MultilabelTask = function(x) {
  # resolve the task to its description and dispatch again on that
  desc = getTaskDesc(x)
  getTaskClassLevels(desc)
}
#' @export
getTaskClassLevels.ClassifTaskDesc = function(x) {
  # the levels are read from the 'class.levels' element of the description
  desc = getTaskDesc(x)
  desc$class.levels
}
#' @export
getTaskClassLevels.MultilabelTaskDesc = function(x) {
  # the levels are read from the 'class.levels' element of the description
  desc = getTaskDesc(x)
  desc$class.levels
}
#' Get feature names of task.
#'
#' Target column name is not included.
#'
#' @template arg_task
#' @return [\code{character}].
#' @family task
#' @export
getTaskFeatureNames = function(task) {
# S3 generic: the method is selected by the class of 'task'
UseMethod("getTaskFeatureNames")
}
#' @export
getTaskFeatureNames.Task = function(task) {
  # every column of the task data that is not a target column counts as a feature
  all.cols = names(task$env$data)
  target.cols = getTaskDesc(task)$target
  setdiff(all.cols, target.cols)
}
#' @title Get number of features in task.
#'
#' @description See title.
#' @template arg_task_or_desc
#' @return [\code{integer(1)}].
#' @export
#' @family task
getTaskNFeats = function(x) {
  # add up the entries of 'n.feat' from the task description
  feat.counts = getTaskDesc(x)$n.feat
  sum(feat.counts)
}
#' @title Get number of observations in task.
#'
#' @description See title.
#' @template arg_task_or_desc
#' @return [\code{integer(1)}].
#' @export
#' @family task
getTaskSize = function(x) {
  # the number of observations is read from the 'size' element of the description
  desc = getTaskDesc(x)
  desc$size
}
#' @title Get formula of a task.
#'
#' @description
#' This is usually simply \dQuote{<target> ~ .}.
#' For multilabel it is \dQuote{<target_1> + ... + <target_k> ~ .}.
#'
#' @template arg_task_or_desc
#' @param target [\code{character(1)}]\cr
#' Left hand side of the formula.
#' Default is defined by task \code{x}.
#' @param explicit.features [\code{logical(1)}]\cr
#' Should the features (right hand side of the formula) be explicitly listed?
#' Default is \code{FALSE}, i.e., they will be represented as \code{"."}.
#' @param env [\code{environment}]\cr
#' Environment of the formula.
#' Default is \code{parent.frame()}.
#' @return [\code{formula}].
#' @family task
#' @export
getTaskFormula = function(x, target = getTaskTargetNames(x), explicit.features = FALSE, env = parent.frame()) {
  assertCharacter(target, any.missing = FALSE)
  assertFlag(explicit.features)
  assertEnvironment(env)

  task.type = getTaskDesc(x)$type

  # these task types have no formula representation at all
  if (task.type == "costsens") {
    stop("There is no formula available for cost-sensitive learning.")
  }
  if (task.type == "cluster") {
    stop("There is no formula available for clustering.")
  }

  # left hand side: survival wraps the two target columns in Surv(),
  # multilabel joins all targets with "+", everything else uses 'target' as is
  lhs = target
  if (task.type == "surv") {
    lhs = sprintf("Surv(%s, %s, type = \"right\")", target[1L], target[2L])
  } else if (task.type == "multilabel") {
    lhs = collapse(target, "+")
  }

  # right hand side: "." unless the feature names are requested explicitly,
  # which needs the actual task data and therefore a 'Task' object
  rhs = "."
  if (explicit.features) {
    if (!inherits(x, "Task")) {
      stopf("'explicit.features' can only be used when 'x' is of type 'Task'!")
    }
    rhs = getTaskFeatureNames(x)
  }
  rhs = stri_paste(rhs, collapse = " + ", sep = " ")

  # FIXME in the future we might want to create formulas w/o an environment
  # currently this is impossible for survival because the namespace is not imported
  # properly in many packages -> survival::Surv not found
  as.formula(stri_paste(lhs, "~", rhs, sep = " "), env = env)
}
#' @title Get target data of task.
#'
#' @description
#' Get target data of task.
#'
#' @template arg_task
#' @inheritParams getTaskData
#' @return A \code{factor} for classification or a \code{numeric} for regression, a data.frame
#' of logical columns for multilabel.
#' @family task
#' @export
#' @examples
#' task = makeClassifTask(data = iris, target = "Species")
#' getTaskTargets(task)
getTaskTargets = function(task, recode.target = "no") {
# S3 generic: the method is selected by the class of 'task'
UseMethod("getTaskTargets")
}
#' @export
getTaskTargets.SupervisedTask = function(task, recode.target = "no") {
  # pull the target column(s) out of the task data, then apply the requested recoding
  desc = task$task.desc
  target.cols = desc$target
  raw.targets = task$env$data[, target.cols, drop = TRUE]
  recodeY(raw.targets, type = recode.target, td = desc)
}
#' @export
getTaskTargets.UnsupervisedTask = function(task, recode.target = "no") {
  # unsupervised tasks have no target column, so asking for one is an error
  msg = "There is no target available for unsupervised tasks."
  stop(msg)
}
#' @export
getTaskTargets.CostSensTask = function(task, recode.target = "no") {
  # cost-sensitive tasks have no target column, so asking for one is an error
  msg = "There is no target available for costsens tasks."
  stop(msg)
}
#' @title Extract data in task.
#'
#' @description
#' Useful in \code{\link{trainLearner}} when you add a learning machine to the package.
#'
#' @template arg_task
#' @template arg_subset
#' @template arg_features
#' @param target.extra [\code{logical(1)}]\cr
#'   Should target vector be returned separately?
#'   If not, a single data.frame including the target columns is returned, otherwise a list
#'   with the input data.frame and an extra vector or data.frame for the targets.
#'   Default is \code{FALSE}.
#' @param recode.target [\code{character(1)}]\cr
#'   Should target classes be recoded? Supported are binary and multilabel classification and survival.
#'   Possible values for binary classification are \dQuote{01}, \dQuote{-1+1} and \dQuote{drop.levels}.
#'   In the first two cases the target vector is converted into a numeric vector.
#'   The positive class is coded as \dQuote{+1} and the negative class either as \dQuote{0} or \dQuote{-1}.
#'   \dQuote{drop.levels} will remove empty factor levels in the target column.
#'   In the multilabel case the logical targets can be converted to factors with \dQuote{multilabel.factor}.
#'   For survival, you may choose to recode the survival times to \dQuote{left}, \dQuote{right} or \dQuote{interval2} censored times
#'   using \dQuote{lcens}, \dQuote{rcens} or \dQuote{icens}, respectively.
#'   See \code{\link[survival]{Surv}} for the format specification.
#'   Default for both binary classification and survival is \dQuote{no} (do nothing).
#' @param functionals.as [\code{character(1)}]\cr
#'   How to represent functional features?
#'   Option \dQuote{matrix}: Keep them as matrix columns in the data.frame.
#'   Option \dQuote{dfcols}: Convert them to individual numeric data.frame columns.
#'   Default is \dQuote{dfcols}.
#' @return Either a data.frame or a list with data.frame \code{data} and vector \code{target}.
#' @family task
#' @export
#' @examples
#' library("mlbench")
#' data(BreastCancer)
#'
#' df = BreastCancer
#' df$Id = NULL
#' task = makeClassifTask(id = "BreastCancer", data = df, target = "Class", positive = "malignant")
#' head(getTaskData(task))
#' head(getTaskData(task, features = c("Cell.size", "Cell.shape"), recode.target = "-1+1"))
#' head(getTaskData(task, subset = 1:100, recode.target = "01"))
getTaskData = function(task, subset = NULL, features, target.extra = FALSE, recode.target = "no",
  functionals.as = "dfcols") {

  checkTask(task, "Task")
  # only validates 'subset'; the original value (possibly NULL) is used for indexing below
  checkTaskSubset(subset, size = task$task.desc$size)
  # 'target.extra' drives an if() below, so it has to be a single non-missing flag
  assertFlag(target.extra)
  assertChoice(functionals.as, choices = c("matrix", "dfcols"))
  # NOTE(review): recodeY() in this file handles "no", "drop.levels", "01", "-1+1",
  # "surv" and "multilabel.factor"; the "lcens" / "rcens" / "icens" values named in
  # the documentation above are not handled there -- confirm.

  task.features = getTaskFeatureNames(task)

  # if supplied check if the input is right and always convert 'features'
  # to character vec
  if (!missing(features)) {
    assert(
      checkIntegerish(features, lower = 1L, upper = length(task.features)),
      checkLogical(features), checkCharacter(features)
    )
    if (!is.character(features))
      features = task.features[features]
  }

  tn = task$task.desc$target

  # Index 'df' by rows 'i' and columns 'j', where NULL means "take everything" along
  # that dimension. The switch index encodes which of i / j are NULL:
  # 1 = neither, 2 = only j, 3 = only i, 4 = both.
  indexHelper = function(df, i, j, drop = TRUE, functionals.as) {
    df = switch(2L * is.null(i) + is.null(j) + 1L,
      df[i, j, drop = drop],
      df[i, , drop = drop],
      df[, j, drop = drop],
      df
    )
    # If we don't keep functionals and functionals are present, convert to numerics
    if (functionals.as == "dfcols" && hasFunctionalFeatures(task)) {
      df = functionalToNormalData(df)
    }
    return(df)
  }

  if (target.extra) {
    if (missing(features))
      features = task.features
    res = list(
      data = indexHelper(task$env$data, subset, setdiff(features, tn), drop = FALSE, functionals.as),
      # in the next line we should not touch functionals anyway (just Y), so let us keep them as matrix
      target = recodeY(indexHelper(task$env$data, subset, tn, functionals.as = "matrix"), type = recode.target, task$task.desc)
    )
  } else {
    # NULL selects all columns; otherwise make sure the target columns come along
    if (missing(features) || identical(features, task.features))
      features = NULL
    else
      features = union(features, tn)

    res = indexHelper(task$env$data, subset, features, drop = FALSE, functionals.as)
    if (recode.target %nin% c("no", "surv")) {
      res[, tn] = recodeY(res[, tn], type = recode.target, task$task.desc)
    }
    # first condition checks if 'getTaskData' was called directly, i.e. checks if
    # the call was from the GlobalEnv
    # second condition checks which function called 'getTaskData' if call was not
    # from the GlobalEnv. If cond2 is FALSE, 'getTaskData' was called from
    # 'subsetTask' in a nested resampling call. In this case we remove x and y
    # later (later = we arrive in 'getTaskData' twice in a 'resample' call) as
    # we still need it for partitioning in upcoming function calls and only need
    # to remove `x` and `y` before we proceed to the training step.
    # isTRUE() keeps the condition a single TRUE/FALSE even when 'is.spatial'
    # is absent from the task description.
    if (!identical(parent.frame(n = 1), globalenv()) &&
      !sys.call(-2) == "subsetTask(.task, .subset)" &&
      isTRUE(task$task.desc$is.spatial)) {
      res$x = NULL
      res$y = NULL
    }
  }
  res
}
# Recode the target 'y' according to 'type':
#   "no"                - return 'y' untouched
#   "drop.levels"       - rebuild the factor so unused levels disappear
#   "01"                - numeric 1 where 'y' equals td$positive, else 0
#   "-1+1"              - numeric +1 where 'y' equals td$positive, else -1
#   "surv"              - right-censored Surv object from the first two columns of 'y'
#   "multilabel.factor" - each element of 'y' as a factor with levels "TRUE", "FALSE"
# Any other value of 'type' is an error.
recodeY = function(y, type, td) {
  if (type == "no") {
    recoded = y
  } else if (type == "drop.levels") {
    recoded = factor(y)
  } else if (type == "01") {
    is.pos = y == td$positive
    recoded = as.numeric(is.pos)
  } else if (type == "-1+1") {
    is.pos = y == td$positive
    recoded = as.numeric(2L * is.pos - 1L)
  } else if (type == "surv") {
    recoded = Surv(y[, 1L], y[, 2L], type = "right")
  } else if (type == "multilabel.factor") {
    recoded = lapply(y, function(x) factor(x, levels = c("TRUE", "FALSE")))
  } else {
    stopf("Unknown value for 'type': %s", type)
  }
  recoded
}
#' @title Extract costs in task.
#'
#' @description
#' Returns \dQuote{NULL} if the task is not of type \dQuote{costsens}.
#'
#' @param task [\code{\link{Task}}]\cr
#' The task.
#' @template arg_subset
#' @return [\code{matrix} | \code{NULL}].
#' @family task
#' @export
getTaskCosts = function(task, subset = NULL) {
# S3 generic: the method is selected by the class of 'task'
UseMethod("getTaskCosts")
}
#' @export
getTaskCosts.Task = function(task, subset = NULL) {
  # fallback for tasks without a dedicated method: there are no costs to return
  return(NULL)
}
#' @export
getTaskCosts.CostSensTask = function(task, subset = NULL) {
  desc = getTaskDesc(task)
  # validate / normalise the requested rows against the task size
  rows = checkTaskSubset(subset, size = desc$size)
  # drop = FALSE keeps the matrix shape even when a single row is selected
  desc$costs[rows, , drop = FALSE]
}
#' @title Subset data in task.
#'
#' @description See title.
#' @template arg_task
#' @template arg_subset
#' @template arg_features
#' @return [\code{\link{Task}}]. Task with subsetted data.
#' @family task
#' @export
#' @examples
#' task = makeClassifTask(data = iris, target = "Species")
#' subsetTask(task, subset = 1:100)
subsetTask = function(task, subset = NULL, features) {
# Builds the subsetted task by handing the selected data and costs to changeData(),
# then cuts blocking and weights down to the selected rows.
# FIXME: we recompute the taskdesc for each subsetting. do we want that? speed?
# FIXME: maybe we want this independent of changeData?
# Keep functionals here as they are (matrix)
# NOTE(review): getTaskData() inspects sys.call(-2) and compares it against a
# subsetTask() call, so where this getTaskData() call sits on the call stack
# appears to matter -- confirm before hoisting it into a local variable.
task = changeData(task, getTaskData(task, subset, features, functionals.as = "matrix"), getTaskCosts(task, subset), task$weights)
# changeData() above was given the full-length weights; blocking and weights are
# only reduced to the selected rows here, and only if a subset was requested.
if (!is.null(subset)) {
if (task$task.desc$has.blocking)
task$blocking = task$blocking[subset]
if (task$task.desc$has.weights)
task$weights = task$weights[subset]
}
return(task)
}
#' Change Task Data
#'
#' Mainly for internal use. Changes the data associated with a task, without modifying other task properties.
#' The task description is rebuilt from the new data.
#'
#' @template arg_task
#' @param data [\code{data.frame}]\cr
#' The new data to associate with the task. The names and types of the feature columns must match with the old data.
#' Defaults to the data currently stored in the task.
#' @param costs [\code{data.frame}]\cr
#' Optional: cost matrix.
#' Defaults to the costs currently stored in the task.
#' @param weights [\code{numeric}]\cr
#' Optional: weight vector.
#' Defaults to the weights currently stored in the task.
#' @return The task with replaced data, weights and a recomputed task description.
#' @keywords internal
#' @export
changeData = function(task, data, costs, weights) {
# fall back to the values already stored in the task for every omitted argument
if (missing(data))
data = getTaskData(task)
if (missing(costs))
costs = getTaskCosts(task)
if (missing(weights))
weights = task$weights
# we create a new env, so the reference is not changed
task$env = new.env(parent = emptyenv())
task$env$data = data
task["weights"] = list(weights) # so also 'NULL' gets set
td = task$task.desc
# rebuild the task description with the constructor that matches the task type
# FIXME: this is bad style but I see no other way right now
task$task.desc = switch(td$type,
"classif" = makeClassifTaskDesc(td$id, data, td$target, task$weights, task$blocking, td$positive, td$is.spatial),
"regr" = makeRegrTaskDesc(td$id, data, td$target, task$weights, task$blocking, td$is.spatial),
"cluster" = makeClusterTaskDesc(td$id, data, task$weights, task$blocking, td$is.spatial),
"surv" = makeSurvTaskDesc(td$id, data, td$target, task$weights, task$blocking, td$is.spatial),
"costsens" = makeCostSensTaskDesc(td$id, data, td$target, task$blocking, costs, td$is.spatial),
"multilabel" = makeMultilabelTaskDesc(td$id, data, td$target, task$weights, task$blocking, td$is.spatial)
)
return(task)
}
# Returns the levels of all factor columns in the task data as a named list of
# character vectors. Non-factor columns do not occur in the output.
getTaskFactorLevels = function(task) {
  task.data = task$env$data
  is.fac = vlapply(task.data, is.factor)
  lapply(task.data[is.fac], levels)
}
getTaskWeights = function(task) {
  # the weights are read from the 'weights' element of the task object
  w = task$weights
  w
}
# Add the following code to your website.
# For more information on customizing the embed code, read Embedding Snippets.