Nothing
## ----results = "asis", echo = FALSE-------------------------------------------
# output format should be of the form
#> output
#> output
knitr::opts_chunk$set(collapse = TRUE, comment = "#>")
# initialize: load library, make everything deterministic
library("mlrCPO")
set.seed(123)
# get the path of the parent document
# path = names(knitr::opts_knit$get("encoding"))[1]
base = knitr::opts_knit$get("output.dir")
file = sys.frame(min(grep("^knitr::knit$|^knit$", sapply(sys.calls(), function(x) as.character(x)[1]))))$input
file = basename(file)
path = file.path(base, file)
rpath = gsub("\\.[^.]*$", ".R", path)
# strip whitespace from lines in tangle (R file) output for lintr
knitr::knit_hooks$set(document = function(x) {
if (file_test("-f", rpath)) {
lines = readLines(rpath)
lines = gsub(" *(\n|$)", "\\1", lines)
cat(lines, file = rpath, sep = "\n", append = FALSE)
}
x
})
#############################
# do the trans-vignette ToC #
#############################
fullfile = file
allfiles = list.files(path = base, pattern = ".*\\.Rmd$")
stopifnot(file %in% allfiles)
# collect information (title, url, main / compact) for each file in vignette dir
fileinfolist = list()
for (cf in allfiles) {
ismain = TRUE
if (grepl("^z_", cf)) {
infoslot = gsub("^z_", "", cf)
infoslot = gsub("_terse\\.Rmd$", "", infoslot)
subslot = "compact"
} else {
infoslot = gsub("^a_", "", cf)
infoslot = gsub("\\.Rmd$", "", infoslot)
subslot = "main"
}
content = scan(paste(base, cf, sep = "/"), what = "character", quiet = TRUE)
pos = min(c(which(content == "title:"), Inf))
if (is.infinite(pos)) {
stop(sprintf("parsing error: %s", cf))
}
infolist = list(title = content[pos + 1], url = cf, iscurrent = cf == file)
applist = list(infolist)
names(applist) = subslot
fileinfolist[[infoslot]] = c(fileinfolist[[infoslot]], applist)
}
# helper function that creates a link for all files except the current one
linkify = function(info, title) {
if (info$iscurrent) {
title
} else {
sprintf("[%s](%s)", title, gsub("\\.Rmd$", ".html", info$url))
}
}
# output ToC
for (idx in seq_along(fileinfolist)) {
content = fileinfolist[[sort(names(fileinfolist))[idx]]]
if (!is.null(content$compact)) {
if (paste(sub("[0-9]\\. ", "", content$main$title), "(No Output)") != sub("^z ", "", content$compact$title)) {
stop(sprintf("File %s and its compact version %s have incompatible titles\nThe compact version must be paste(main_title, \"(No Output)\"). Is: '%s', expected: '%s'",
content$main$url, content$compact$url, content$compact$title, paste(content$main$title, "(No Output)")))
}
line = sprintf("%s (%s)", linkify(content$main, content$main$title), linkify(content$compact, "compact version"))
} else {
line = linkify(content$main, content$main$title)
}
cat(sprintf("%s. %s\n", idx, line))
if (content$main$iscurrent || content$compact$iscurrent) {
fullfile = content$main$url
}
}
fullpath = file.path(base, fullfile)
#############################
# Optional Document TOC #
#############################
# print everything up to level `print.level`.
# level is the number of '#' prefixes. The lowest level is usually 2.
printToc = function(print.level = 3) {
owncontent = readLines(fullpath)
tripletic = grepl("^```", owncontent)
owncontent = owncontent[cumsum(tripletic) %% 2 == 0] # exclude ```-delimited code
headlines = grep("^#+ +", owncontent, value = TRUE)
headlevels = nchar(gsub(" .*", "", headlines))
headlines = gsub("^[#]+ +", "", headlines)
links = gsub("[^-a-z. ]", "", tolower(headlines))
links = gsub(" +", "-", links)
links = gsub("-$", "", links)
if (!sum(headlevels <= print.level)) {
return(invisible(NULL))
}
cat("<h", headlevels[1], ">Table of Contents</h", headlevels[1], ">\n<div id=\"TOC\">\n", sep = "")
lastlevel = headlevels[1] - 1
for (idx in seq_along(headlines)) {
line = headlines[idx]
level = headlevels[idx]
link = links[idx]
if (level > print.level) {
next
}
if (level < headlevels[1]) {
stop("First headline level must be the lowest one used, but '", line, "' is lower.")
}
lvldiff = level - lastlevel
if (lvldiff > 1) {
stop("Cannot jump headline levels. Error on: ", line)
}
if (lvldiff > 0) {
# higher level -> open a <ul>
cat("<ul>")
} else {
cat("</li>\n")
}
if (lvldiff < 0) {
# lower level -> close a few <ul>
for (l in seq_len(-lvldiff)) {
cat("</ul></li>")
}
}
cat("<li><a href=\"#", link, "\">", line, "</a>", sep = "")
lastlevel = level
}
# if the last level is greater than the first level, close a few <ul>
lvldiff = lastlevel - headlevels[1]
cat("</li></ul>\n</div>\n")
}
#############################
# Some output settings #
#############################
options(width = 80)
replaceprint = function(ofunc) {
force(ofunc)
function(x, ...) {
cu = capture.output({ret = ofunc(x, ...)})
cu = grep("time: [-+e0-9.]{1,6}", cu, value = TRUE, invert = TRUE)
cat(paste(cu, collapse = "\n"))
if (!grepl("\n$", tail(cu, 1))) {
cat("\n")
}
ret
}
}
for (pfunc in grep("print\\.", ls(asNamespace("mlr")), value = TRUE)) {
ofunc = get(pfunc, asNamespace("mlr"))
assign(pfunc, replaceprint(ofunc))
}
## ----eval = TRUE, echo = FALSE, results = 'asis'------------------------------
printToc(4)
## -----------------------------------------------------------------------------
!cpoPca()
## -----------------------------------------------------------------------------
xmpSample = makeCPORetrafoless("exsample", # nolint
pSS(fraction: numeric[0, 1]),
dataformat = "df.all",
cpo.trafo = function(data, target, fraction) {
newsize = round(nrow(data) * fraction)
row.indices = sample(nrow(data), newsize)
data[row.indices, ]
})
cpo = xmpSample(0.01)
## -----------------------------------------------------------------------------
iris %>>% cpo
## -----------------------------------------------------------------------------
xmpSampleHeadless = makeCPORetrafoless("exsample", # nolint
pSS(fraction: numeric[0, 1]),
dataformat = "df.all",
cpo.trafo = {
newsize = round(nrow(data) * fraction)
row.indices = sample(nrow(data), newsize)
data[row.indices, ]
})
## -----------------------------------------------------------------------------
xmpFilterVar = makeCPO("exemplvar", # nolint
pSS(n.col: integer[0, ]),
dataformat = "numeric",
cpo.train = function(data, target, n.col) {
cat("*** cpo.train ***\n")
sapply(data, var, na.rm = TRUE)
},
cpo.retrafo = function(data, control, n.col) {
cat("*** cpo.retrafo ***\n")
cat("Control:\n")
print(control)
cat("\n")
greatest = order(-control) # columns, ordered greatest to smallest var
data[greatest[seq_len(n.col)]]
})
cpo = xmpFilterVar(2)
## -----------------------------------------------------------------------------
(trafd = head(iris) %>>% cpo)
## -----------------------------------------------------------------------------
head(iris %>>% cpo)
## -----------------------------------------------------------------------------
head(iris %>>% retrafo(trafd))
## -----------------------------------------------------------------------------
getCPOTrainedState(retrafo(trafd))
## -----------------------------------------------------------------------------
xmpFilterVarFunc = makeCPO("exemplvar.func", # nolint
pSS(n.col: integer[0, ]),
dataformat = "numeric",
cpo.retrafo = NULL,
cpo.train = function(data, target, n.col) {
cat("*** cpo.train ***\n")
ctrl = sapply(data, var, na.rm = TRUE)
function(x) { # the data is given to the only present parameter: 'x'
cat("*** cpo.retrafo ***\n")
cat("Control:\n")
print(ctrl)
cat("\ndata:\n")
print(data) # 'data' is deleted: NULL
cat("target:\n")
print(target) # 'target' is deleted: NULL
greatest = order(-ctrl) # columns, ordered greatest to smallest var
x[greatest[seq_len(n.col)]]
}
})
cpo = xmpFilterVarFunc(2)
## -----------------------------------------------------------------------------
(trafd = head(iris) %>>% cpo)
## -----------------------------------------------------------------------------
getCPOTrainedState(retrafo(trafd))
## -----------------------------------------------------------------------------
xmpAsNum = makeCPO("asnum", # nolint
cpo.train = NULL,
cpo.retrafo = function(data) {
data.frame(lapply(data, as.numeric))
})
cpo = xmpAsNum()
## -----------------------------------------------------------------------------
(trafd = head(iris) %>>% cpo)
## -----------------------------------------------------------------------------
getCPOTrainedState(retrafo(trafd))
## -----------------------------------------------------------------------------
xmpPca = makeCPOExtendedTrafo("simple.pca", # nolint
pSS(n.col: integer[0, ]),
dataformat = "numeric",
cpo.trafo = function(data, target, n.col) {
cat("*** cpo.trafo ***\n")
pcr = prcomp(as.matrix(data), center = FALSE, scale. = FALSE, rank = n.col)
# save the rotation matrix as 'control' variable
control = pcr$rotation
pcr$x
},
cpo.retrafo = function(data, control, n.col) {
cat("*** cpo.retrafo ***\n")
# rotate the data by the rotation matrix
as.matrix(data) %*% control
})
cpo = xmpPca(2)
## -----------------------------------------------------------------------------
(trafd = head(iris) %>>% cpo)
## -----------------------------------------------------------------------------
tail(iris) %>>% retrafo(trafd)
## -----------------------------------------------------------------------------
getCPOTrainedState(retrafo(trafd))
## -----------------------------------------------------------------------------
xmpPcaFunc = makeCPOExtendedTrafo("simple.pca.func", # nolint
pSS(n.col: integer[0, ]),
dataformat = "numeric",
cpo.retrafo = NULL,
cpo.trafo = function(data, target, n.col) {
cat("*** cpo.trafo ***\n")
pcr = prcomp(as.matrix(data), center = FALSE, scale. = FALSE, rank = n.col)
# save the rotation matrix as 'control' variable
cpo.retrafo = function(data) {
cat("*** cpo.retrafo ***\n")
# rotate the data by the rotation matrix
as.matrix(data) %*% pcr$rotation
}
pcr$x
})
cpo = xmpPcaFunc(2)
## -----------------------------------------------------------------------------
(trafd = head(iris) %>>% cpo)
## -----------------------------------------------------------------------------
getCPOTrainedState(retrafo(trafd))$pcr$x
## ----eval = FALSE-------------------------------------------------------------
# c(response = "response", se = "prob")
## -----------------------------------------------------------------------------
xmpMetaLearn = makeCPOTargetOp("xmp.meta", # nolint
pSS(lrn: untyped),
dataformat = "task",
properties.target = c("classif", "twoclass"),
predict.type.map = c(response = "response", prob = "prob"),
cpo.train = function(data, target, lrn) {
cat("*** cpo.train ***\n")
lrn = setPredictType(lrn, "prob")
train(lrn, data)
},
cpo.retrafo = function(data, target, control, lrn) {
cat("*** cpo.retrafo ***\n")
prediction = predict(control, target)
tname = getTaskTargetNames(target)
tdata = getTaskData(target)
tdata[[tname]] = factor(prediction$data$response == prediction$data$truth)
makeClassifTask(getTaskId(target), tdata, tname, positive = "TRUE",
fixup.data = "no", check.data = FALSE)
},
cpo.train.invert = function(data, control, lrn) {
cat("*** cpo.train.invert ***\n")
predict(control, newdata = data)$data
},
cpo.invert = function(target, control.invert, predict.type, lrn) {
cat("*** cpo.invert ***\n")
if (predict.type == "prob") {
outmat = as.matrix(control.invert[grep("^prob\\.", names(control.invert))])
revmat = outmat[, c(2, 1)]
outmat * target[, "prob.TRUE", drop = TRUE] +
revmat * target[, "prob.FALSE", drop = TRUE]
} else {
stopifnot(levels(target) == c("FALSE", "TRUE"))
numeric.prediction = as.numeric(control.invert$response)
numeric.res = ifelse(target == "TRUE",
numeric.prediction,
3 - numeric.prediction)
factor(levels(control.invert$response)[numeric.res],
levels(control.invert$response))
}
})
cpo = xmpMetaLearn(makeLearner("classif.logreg"))
## -----------------------------------------------------------------------------
set.seed(12)
split = makeResampleInstance(hout, pid.task)
train.task = subsetTask(pid.task, split$train.inds[[1]])
test.task = subsetTask(pid.task, split$predict.inds[[1]])
## -----------------------------------------------------------------------------
trafd = train.task %>>% cpo
attributes(trafd)
## -----------------------------------------------------------------------------
head(getTaskData(trafd))
## -----------------------------------------------------------------------------
model = train(makeLearner("classif.logreg", predict.type = "prob"), train.task)
head(predict(model, train.task)$data[c("truth", "response")])
## -----------------------------------------------------------------------------
retr = test.task %>>% retrafo(trafd)
attributes(retr)
## -----------------------------------------------------------------------------
retr.df = getTaskData(test.task, target.extra = TRUE)$data %>>% retrafo(trafd)
names(attributes(retr.df))
## -----------------------------------------------------------------------------
ext.model = train("classif.svm", trafd)
ext.pred = predict(ext.model, retr)
newpred = invert(inverter(retr), ext.pred)
performance(newpred)
## -----------------------------------------------------------------------------
cpo.learner = cpo %>>% makeLearner("classif.svm")
cpo.model = train(cpo.learner, train.task)
## -----------------------------------------------------------------------------
lrnpred = predict(cpo.model, test.task)
performance(lrnpred)
## -----------------------------------------------------------------------------
xmpMetaLearn = makeCPOTargetOp("xmp.meta.fnc", # nolint
pSS(lrn: untyped),
dataformat = "task",
properties.target = c("classif", "twoclass"),
predict.type.map = c(response = "response", prob = "prob"),
# set the cpo.* parameters not needed to NULL:
cpo.retrafo = NULL, cpo.train.invert = NULL, cpo.invert = NULL,
cpo.train = function(data, target, lrn) {
cat("*** cpo.train ***\n")
lrn = setPredictType(lrn, "prob")
model = train(lrn, data)
cpo.retrafo = function(data, target) {
cat("*** cpo.retrafo ***\n")
prediction = predict(model, target)
tname = getTaskTargetNames(target)
tdata = getTaskData(target)
tdata[[tname]] = factor(prediction$data$response == prediction$data$truth)
makeClassifTask(getTaskId(target), tdata, tname, positive = "TRUE",
fixup.data = "no", check.data = FALSE)
}
cpo.train.invert = function(data) {
cat("*** cpo.train.invert ***\n")
prediction = predict(model, newdata = data)$data
function(target, predict.type) { # this is returned as cpo.invert
cat("*** cpo.invert ***\n")
if (predict.type == "prob") {
outmat = as.matrix(prediction[grep("^prob\\.", names(prediction))])
revmat = outmat[, c(2, 1)]
outmat * target[, "prob.TRUE", drop = TRUE] +
revmat * target[, "prob.FALSE", drop = TRUE]
} else {
stopifnot(levels(target) == c("FALSE", "TRUE"))
numeric.prediction = as.numeric(prediction$response)
numeric.res = ifelse(target == "TRUE",
numeric.prediction,
3 - numeric.prediction)
factor(levels(prediction$response)[numeric.res],
levels(prediction$response))
}
}
}
})
## -----------------------------------------------------------------------------
xmpRegCenter = makeCPOTargetOp("xmp.center", # nolint
constant.invert = TRUE,
cpo.train.invert = NULL, # necessary for constant.invert = TRUE
dataformat = "df.feature",
properties.target = "regr",
cpo.train = function(data, target) {
# control value is just the mean of the target column
mean(target[[1]])
},
cpo.retrafo = function(data, target, control) {
# subtract mean from target column in retrafo
target[[1]] = target[[1]] - control
target
},
cpo.invert = function(target, predict.type, control.invert) {
target + control.invert
})
cpo = xmpRegCenter()
## -----------------------------------------------------------------------------
train.task = subsetTask(bh.task, 150:155)
getTaskTargets(train.task)
## -----------------------------------------------------------------------------
predict.task = subsetTask(bh.task, 156:160)
getTaskTargets(predict.task)
## -----------------------------------------------------------------------------
trafd = train.task %>>% cpo
getTaskTargets(trafd)
## -----------------------------------------------------------------------------
getTaskTargets(predict.task)
## -----------------------------------------------------------------------------
retr = retrafo(trafd)
predict.traf = predict.task %>>% retr
getTaskTargets(predict.traf)
## ----warnings = FALSE---------------------------------------------------------
model = train("regr.lm", trafd)
pred = predict(model, predict.traf)
pred
## -----------------------------------------------------------------------------
invert(inverter(predict.traf), pred)
## ----warnings = FALSE---------------------------------------------------------
model = train("regr.lm", train.task)
predict(model, predict.task)
## -----------------------------------------------------------------------------
getCPOTrainedCapability(retr)
## -----------------------------------------------------------------------------
invert(retr, pred)
## -----------------------------------------------------------------------------
xmpLogRegr = makeCPOTargetOp("log.regr", # nolint
constant.invert = TRUE,
properties.target = "regr",
cpo.train = NULL, cpo.train.invert = NULL,
cpo.retrafo = function(data, target) {
target[[1]] = log(target[[1]])
target
},
cpo.invert = function(target, predict.type) {
exp(target)
})
cpo = xmpLogRegr()
## -----------------------------------------------------------------------------
trafd = train.task %>>% cpo
getTaskTargets(trafd)
## -----------------------------------------------------------------------------
retr = retrafo(trafd)
predict.traf = predict.task %>>% retr
getTaskTargets(predict.traf)
## ----warnings = FALSE---------------------------------------------------------
model = train("regr.lm", trafd)
pred = predict(model, predict.traf)
pred
## -----------------------------------------------------------------------------
invert(inverter(predict.traf), pred)
## -----------------------------------------------------------------------------
invert(retr, pred)
## -----------------------------------------------------------------------------
xmpSynCPO = makeCPOExtendedTargetOp("syn.cpo", # nolint
properties.target = "regr",
cpo.trafo = function(data, target) {
cat("*** cpo.trafo ***\n")
target[[1]] = target[[1]] + 1
control = "control created in cpo.trafo"
control.invert = "control.invert created in cpo.trafo"
target
},
cpo.retrafo = function(data, target, control) {
cat("*** cpo.retrafo ***", "control is:", deparse(control), sep = "\n")
control.invert = "control.invert created in cpo.retrafo"
if (!is.null(target)) {
cat("target is non-NULL, performing transformation\n")
target[[1]] = target[[1]] - 1
return(target)
} else {
cat("target is NULL, no transformation (but control.invert was created)\n")
return(NULL) # is ignored.
}
},
cpo.invert = function(target, control.invert, predict.type) {
cat("*** invert ***", "control.invert is:", deparse(control.invert),
sep = "\n")
target
})
cpo = xmpSynCPO()
## -----------------------------------------------------------------------------
trafd = train.task %>>% cpo
getTaskTargets(trafd)
## -----------------------------------------------------------------------------
retrafd = train.task %>>% retrafo(trafd)
## -----------------------------------------------------------------------------
getTaskTargets(retrafd)
## -----------------------------------------------------------------------------
retrafd = getTaskData(train.task, target.extra = TRUE)$data %>>% retrafo(trafd)
## -----------------------------------------------------------------------------
inv = invert(inverter(trafd), 1:6)
## -----------------------------------------------------------------------------
inv = invert(inverter(retrafd), 1:6)
## ----echo = FALSE-------------------------------------------------------------
oscipen = options("scipen")
options(scipen = 10)
## -----------------------------------------------------------------------------
learners = list(
logreg = makeLearner("classif.logreg"),
svm = makeLearner("classif.svm"),
cpo = xmpMetaLearn(makeLearner("classif.logreg")) %>>%
makeLearner("classif.svm")
)
# suppress output of '*** cpo.train ***' etc.
configureMlr(show.info = FALSE, show.learner.output = FALSE)
perfs = sapply(learners, function(lrn) {
unname(replicate(20, resample(lrn, pid.task, cv10)$aggr))
})
# reset mlr settings
configureMlr()
boxplot(perfs)
## -----------------------------------------------------------------------------
pvals = c(
logreg = t.test(perfs[, "logreg"], perfs[, "cpo"], "greater")$p.value,
svm = t.test(perfs[, "svm"], perfs[, "cpo"], "greater")$p.value
)
round(p.adjust(pvals), 3)
## ----echo = FALSE-------------------------------------------------------------
options(scipen = oscipen$scipen)
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.