attic/experiments.R

library("paradox")
library("mlr3")

options(error=recover)
options(error=dump.frames)
data.table::setDTthreads(0)
data.table::setDTthreads(1)
Sys.setenv(NOT_CRAN = "true")

devtools::document("mlr3pipelines")

# devtools::load_all("paradox")

devtools::load_all("mlr3pipelines")

Sys.setenv(TESTTHAT_CPUS = 20)
testthat::test_package("mlr3pipelines")


tools::buildVignettes(dir = "mlr3pipelines")


system.time(testthat::test_package("mlr3pipelines", filter = "pipeop_impute"), gcFirst = FALSE)
devtools::run_examples("mlr3pipelines")

profvis::profvis(testthat::test_package("mlr3pipelines"))


testthat::test_package("mlr3pipelines", filter = "textvectorizer")


testthat::test_package("mlr3pipelines", filter = "Graph")


ntree <- 10
mtry <- 2

gr <- po("replicate", reps = ntree) %>>%
  po("select", selector = function(task) sample(task$feature_names, mtry)) %>>%
  po("subsample") %>>% lrn("classif.rpart") %>>%
  po("classifavg", collect = TRUE)

gr <- po("ovrsplit") %>>% lrn("classif.rpart") %>>% po("ovrunite")
gr <- po("ovrsplit") %>>%
  po("replicate", reps = 3) %>>%
  po("subsample") %>>% lrn("classif.rpart") %>>%
  po("classifavg", collect = TRUE) %>>%
  po("ovrunite")

gr <- po("replicate", reps = 3) %>>%
  po("ovrsplit") %>>%
  po("subsample") %>>% lrn("classif.rpart") %>>%
  po("ovrunite") %>>%
  po("classifavg", collect = TRUE)

library("mlr3learners")

gr <- list(
    po("nop"),
    po("learner_cv", lrn("regr.lm")),
    po("replicate", reps = 3) %>>% po("learner_cv", lrn("regr.ranger"))
  ) %>>%
  po("featureunion") %>>% lrn("regr.rpart") %>>% po("regravg", collect = TRUE)



testthat::test_package("mlr3pipelines", filter = "scale")

testthat::test_package("mlr3pipelines", filter = "removeconstants")
testthat::test_package("mlr3pipelines", filter = "conversion")

devtools::test("mlr3pipelines", stop_on_warning = TRUE, filter = "imputelearner")

devtools::test("mlr3pipelines", stop_on_warning = TRUE, filter = "impute")


testthat::test_package("mlr3pipelines", filter = "renamecolumns")


testthat::test_package("mlr3pipelines", filter = "^_[a-d].*")

testthat::test_package("mlr3pipelines", filter = "^_[a-m].*")
testthat::test_package("mlr3pipelines", filter = "^_[n-s].*")
testthat::test_package("mlr3pipelines", filter = "^_[^a-s].*")


pofi = PipeOpFixFactors$new()


poe = PipeOpEncodeImpact$new()

t2 = po("histbin")$train(list(tsk("iris")))[[1]]

poe$get_state(tsk("boston_housing"))
poe$get_state(t2)
poe$param_set$values$impute_zero = FALSE


poe$train(list(tsk("boston_housing")))[[1]]$data()

poe$predict(list(tsk("boston_housing")$clone()$filter(1)))[[1]]$data()

poe$train(list(tsk("boston_housing")))[[1]]$data()
poe$train(list(t2))[[1]]$data()
poe$predict(list(t2$clone()$filter(1)))[[1]]$data()

poe$param_set$values$smoothing = 1e10

pom = PipeOpMutate$new()

pom$train(list(task))

pom$param_set$values$delete_originals = TRUE

pom$train(list(task))

pom$param_set$values$delete_originals = FALSE

pom$param_set$values$mutation = alist(x = Sepal.Width + Petal.Length)

pom$train(list(task))[[1]]$data()

pom$param_set$values$delete_originals = TRUE

pom$train(list(task))[[1]]$data()

pom$param_set$values$delete_originals = FALSE

pom$param_set$values$mutation = alist(Sepal.Width = Sepal.Width + Petal.Length)

pom$train(list(task))[[1]]$data()

g = PipeOpScale$new() %>>% PipeOpApply$new()
g2 = g$clone()
g2$train(task)


g$model = g2$model

g$param_set$values$apply.applicator = as.factor

task = mlr_tasks$get("iris")
str(g$train(task)[[1]]$data())

str(task$data())

g = PipeOpScale$new() %>>% PipeOpSelect$new()


g$train(task)[[1]]$data()
g$param_set$values$select.invert = TRUE
g$train(task)[[1]]$data()
g$param_set$values$select.invert = FALSE
g$param_set$values$select.selector = selector_grep("^Petal\\.")
g$train(task)[[1]]$data()
g$param_set$values$select.invert = TRUE
g$train(task)[[1]]$data()


g = PipeOpScale$new() %>>% PipeOpPCA$new()

profvis::profvis(Reduce(`%>>%`, lapply(c(letters, paste0(letters, "1"), paste0(letters, "2")), PipeOpScale$new)))

  paste0(letters, as.character(0:9))

system.time(for (i in seq_len(100)) {gr <- PipeOpScale$new() %>>% PipeOpPCA$new() }) / 100
system.time(for (i in seq_len(100)) {gr <- PipeOpScale$new() %>>% PipeOpPCA$new() %>>% PipeOpScale$new("scale2") %>>% PipeOpPCA$new("pca2")}) / 100

system.time(for (i in seq_len(100)) {g1 <- ensure_graph(PipeOpScale$new())}) / 100
system.time(for (i in seq_len(100)) {g1 <- ensure_graph(PipeOpScale$new()) ; g2 <- ensure_graph(PipeOpPCA$new())}) / 100
system.time(for (i in seq_len(100)) {g1 <- ensure_graph(PipeOpScale$new()) ; g2 <- ensure_graph(PipeOpPCA$new()) ; gu <- gunion(list(g1, g2)) ; g1out <- g1$output}) / 100

system.time(for (i in seq_len(100)) {g1 <- ensure_graph(PipeOpScale$new()) ; g2 <- ensure_graph(PipeOpPCA$new()) ; gu <- gunion(list(g1, g2)) ; g1out <- g1$output}) / 100



system.time(for (i in seq_len(100)) {g1 <- ensure_graph(PipeOpScale$new()) ; g2 <- ensure_graph(PipeOpPCA$new()) ; gu <- gunion(list(g1, g2)) ; g1out <- g1$output ; g2in <- g2$input ;   newedges <- cbind(g1out[, list(src_id = op.id, src_channel = channel.name)],g2in[, list(dst_id = op.id, dst_channel = channel.name)]) ; do.call(gu$add_edge, transpose(newedges)[[1]])}) / 100


g1 <- ensure_graph(PipeOpScale$new())
g1$input
g1$output

g1 <- PipeOpSelect$new(param_vals = list(selector = selector_type("numeric"))) %>>% PipeOpCopy$new(2) %>>% gunion(list(PipeOpScale$new(), PipeOpPCA$new())) %>>% PipeOpFeatureUnion$new(2) %>>% PipeOpLearner$new(mlr_learners$get("regr.rpart"))

g1$train(mlr_tasks$get("boston_housing"))
predict(g1, mlr_tasks$get("boston_housing")$data()[, -"medv"])
predict(g1, mlr_tasks$get("boston_housing"))

(PipeOpSelect$new(param_vals = list(selector = selector_type("numeric"))) %>>% PipeOpCopy$new(2))$train(mlr_tasks$get("boston_housing"))

system.time(for (i in seq_len(1000)) {g1$input}) / 1000
system.time(for (i in seq_len(1000)) {g1$output}) / 1000

Graph$active$input
Graph$active$output

g1$input
g1$output



g

gr$add_pipeop(PipeOpUnbranch$new(2))

gr$input
gr$output
gr$lhs
gr$rhs

gr$edges


gr$param_vals$scale.scale = FALSE

gr$add_edge("scale", "1", "pca", "1")
gr$add_edge("scale", "1", "pca", "1")




scidf = cbind(scale(iris[1:4]), iris[5])
scalediris = TaskClassif$new("scalediris", as_data_backend(scidf), "Species")
expect_equal(graphpred, lrn$train(scalediris)$predict(scalediris))
expect_equal(graphpred2, lrn$train(scalediris)$predict(scalediris))

dblrn = mlr_learners$get("classif.debug")






MeasureClassifACC$new()$calculate(Experiment$new(task, lrn)$train()$predict())
MeasureClassifACC$new()$calculate(Experiment$new(task, lrn)$train()$predict())

MeasureClassifACC$new()$aggregate
C
# -----------

devtools::load_all("mlr3")
testthat::test_package("mlr3")


# ---------------------- multiplex

task = mlr_tasks$get("iris")
opchoice = PipeOpChoice$new(3)
opchoicenamed = PipeOpChoice$new(c("opscale", "oppca", "opnop"))
opscale = PipeOpScale$new()
oppca = PipeOpPCA$new()
opnop = PipeOpNOP$new()
opunchoice = PipeOpUnchoice$new(3)

graph1 = opchoice %>>% gunion(opscale, oppca, opnop) %>>% opunchoice
graph2 = opchoicenamed %>>% gunion(opscale, oppca, opnop) %>>% opunchoice

graph1$param_vals$choice.selection = 1
assert(all.equal(graph1$train(task), opscale$train(list(task))[[1]]))

graph1$param_vals$choice.selection = 2
assert(all.equal(graph1$train(task), oppca$train(list(task))[[1]]))
assert(all.equal(graph1$predict(task), oppca$predict(list(task))[[1]]))

graph2$param_vals$choice.selection = "opscale"
assert(all.equal(graph2$train(task), opscale$train(list(task))[[1]]))

graph2$param_vals$choice.selection = "oppca"
assert(all.equal(graph2$train(task), oppca$train(list(task))[[1]]))
assert(all.equal(graph2$predict(task), oppca$predict(list(task))[[1]]))

# ----------------------  Learner

task = mlr_tasks$get("iris")
lrn = mlr_learners$get("classif.rpart")
graph = PipeOpScale$new() %>>% PipeOpPCA$new() %>>% PipeOpLearner$new(lrn)
assert(is.null(graph$train(task)))
graph$predict(task)  # problem is in mlr3

# ---------------------- Complex graph structure

gr = BasicPOAny$new(1, 2, "beginning") %>>%
  greplicate(BasicPOAny$new(1, 2, "middle"), 2) %>>%
  gunion(BasicPOAny$new(1, 2, "left"), BasicPOAny$new(2, 2, "secondmiddle"), BasicPOAny$new(1, 2, "right")) %>>%
  greplicate(BasicPOAny$new(3, 2, "lower"), 2) %>>%
  BasicPOAny$new(4, 1, "end")

gr$plot()

assert(gr$train(0) == 8)
assert(gr$predict(0) == 8)


# --------------------- PipeOpLearnerCV







gr = Graph$new()
gr2 = Graph$new()
gr3 = Graph$new()




pipeop = BasicPO$new("testa")

gr2$add_node(BasicPOAny$new(1, 2, "testad"))
gr2$add_node(BasicPOAny$new(1, 2, "testbd"))
gr2$add_node(BasicPOAny$new(1, 2, "testcd"))
gr2[["testad"]]$next_node_channels[[1]] = gr2[["testbd"]]$in_channels[[1]]
gr2[["testcd"]]$next_node_channels[[2]] = gr2[["testbd"]]$in_channels[[1]]
gr2[["testcd"]]$next_node_channels[[1]] = gr2[["testad"]]$in_channels[[1]]
gr2$add_node(BasicPOAny$new(2, 1, "tested"))
gr2[["tested"]]$prev_node_channels[[1]] = gr2[["testbd"]]$out_channels[[2]]
gr2[["tested"]]$prev_node_channels[[2]] = gr2[["testbd"]]$out_channels[[1]]

gr2$out_channels
gr3 = gr2 %>>% BasicPOAny$new(3, 1, "testxx") %>>% BasicPOAnyNamed$new(1, 2, "testyy")
gr3$plot()

gr3 = gr2 %>>% BasicPOAny$new(3, 1, "testxx") %>>% BasicPOAny$new(1, 1, "testyy")


BasicPOAny$new(1, 2, "teest")$train(list(1))
BasicPOAny$new(2, 3, "teest2")$train(list(10, 20))


gr3[["testad"]]$pipeop$param_set

gr3$param_set
gr3$param_vals

gr3[["testad"]]$pipeop$param_vals$par = 1
gr3$param_vals
gr3$param_vals$testcd.par = 1
gr3$param_vals




gr3$train(1)

gr3$predict(2)



gr3[["testyy"]]$prev_node_channels['a'] = list(NULL)
gr3[["testxx"]]$next_node_channels[[1]] = gr3[["testyy"]]$in_channels$a

gr2[["testad"]]$graph
gr2[["testbd"]]$graph

bpo = BasicPO$new("testid")
bpo2 = BasicPO$new("testid2")
bpo3 = BasicPO$new("testid3")

(bpo %>>% bpo2 %>>% bpo3)$plot()

pon = PipeOpNOP$new()

pon$train(list("test"))

pon$predict(1)

graph = bpo %>>% bpo2
graph$plot()
graph2 = graph %>>% bpo3

graph$plot()
graph2$plot()

node = GraphNode$new(bpo, gr)
node2 = GraphNode$new(bpo2, gr)
node3 = GraphNode$new(bpo3, gr)




sort_nodes(gr$node_list)
gr$plot()

gr[["testid"]]$prev_node_channels[[1]] =gr[["testid2"]]$out_channels[[1]]


gr$out_channels
gr$in_channels


gr$plot()
gr2 = gr$clone(deep = TRUE)

gr2$plot()

(gr2[["testid"]], gr[["testid"]]


gr[["testid"]]$prev_node_channels[[1]] = gr[["testid2"]]$out_channels[[1]]
# gr[["testid"]]$next_node_edges[[1]] = gr[["testid2"]]$in_edges[[1]]

graph_plot(gr)

gr$node_list
gr$sorted_node_list

debugonce(sort_nodes)
attr(sort_nodes(gr$node_list, TRUE), "layer")

node$graph
node$pipeop
node$prev_nodes
node$next_nodes
node$in_edges
node$out_edges
node$intype
node$outtype
node$input_complete
node$output_complete

gr$node_list[[1]]$in_edges
gr$node_list[[1]]$intype
gr$node_list[[1]]$next_nodes
gr$sorted_node_list

gr$update_connections()
gr$intype
gr$outtype
gr$in_edges
gr$out_edges
gr$source_nodes
gr$sink_nodes

li = gr[["testid"]]$next_node_edges
li[[1]] = gr[["testid2"]]$in_edges[[1]]

el = gr[["testid"]]
el$next_node_edges = li


gr[["testid"]]$next_node_edges= li








gr[["testid"]]$next_node_edges[[1]]

gr[["testid"]]$next_nodes




gr$is_learnt
gr$param_set

gr




#------------------------------------




library("R6")

clx <- R6Class("test",
  public = list(
      test = "test",
      xyz = 111,
      initialize = function(param) {
        private$xy = list(a = param, b = "b")
      },
      get = function(xx) {
        private$xy = xx$private$xy
      }
  ),
  active = list(
      t2 = function(x) {
        if (missing(x)) {
          private$xy
        } else {
          print(x)
        }
      }
  ),
  private = list(
      xy = NULL
  )
)


cl <- clx$new(123)
cl2 <- clx$new(99)

cl2$get(cl)

cl$t2
cl2$t2


cl$t2$a <- 1000


clx2 <- R6Class("test2",
  inherit = clx,
  public = list(
      test2 = "t",
      xy = 9000,
      initialize = function(param) {
        super$initialize(param)
      }
  ),
  active = list(
      t2 = function() 112
  )
)

cl2 <- clx2$new(1222)

cl2$test
cl2$test2
cl2$xyz
cl2$xy
cl2$t2

# TODO: auto cycle
#

task = mlr_tasks$get("iris")
lrn_rp = mlr_learners$get("classif.rpart")
g = PipeOpPCA() %>>% PipeOpLearner(lrn_rp)

g1 = tune(g)
g1 >> g




)




dfa = data.frame( a = 1:5, b = 2:6)
dfb = data.frame( x = c("a", "b", "c", "x", "y"), y = rnorm(5))
dba = as_data_backend(dfa)
dbb = as_data_backend(dfb)
dbx = DataBackendCbind$new(dba, dbb, colnames(dfa), colnames(dfb))

dbx

dbx$data(cols = c("a", "x"), rows = c(3, 4))


dbi = as_data_backend(iris)

task_regr_iris = TaskRegr$new(id = "iris.regr", backend = dbi, target = "Sepal.Length")

task_regr_iris$filter(1:10)

task_regr_iris$data()

task_regr_iris$select(c("Petal.Length", "Species"))


task_regr_iris$data()


task_regr_iris_2 = TaskRegr$new(id = "iris.regr", backend = dbi, target = "Sepal.Length")

task_regr_iris_2$select(c("Petal.Length", "Species"))

task_regr_iris$data()
task_regr_iris_2$data()

task_regr_iris$rbind(task_regr_iris_2$data())

task_regr_iris$data()

mlr_tasks$get("iris")


lrn = mlr_learners$get("classif.rpart")

lrn$train(task)

lrn$data$model


lrn$predict(task)

pred

library("mlr3learners")


lrnsvm = mlr_learners$get("classif.svm")

lrnsvm$train(task)

lrnsvm$predict(task)


pred$score(MeasureClassifACC$new())

cv = mlr_resamplings$get("cv")

resres = resample(task, lrn, cv)

resres$aggregate(MeasureClassifACC$new())


resres = resample(task, lrnsvm , cv)
resres$aggregate(MeasureClassifACC$new())

design = data.table::data.table(
  task = mlr_tasks$mget(c("iris", "sonar")),
  learner = mlr_learners$mget(c("classif.featureless", "classif.rpart")),
  resampling = mlr_resamplings$mget(c("cv3", "holdout"))
)
design$resampling = Map(
  function(task, resampling) resampling$clone()$instantiate(task),
  task = design$task, resampling = design$resampling
)


design

bmr = benchmark(design)

bmr$aggregate()

names(bmr)


mlr_resamplings$get("holdout")
mlr_measures$get("classif.acc")
mlr_tasks$get("iris")
mlr_learners$get("classif.rpart")

mlr_control

rpart = LearnerClassifRpart$new()

rpart$param_set

rpart$param_set$values$cp = 0.4

rpart$param_set$values$cp = 1.1


lrn$param_set$values$cp = 0.2
resample(task, lrn, cv)$aggregate()

scaling = PipeOpScale$new()

transformed_data = scaling$train(list(task))[[1]]

plot(transformed_data$data()$Petal.Length)



scaling$state

task2 =task$clone(deep=TRUE)


task2$filter(1)

task2$data()

scaling$predict(list(task2))[[1]]$data()

scaling$train
scaling$predict
scaling$state
scaling$param_set$values$center = FALSE

transformed_data = scaling$train(list(task))[[1]]

transformed_data$data()

scaling$state





PipeOp


Graph




graph = PipeOpScale$new() %>>% PipeOpFilter$new(mlr3filters::FilterVariance$new(), param_vals = list(filter.frac = .5))

graph$pipeops

graph$edges

graph$plot()

task_out = graph$train(task)[[1]]
task_out$data()

graph$predict(task)[[1]]$data()

graph$plot()

graph$add_pipeop(PipeOpPCA$new())

graph$plot()

graph$ids()
graph$edges

graph$add_edge("variance", "pca")

graph$plot()

graph$edges


graph$add_pipeop(PipeOpCopy$new(3))

graph$plot()

graph$add_edge("copy", "scale", src_channel = "output2")

graph$plot()

graph$add_pipeop(PipeOpApply$new())

graph$plot()


graph$add_edge("copy", src_channel = "output1", "apply")

graph$plot()

graph$add_pipeop(PipeOpFeatureUnion$new(3))

graph$plot()

graph$add_edge("copy", "featureunion", src_channel = "output3", dst_channel = "input1")

graph$plot()


graph$add_edge("apply", "featureunion", dst_channel = "input2")

graph$add_edge("pca", "featureunion", dst_channel = "input3")

graph$param_set
graph$param_set$values$apply.applicator = as.numeric

graph$train(task)

g1 = Graph$new()$add_pipeop(PipeOpCopy$new(2))
g2 = Graph$new()$add_pipeop(PipeOpPCA$new())$add_pipeop(PipeOpScale$new())


g1$output

g2$input

gsum = g1 %>>% g2 %>>% PipeOpFeatureUnion$new(2)
gsum$plot()
gsum$train(task)[[1]]$data()

gsum$predict(task)[[1]]$data()


ghalf = g1 %>>% g2

str(ghalf$train(task))

gsum = PipeOpCopy$new(2) %>>%
  gunion(list(PipeOpPCA$new(), PipeOpScale$new())) %>>%
  PipeOpFeatureUnion$new(2)
gsum$plot()

gsum$state


gsum$add_edge("featureunion", "copy")

PipeOpPCA$new()$train(list(task))

por = PipeOpLearner$new(mlr_learners$get("classif.rpart"))

glrn = gsum %>>% por

glrn$plot()

glrn$train(task)

glrn$predict(task)

llrn = GraphLearner$new(glrn)
glrn$plot()
llrn

resample(task, llrn, cv)

llrn$param_set
task$select


gr = PipeOpBranch$new(2) %>>% gunion(list(PipeOpPCA$new(), PipeOpNOP$new())) %>>% PipeOpUnbranch$new(2)

gr$plot()

gr$param_set$values$branch.selection = 1

gr$train(task)[[1]]$data()


PipeOpScale$new()$input
PipeOpScale$new()$output

PipeOpLearner$new(mlr_learners$get("classif.rpart"))$output

PipeOpLearner$new(mlr_learners$get("classif.rpart")) %>>% PipeOpModelAvg$new(1)

PipeOpScale$new() %>>% PipeOpModelAvg$new(1)



task = mlr_tasks$get("iris")$
  set_col_role("Sepal.Length", character(0))$
  cbind(data.table(x = 1:150))$
  set_col_role("Sepal.Length.xxx", "feature")


task$data()

cbind(mlr_tasks$get("iris")$data(), data.table(x = 1:150))




po("pca")$train(list(tsk("iris")))[[1]]$data()


po("pca")$train(list(Multiplicity(tsk("iris"), tsk("boston_housing"))))


gr = po("replicate", reps = 10) %>>% po("subsample") %>>%
  lrn("classif.rpart") %>>% po("classifavg", collect = TRUE)
gr$train(tsk("iris"))

gr$predict(tsk("iris"))

gr$state$classif.rpart[1:2]

gr <- po("ovrsplit") %>>% lrn("classif.rpart") %>>% po("ovrunite")

gr <- po("ovrsplit") %>>%
  po("replicate", reps = 3) %>>%
  po("subsample") %>>% lrn("classif.rpart") %>>%
  po("classifavg", collect = TRUE) %>>%
  po("ovrunite")


gr <-   po("replicate", reps = 3) %>>%
  po("ovrsplit") %>>%
  po("subsample") %>>% lrn("classif.rpart") %>>%
  po("ovrunite") %>>%
  po("classifavg", collect = TRUE)

gr <- list(
    po("nop"),
    po("learner_cv", lrn("regr.lm")),
    po("replicate", reps = 3) %>>% po("learner_cv", lrn("regr.ranger"))
  ) %>>%
  po("featureunion") %>>% lrn("regr.rpart") %>>% po("regravg", collect = TRUE)


ntree <- 10
mtry <- 2

gr <- po("replicate", reps = ntree) %>>%
  po("select", selector = function(task) sample(task$feature_names, mtry)) %>>%
  po("subsample") %>>% lrn("classif.rpart") %>>%
  po("classifavg", collect = TRUE)



`%<>>%` = function(g1, g2) {
  force(g2)
  force(g1)
  cat(sprintf("'%s' >> '%s'\n", g1, g2))
  paste0(g1, g2)
}

"one" %<>>% "two" %<>>% "three"
#> 'one' >> 'two'
#> 'onetwo' >> 'three'
mlr-org/mlr3pipelines documentation built on March 29, 2024, 5:52 p.m.