# inst/doc/extending.R

## ----extending-020------------------------------------------------------------
library("mlr3")
# Fetch the "iris" task; `task` is reused by later chunks of this script.
task = tsk("iris")
# Show the task's data (auto-printed at top level).
task$data()

## ----extending-022, eval = FALSE, tidy = FALSE--------------------------------
#  PipeOpCopyTwo = R6::R6Class("PipeOpCopyTwo",
#    inherit = mlr3pipelines::PipeOp,
#    public = list(
#      initialize = function(id = "copy.two") {
#        ....
#      }
#    ),
#    private = list(
#      .train = function(inputs) {
#        ....
#      },
#  
#      .predict = function(inputs) {
#        ....
#      }
#    )
#  )

## ----extending-023, eval = FALSE----------------------------------------------
#  initialize = function(id = "copy.two") {
#    input = data.table::data.table(name = "input", train = "*", predict = "*")
#    # the following will create two rows and automatically fill the `train`
#    # and `predict` cols with "*"
#    output = data.table::data.table(
#      name = c("output1", "output2"),
#      train = "*", predict = "*"
#    )
#    super$initialize(id,
#      input = input,
#      output = output
#    )
#  }

## ----extending-024, eval = FALSE----------------------------------------------
#  .train = function(inputs) {
#    self$state = list()
#    c(inputs, inputs)
#  }

## ----extending-025, eval = FALSE----------------------------------------------
#  .predict = function(inputs) {
#    c(inputs, inputs)
#  }

## ----extending-026, tidy = FALSE----------------------------------------------
# PipeOpCopyTwo: a PipeOp with one input channel ("input") and two output
# channels ("output1", "output2"). Training and prediction both pass the
# received input on to each of the two outputs unchanged.
PipeOpCopyTwo = R6::R6Class("PipeOpCopyTwo",
  inherit = mlr3pipelines::PipeOp,
  public = list(
    # Build the channel tables and hand them to the parent constructor.
    # `id` is the identifier given to the new PipeOp.
    initialize = function(id = "copy.two") {
      in_channels = data.table::data.table(
        name = "input", train = "*", predict = "*"
      )
      # the scalar "*" is recycled, so both output rows get "*" in the
      # `train` and `predict` columns
      out_channels = data.table::data.table(
        name = c("output1", "output2"),
        train = "*", predict = "*"
      )
      super$initialize(id, input = in_channels, output = out_channels)
    }
  ),
  private = list(
    # `inputs` is the list of received inputs; the returned list contains its
    # elements twice, one copy for each output channel.
    .train = function(inputs) {
      # nothing is learned during training, so the state is an empty list
      self$state = list()
      rep(inputs, times = 2)
    },

    # Prediction duplicates the input in the same way as training.
    .predict = function(inputs) {
      rep(inputs, times = 2)
    }
  )
)

## ----extending-027------------------------------------------------------------
library("mlr3pipelines")
# Instantiate the custom PipeOp and make it the only node of a new Graph.
poct = PipeOpCopyTwo$new()
gr = Graph$new()$add_pipeop(poct)

print(gr)

# Train the single-node graph on the iris task and inspect the structure of
# what training returns.
result = gr$train(task)

str(result)

## ----extending-028, tidy = FALSE----------------------------------------------
# PipeOpDropNA: task preprocessing step that, during training, removes every
# row with a missing value in at least one feature. During prediction the
# task is returned untouched.
PipeOpDropNA = R6::R6Class("PipeOpDropNA",
  inherit = mlr3pipelines::PipeOpTaskPreproc,
  public = list(
    # `id` is the identifier given to the new PipeOp.
    initialize = function(id = "drop.na") {
      super$initialize(id)
    }
  ),

  private = list(
    # Filters `task` down to the rows without missing feature values and
    # returns it.
    .train_task = function(task) {
      # no information has to be carried over to prediction
      self$state = list()
      na_mask = is.na(task$data(cols = task$feature_names))
      # TRUE for each row that has at least one missing feature value
      incomplete = apply(na_mask, MARGIN = 1, FUN = any)
      keep_ids = task$row_ids[!incomplete]
      task$filter(keep_ids)
    },

    .predict_task = function(task) {
      # rows are never dropped at prediction time
      task
    }
  )
)

## ----extending-029------------------------------------------------------------
# Take rows 30, 60, ..., 150 of iris and blank out one value in each of the
# first two rows, giving a small classification task with missing data.
smalliris = iris[seq(30, 150, by = 30), ]
smalliris[1, "Sepal.Length"] = NA
smalliris[2, "Sepal.Width"] = NA
sitask = as_task_classif(smalliris, target = "Species")
print(sitask$data())

## ----extending-030------------------------------------------------------------
# Single-node graph that contains only the NA-dropping PipeOp.
gr = Graph$new()$add_pipeop(PipeOpDropNA$new())

# Training returns a list of outputs; its first element is the filtered task.
filtered_task = gr$train(sitask)[[1]]
print(filtered_task$data())

## ----extending-031, tidy = FALSE----------------------------------------------
# PipeOpScaleAlways: task preprocessing step that centers and scales the
# numeric features. The centering and scaling values found during training
# are stored in the state and reused for prediction data.
PipeOpScaleAlways = R6::R6Class("PipeOpScaleAlways",
  inherit = mlr3pipelines::PipeOpTaskPreproc,
  public = list(
    # `id` is the identifier given to the new PipeOp.
    initialize = function(id = "scale.always") {
      super$initialize(id = id)
    }
  ),

  private = list(
    # Restrict the operation to features whose type is "numeric".
    .select_cols = function(task) {
      task$feature_types[type == "numeric", id]
    },

    # Scale the training data and remember how it was scaled.
    .train_dt = function(dt, levels, target) {
      scaled = scale(as.matrix(dt))
      # scale() attaches the values it used as attributes of its result
      self$state = list(
        center = attr(scaled, "scaled:center"),
        scale = attr(scaled, "scaled:scale")
      )
      scaled
    },

    # Apply the stored centering and scaling values to new data.
    .predict_dt = function(dt, levels) {
      # the transpose has one row per column of `dt`, so the per-column state
      # vectors recycle onto the matching rows
      centered = t(dt) - self$state$center
      t(centered / self$state$scale)
    }
  )
)

## ----extending-032------------------------------------------------------------
# Single-node graph that contains only the scaling PipeOp.
gr = Graph$new()$add_pipeop(PipeOpScaleAlways$new())

# Train on the iris task and show the data of the first output.
result = gr$train(task)

result[[1]]$data()

## ----extending-033, tidy = FALSE----------------------------------------------
# PipeOpDropConst: task preprocessing step that drops features which are
# constant in the training data. The names of the features to keep are
# stored in the state (`cnames`) and the same selection is applied to every
# task that is transformed afterwards.
PipeOpDropConst = R6::R6Class("PipeOpDropConst",
  inherit = mlr3pipelines::PipeOpTaskPreprocSimple,
  public = list(
    # `id` is the identifier given to the new PipeOp.
    initialize = function(id = "drop.const") {
      super$initialize(id = id)
    }
  ),

  private = list(
    # Returns a list with one element, `cnames`: the names of all features
    # of `task` that take more than one distinct value.
    .get_state = function(task) {
      data = task$data(cols = task$feature_names)
      # vapply() always yields a logical vector (also for zero features),
      # so the result is a valid subscript for colnames(data) below;
      # sapply() would return an empty list in that case
      nonconst = vapply(
        data,
        function(column) length(unique(column)) > 1,
        logical(1)
      )
      list(cnames = colnames(data)[nonconst])
    },

    # Keeps only the features recorded in the state.
    .transform = function(task) {
      task$select(self$state$cnames)
    }
  )
)

## ----extending-034------------------------------------------------------------
# Clone the iris task and keep only rows 1 to 5 of the clone.
irishead = task$clone()$filter(1:5)
irishead$data()

## ----extending-035------------------------------------------------------------
# Train a single-node graph with PipeOpDropConst on the five-row task and
# take the first element of the training output.
gr = Graph$new()$add_pipeop(PipeOpDropConst$new())
dropped_task = gr$train(irishead)[[1]]

dropped_task$data()

## ----extending-036------------------------------------------------------------
# Show the state stored in the trained PipeOp (its id is "drop.const").
gr$pipeops$drop.const$state

## ----extending-037------------------------------------------------------------
# Run the already trained graph in prediction mode on the full iris task.
dropped_predict = gr$predict(task)[[1]]

dropped_predict$data()

## ----extending-038, tidy = FALSE----------------------------------------------
# PipeOpScaleAlwaysSimple: task preprocessing step that centers and scales
# the numeric features. The per-column mean and standard deviation are
# computed once from the training data, stored in the state, and then
# applied by a single transformation function.
PipeOpScaleAlwaysSimple = R6::R6Class("PipeOpScaleAlwaysSimple",
  inherit = mlr3pipelines::PipeOpTaskPreprocSimple,
  public = list(
    # `id` is the identifier given to the new PipeOp.
    initialize = function(id = "scale.always.simple") {
      super$initialize(id = id)
    }
  ),

  private = list(
    # Restrict the operation to features whose type is "numeric".
    .select_cols = function(task) {
      task$feature_types[type == "numeric", id]
    },

    # Returns the state: per-column means (`center`) and standard
    # deviations (`scale`) of `dt`.
    .get_state_dt = function(dt, levels, target) {
      # vapply() guarantees named numeric vectors with one value per column,
      # whereas the return type of sapply() depends on its input
      list(
        center = vapply(dt, mean, numeric(1)),
        scale = vapply(dt, sd, numeric(1))
      )
    },

    # Apply the stored centering and scaling values to `dt`.
    .transform_dt = function(dt, levels) {
      # the transpose has one row per column of `dt`, so the per-column state
      # vectors recycle onto the matching rows
      t((t(dt) - self$state$center) / self$state$scale)
    }
  )
)

## ----extending-039------------------------------------------------------------
# Train both scaling variants on the same task so their outputs can be
# compared. `gr` is reused and ends up holding the second graph.
gr = Graph$new()$add_pipeop(PipeOpScaleAlways$new())
result_posa = gr$train(task)[[1]]

gr = Graph$new()$add_pipeop(PipeOpScaleAlwaysSimple$new())
result_posa_simple = gr$train(task)[[1]]

## ----extending-040------------------------------------------------------------
# Training output of the PipeOpScaleAlways graph.
result_posa$data()

## ----extending-041------------------------------------------------------------
# Training output of the PipeOpScaleAlwaysSimple graph.
result_posa_simple$data()

## ----extending-042------------------------------------------------------------
# Show the source of PipeOpScale's constructor.
PipeOpScale$public_methods$initialize

## ----extending-043------------------------------------------------------------
# Create a scale PipeOp with po() and print its parameter set.
pss = po("scale")
print(pss$param_set)

## ----extending-044------------------------------------------------------------
# Switch centering off and show the parameter values that are now set.
pss$param_set$values$center = FALSE
print(pss$param_set$values)

## ----extending-045, error = TRUE----------------------------------------------
# This chunk is knitted with error = TRUE: assigning the character string
# "TRUE" (instead of a logical) is expected to be rejected.
pss$param_set$values$scale = "TRUE" # bad input is checked!

## ----extending-046------------------------------------------------------------
# Show the source of PipeOpScale's private training method.
PipeOpScale$private_methods$.train_dt

## ----extending-047------------------------------------------------------------
# Switch both scaling and centering off before training.
pss$param_set$values$scale = FALSE
pss$param_set$values$center = FALSE

# Put the configured PipeOp into a new graph; `gr` is overwritten here.
gr = Graph$new()
gr$add_pipeop(pss)

# Train on the iris task and show the data of the first output.
result = gr$train(task)

result[[1]]$data()

# Try the mlr3pipelines package in your browser

# Any scripts or data that you put into this service are public.

# mlr3pipelines documentation built on May 31, 2023, 9:26 p.m.