partykit: Interfaces for partykit package for data science pipelines.

Description Usage Arguments Details Value Author(s) Examples

Description

Interfaces to partykit functions that can be used in a pipeline implemented by magrittr.

Usage

# The commented-out interfaces below are already defined for package party
# ntbt_cforest(data, ...)
# ntbt_ctree(data, ...)
ntbt_glmtree(data, ...)
ntbt_lmtree(data, ...)
# ntbt_mob(data, ...)
ntbt_palmtree(data, ...)

Arguments

data

data frame, tibble, list, ...

...

Other arguments passed to the corresponding interfaced function.

Details

Interfaces call their corresponding interfaced function.

Value

Object returned by interfaced function.

Author(s)

Roberto Bertolusso

Examples

## Not run: 
library(intubate)
library(magrittr)
library(partykit)


## ntbt_cforest: Conditional Random Forests
## The same model specification (dist ~ speed on the `cars` data) is written
## three ways: direct call, interface call, and interface call in a pipeline.

## Original function to interface: the formula comes first and the data
## is supplied through the named `data` argument.
cf <- cforest(dist ~ speed, data = cars)

## The interface puts data as first parameter; the remaining arguments
## follow it.
cf <- ntbt_cforest(cars, dist ~ speed)

## so it can be used easily in a pipeline: %>% supplies `cars` as the
## first argument of ntbt_cforest().
cf <- cars %>%
  ntbt_cforest(dist ~ speed)


## ntbt_ctree: Conditional Inference Trees
## Keep only the rows of `airquality` whose Ozone value is not missing.
airq <- subset(airquality, !is.na(Ozone))

## Original function to interface: Ozone is modelled on all other columns
## of `airq` (the `.` on the right-hand side of the formula).
airct <- ctree(Ozone ~ ., data = airq)
plot(airct)

## The interface puts data as first parameter
airct <- ntbt_ctree(airq, Ozone ~ .)
plot(airct)

## so it can be used easily in a pipeline.
## Here the result is piped straight into plot() instead of being stored.
airq %>%
  ntbt_ctree(Ozone ~ .) %>%
  plot()


## ntbt_glmtree: Generalized Linear Model Trees
## Load the PimaIndiansDiabetes data set from package mlbench (the package
## itself is not attached).
data("PimaIndiansDiabetes", package = "mlbench")

## Original function to interface
## NOTE(review): presumably the terms after `|` are the partitioning
## variables -- confirm against the partykit glmtree documentation.
pid_tree2 <- glmtree(diabetes ~ glucose | pregnant +
                       pressure + triceps + insulin + mass + pedigree + age,
                     data = PimaIndiansDiabetes, family = binomial)
plot(pid_tree2)

## The interface puts data as first parameter; formula and `family` are
## passed on unchanged.
pid_tree2 <- ntbt_glmtree(PimaIndiansDiabetes, diabetes ~ glucose | pregnant +
                            pressure + triceps + insulin + mass + pedigree + age,
                          family = binomial)
plot(pid_tree2)

## so it can be used easily in a pipeline.
## The result is piped into plot() without being stored.
PimaIndiansDiabetes %>%
  ntbt_glmtree(diabetes ~ glucose | pregnant +
                 pressure + triceps + insulin + mass + pedigree + age,
               family = binomial) %>%
  plot()


## ntbt_lmtree: Linear Model Trees
## Load the BostonHousing data set from package mlbench, then recode two
## columns: `chas` becomes a factor labelled "no"/"yes" and `rad` becomes
## an ordered factor.
data("BostonHousing", package = "mlbench")
BostonHousing <- 
  transform(BostonHousing,
            chas = factor(chas, levels = 0:1, labels = c("no", "yes")),
            rad = factor(rad, ordered = TRUE))

## Original function to interface
## I(rm^2) protects `^` so it is evaluated arithmetically inside the formula.
bh_tree <- lmtree(medv ~ log(lstat) + I(rm^2) | zn + indus + chas +
                    nox + age + dis + rad + tax + crim + b + ptratio,
                  data = BostonHousing, minsize = 40)
plot(bh_tree)

## The interface puts data as first parameter; formula and `minsize` are
## passed on unchanged.
bh_tree <- ntbt_lmtree(BostonHousing,
                       medv ~ log(lstat) + I(rm^2) | zn + indus + chas +
                         nox + age + dis + rad + tax + crim + b + ptratio,
                       minsize = 40)
plot(bh_tree)

## so it can be used easily in a pipeline.
## The result is piped into plot() without being stored.
BostonHousing %>%
  ntbt_lmtree(medv ~ log(lstat) + I(rm^2) | zn + indus + chas +
                nox + age + dis + rad + tax + crim + b + ptratio,
              minsize = 40) %>%
  plot()


## ntbt_mob: Model-based Recursive Partitioning
## Load the PimaIndiansDiabetes data set from package mlbench (the same
## data set the ntbt_glmtree example loads).
data("PimaIndiansDiabetes", package = "mlbench")

## Logistic-regression fitting function, passed to mob() / ntbt_mob()
## through their `fit` argument in the calls below.
##
## Arguments:
##   y        response.
##   x        regressors; used as the only right-hand-side term of the model.
##   start    optional starting values, forwarded to glm().
##   weights  optional weights, forwarded to glm().
##   offset   optional offset, forwarded to glm().
##   ...      further arguments forwarded to glm().
##
## Value: the object returned by glm().
logit <- function(y, x, start = NULL, weights = NULL, offset = NULL, ...) {
  ## `0 +` suppresses glm's automatic intercept, so the model contains
  ## exactly the columns of `x`.
  ## `weights` and `offset` are part of the signature, so they are forwarded
  ## rather than silently dropped; leaving both NULL gives the plain fit.
  glm(y ~ 0 + x, family = binomial, start = start,
      weights = weights, offset = offset, ...)
}

## Original function to interface
## `fit = logit` hands the user-defined fitting function to mob().
pid_tree <- mob(diabetes ~ glucose | pregnant + pressure + triceps + insulin +
                  mass + pedigree + age, data = PimaIndiansDiabetes, fit = logit)
plot(pid_tree)

## The interface puts data as first parameter; formula and `fit` are
## passed on unchanged.
pid_tree <- ntbt_mob(PimaIndiansDiabetes, diabetes ~ glucose | pregnant + pressure +
                       triceps + insulin + mass + pedigree + age, fit = logit)
plot(pid_tree)

## so it can be used easily in a pipeline.
## The result is piped into plot() without being stored.
PimaIndiansDiabetes %>%
  ntbt_mob(diabetes ~ glucose | pregnant + pressure +
             triceps + insulin + mass + pedigree + age, fit = logit) %>%
  plot()


## ntbt_palmtree: Partially Additive (Generalized) Linear Model Trees
## Simulate the data set used by the palmtree example.
##
## Arguments:
##   nobs    number of observations (rows) to simulate.
##   nreg    number of regressors x1, ..., x<nreg>; they are drawn from a
##           multivariate normal with zero means, unit variances and common
##           pairwise covariance `creg`. Must be at least 2 (x1 and x2
##           define the optimal treatment) and at least the number of slopes.
##   creg    common off-diagonal entry of the regressor covariance matrix.
##   ptreat  probability that the binary treatment `a` equals 1.
##   sd      standard deviation of the normal error term `err`.
##   coef    intercept followed by one slope per regressor x1, x2, ...;
##           regressors without a slope do not enter `y` directly.
##   eff     multiplier of the squared distance between the assigned and
##           the optimal treatment, which is subtracted from `y`.
##
## Value: a data frame with columns x1..x<nreg>, a (factor), err and y.
dgp <- function(nobs = 1000, nreg = 5, creg = 0.4, ptreat = 0.5, sd = 1,
                coef = c(1, 0.25, 0.25, 0, 0, -0.25), eff = 1) {
  intercept <- coef[1]
  slopes <- coef[-1]

  ## Fail early with a clear message instead of an obscure indexing or
  ## matrix-conformability error further down.
  if (nreg < 2) {
    stop("'nreg' must be at least 2", call. = FALSE)
  }
  if (length(slopes) < 1 || length(slopes) > nreg) {
    stop("'coef' must hold an intercept plus between 1 and 'nreg' slopes",
         call. = FALSE)
  }

  ## Random draws happen in the same order as before (regressors, treatment,
  ## error), so a given seed reproduces the same data.
  d <- mvtnorm::rmvnorm(nobs,
    mean = rep(0, nreg),
    sigma = diag(1 - creg, nreg) + creg)
  colnames(d) <- paste0("x", seq_len(nreg))
  d <- as.data.frame(d)
  d$a <- rbinom(nobs, size = 1, prob = ptreat)
  d$err <- rnorm(nobs, mean = 0, sd = sd)

  ## Optimal treatment: 1 when x1 > -0.545 and x2 < 0.545, otherwise 0.
  gopt <- function(d) {
    as.numeric(d$x1 > -0.545) * as.numeric(d$x2 < 0.545)
  }

  ## Use as many regressors as there are slopes, rather than always x1..x5.
  slope_cols <- paste0("x", seq_along(slopes))
  linear_part <- drop(as.matrix(d[, slope_cols, drop = FALSE]) %*% slopes)
  d$y <- intercept + linear_part - eff * (d$a - gopt(d))^2 + d$err

  d$a <- factor(d$a)
  d
}
## Fix the random seed so the simulated data are reproducible, then
## simulate one data set with dgp()'s default settings.
set.seed(1)
d <- dgp()

## Original function to interface
## NOTE(review): the formula has three parts separated by `|`; their roles
## are defined by partykit's palmtree -- confirm against its documentation.
palm <- palmtree(y ~ a | x1 + x2 + x5 | x1 + x2 + x3 + x4 + x5, data = d)
plot(palm)

## The interface puts data as first parameter
palm <- ntbt_palmtree(d, y ~ a | x1 + x2 + x5 | x1 + x2 + x3 + x4 + x5)
plot(palm)

## so it can be used easily in a pipeline.
## The result is piped into plot() without being stored.
d %>%
  ntbt_palmtree(y ~ a | x1 + x2 + x5 | x1 + x2 + x3 + x4 + x5) %>%
  plot()

## End(Not run)

intubate documentation built on May 2, 2019, 2:46 p.m.