tests/regtest-glmtree.R

suppressWarnings(RNGversion("3.5.2"))

library("partykit")

set.seed(29)
n <- 1000
x <- runif(n)
z <- runif(n)
y <- rnorm(n, mean = x * c(-1, 1)[(z > 0.7) + 1], sd = 3)
z_noise <- factor(sample(1:3, size = n, replace = TRUE))
d <- data.frame(y = y, x = x, z = z, z_noise = z_noise)


fmla <- as.formula("y ~ x | z + z_noise")
fmly <- gaussian()
fit <- partykit:::glmfit

# versions of the data
d1 <- d
d1$z <- signif(d1$z, digits = 1)

k <- 20
zs_noise <- matrix(rnorm(n*k), nrow = n)
colnames(zs_noise) <- paste0("z_noise_", 1:k)
d2 <- cbind(d, zs_noise)
fmla2 <- as.formula(paste("y ~ x | z + z_noise +",
                          paste0("z_noise_", 1:k, collapse = " + ")))


d3 <- d2
d3$z <- factor(sample(1:3, size = n, replace = TRUE, prob = c(0.1, 0.5, 0.4)))
d3$y <- rnorm(n, mean = x * c(-1, 1)[(d3$z == 2) + 1], sd = 3)

## check weights
w <- rep(1, n)
w[1:10] <- 2
(mw1 <- glmtree(formula = fmla, data = d, weights = w))
(mw2 <- glmtree(formula = fmla, data = d, weights = w, caseweights = FALSE))



## check dfsplit
(mmfluc2 <- mob(formula = fmla, data = d, fit = partykit:::glmfit))
(mmfluc3 <- glmtree(formula = fmla, data = d))
(mmfluc3_dfsplit <- glmtree(formula = fmla, data = d, dfsplit = 10))


## check tests
if (require("strucchange"))
    print(sctest(mmfluc3, node = 1)) # does not yet work

x <- mmfluc3
(tst3 <- nodeapply(x, ids = nodeids(x), function(n) n$info$criterion))




## check logLik and AIC
logLik(mmfluc2)
logLik(mmfluc3)
logLik(mmfluc3_dfsplit)
logLik(glm(y ~ x, data = d))

AIC(mmfluc3)
AIC(mmfluc3_dfsplit)

## check pruning
pr2 <- prune.modelparty(mmfluc2)
AIC(mmfluc2)
AIC(pr2)

mmfluc_dfsplit3 <- glmtree(formula = fmla, data = d, alpha = 0.5, dfsplit = 3)
mmfluc_dfsplit4 <- glmtree(formula = fmla, data = d, alpha = 0.5, dfsplit = 4)
pr_dfsplit3 <- prune.modelparty(mmfluc_dfsplit3)
pr_dfsplit4 <- prune.modelparty(mmfluc_dfsplit4)
AIC(mmfluc_dfsplit3)
AIC(mmfluc_dfsplit4)
AIC(pr_dfsplit3)
AIC(pr_dfsplit4)

width(mmfluc_dfsplit3)
width(mmfluc_dfsplit4)
width(pr_dfsplit3)
width(pr_dfsplit4)

## check inner and terminal
options <- list(NULL,
                "object",
                "estfun",
                c("object", "estfun"))

arguments <- list("inner",
                  "terminal",
                  c("inner", "terminal"))


for (o in options) {
  print(o)
  x <- glmtree(formula = fmla, data = d, inner = o)
  str(nodeapply(x, ids = nodeids(x), function(n) n$info[c("object", "estfun")]), 2)
}

for (o in options) {
  print(o)
  x <- glmtree(formula = fmla, data = d, terminal = o)
  str(nodeapply(x, ids = nodeids(x), function(n) n$info[c("object", "estfun")]), 2)
}


## check model
m_mt <- glmtree(formula = fmla, data = d, model = TRUE)
m_mf <- glmtree(formula = fmla, data = d, model = FALSE)

dim(m_mt$data)
dim(m_mf$data)


## check multiway
(m_mult <- glmtree(formula = fmla2, data = d3, catsplit = "multiway", minsize = 80))


## check parm
fmla_p <- as.formula("y ~ x + z_noise + z_noise_1 | z + z_noise_2")
(m_interc <- glmtree(formula = fmla_p, data = d2, parm = 1))

(m_p3 <- glmtree(formula = fmla_p, data = d2, parm = 3))


## check trim
(m_tt <- glmtree(formula = fmla, data = d, trim = 0.2))

(m_tf <- glmtree(formula = fmla, data = d, trim = 300, minsize = 300))



## check breakties
m_bt <- glmtree(formula = fmla, data = d1, breakties = TRUE)
m_df <- glmtree(formula = fmla, data = d1, breakties = FALSE)

all.equal(m_bt, m_df, check.environment = FALSE)

unclass(m_bt)$node$info$criterion
unclass(m_df)$node$info$criterion


### example from mob vignette
data("PimaIndiansDiabetes", package = "mlbench")

logit <- function(y, x, start = NULL, weights = NULL, offset = NULL, ...) {
  glm(y ~ 0 + x, family = binomial, start = start, ...)
}

pid_formula <- diabetes ~ glucose | pregnant + pressure + triceps +
  insulin + mass + pedigree + age

pid_tree <- mob(pid_formula, data = PimaIndiansDiabetes, fit = logit)
pid_tree
nodeapply(pid_tree, ids = nodeids(pid_tree), function(n) n$info$criterion)

Try the partykit package in your browser

Any scripts or data that you put into this service are public.

partykit documentation built on April 14, 2023, 5:09 p.m.