inst/doc/glmtree.R

## ----segmentation_simulation1-------------------------------------------------
library(glmtree)
data <- generateData(n = 1000, scenario = "no tree", visualize = TRUE)

int_train <- sample.int(n = 1000, size = 0.2 * 1000)

test <- data[-int_train, ]
data <- data[int_train, ]

## ----segmentation_pca1--------------------------------------------------------
library(FactoMineR)
mixed <- PCA(data[, c("x1", "x2")])

data$pca1 <- predict(mixed, data)$coord[, 1]
data$pca2 <- predict(mixed, data)$coord[, 2]
test$pca1 <- predict(mixed, test)$coord[, 1]
test$pca2 <- predict(mixed, test)$coord[, 2]

data$cluster <- ifelse(data$pca1 > 1, 1, ifelse(data$pca1 > 0, 2, 3))
test$cluster <- ifelse(test$pca1 > 1, 1, ifelse(test$pca1 > 0, 2, 3))

pred <- matrix(0, nrow = 0.2 * 1000, ncol = 1)

for (j in 1:3) {
  modele <- glm(y ~ x1 + x2, data = data[data$cluster == j, ], family = binomial(link = "logit"))
  pred[test$cluster == j] <- predict(modele, test[test$cluster == j, ], type = "response")
}

normalizedGini(test$y, pred)
plot(mixed, choix = "ind", label = "none")

## ----segmentation_mob1--------------------------------------------------------
if (require(partykit, quietly = TRUE)) {
  mob_data <- partykit::glmtree(formula = y ~ x1 + x2 | x1 + x2, data = data, family = binomial)
  plot(mob_data)
  normalizedGini(test$y, predict(mob_data, test))
}

## ----segmentation_glmtree1, warning=FALSE-------------------------------------
tree <- glmtree::glmtree(x = data[, c("x1", "x2")], y = data$y)

plot(unlist(tree@performance$criterionEvolution), type = "l")

data$c_map <- factor(apply(predict(tree@best.tree$tree, data, type = "prob"), 1, function(p) names(which.max(p))))
test$c_map <- factor(apply(predict(tree@best.tree$tree, data, type = "prob"), 1, function(p) names(which.max(p))))

table(data$c_map)

plot(data[, 1], data[, 2], pch = 2 + data[, 3], col = as.numeric(data$c_map), xlab = "First coordinate", ylab = "Second coordinate")

plot(tree@best.tree$tree)

pred <- matrix(0, nrow = 0.2 * 1000, ncol = 1)

for (j in levels(data$c_map)) {
  modele <- glm(y ~ x1 + x2, data = data[data$c_map == j, ], family = binomial(link = "logit"))
  pred[test$c_map == j] <- predict(modele, test[test$c_map == j, ], type = "response")
}

normalizedGini(test$y, pred)

## ----segmentation_simulation2-------------------------------------------------
data <- generateData(n = 1000, scenario = "tree", visualize = TRUE)

int_train <- sample.int(n = 1000, size = 0.2 * 1000)

test <- data[-int_train, ]
data <- data[int_train, ]

## ----segmentation_pca2--------------------------------------------------------
mixed <- FAMD(data[, c("x1", "x2", "x3")])

dim_famd <- predict(mixed, test)$coord[, "Dim 1"] < 0

pred <- matrix(0, nrow = 0.2 * 1000, ncol = 1)

for (j in c(TRUE, FALSE)) {
  modele <- glm(y ~ x1 + x2 + x3, data = data[dim_famd == j, ], family = binomial(link = "logit"))
  pred[dim_famd == j] <- predict(modele, test[dim_famd == j, ], type = "response")
}

normalizedGini(test$y, pred)

## ----segmentation_mob2, warning=FALSE, message=FALSE--------------------------
if (require(partykit, quietly = TRUE)) {
  mob_data <- partykit::glmtree(formula = y ~ x1 + x2 + x3 | x1 + x2 + x3, data = data, family = binomial)
  plot(mob_data)
  normalizedGini(test$y, predict(mob_data, test))
}

## ----segmentation_glmtree2, warning=FALSE-------------------------------------
tree <- glmtree::glmtree(x = data[, c("x1", "x2", "x3")], y = data$y)

plot(unlist(tree@performance$criterionEvolution), type = "l")

data$c_map <- factor(apply(predict(tree@best.tree$tree, data, type = "prob"), 1, function(p) names(which.max(p))))
test$c_map <- factor(apply(predict(tree@best.tree$tree, test, type = "prob"), 1, function(p) names(which.max(p))))

table(data$c, data$c_map)

plot(data[, 1], data[, 2], pch = 2 + data[, 3], col = as.numeric(data$c_map), xlab = "First coordinate", ylab = "Second coordinate")

plot(tree@best.tree$tree)

pred <- matrix(0, nrow = 0.2 * 1000, ncol = 1)

for (j in 1:nlevels(data$c_map)) {
  pred[test$c_map == levels(data$c_map)[j]] <- predict(tree@best.tree$glms[[j]], test[test$c_map == levels(data$c_map)[j], ], type = "response")
}

normalizedGini(test$y, pred)

Try the glmtree package in your browser

Any scripts or data that you put into this service are public.

glmtree documentation built on May 29, 2024, 9:37 a.m.