`glmtree`: logistic regression trees for efficient segmentation

Segmentation

Three clusters, one predictive law

Simulation

library(glmtree)
data = generateData(n = 1000, scenario = "no tree", visualize = TRUE)
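
A quick structural check of the simulated frame (a sketch: in the "no tree" scenario the frame holds at least the two continuous predictors x1 and x2 and the binary response y used below):

str(data)
table(data$y)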

# Keep 20% of the observations for training; the remaining 80% form the test set
int_train = sample.int(n = 1000, size = 0.2*1000)

test = data[-int_train,]
data = data[int_train,]

PCA

library(FactoMineR)
mixed = PCA(data[,c("x1","x2")])

data$pca1 = predict(mixed, data)$coord[,1]
data$pca2 = predict(mixed, data)$coord[,2]
test$pca1 = predict(mixed, test)$coord[,1]
test$pca2 = predict(mixed, test)$coord[,2]

# Ad-hoc three-way segmentation on the first principal coordinate
data$cluster = ifelse(data$pca1 > 1, 1, ifelse(data$pca1 > 0, 2, 3))
test$cluster = ifelse(test$pca1 > 1, 1, ifelse(test$pca1 > 0, 2, 3))

pred = matrix(0, nrow = nrow(test), ncol = 1)

# One logistic regression per PCA-based cluster, fitted on the training set
# and used to score the matching test observations
for (j in 1:3) {
  modele = glm(y ~ x1 + x2, data = data[data$cluster==j,], family = binomial(link = "logit"))
  pred[test$cluster==j] = predict(modele, test[test$cluster==j,], type = "response")
}

normalizedGini(test$y,pred)
plot(mixed, choix = 'ind', label = "none")
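
The normalized Gini used throughout is a rank-based measure; for a binary response it is commonly related to the AUC by Gini = 2*AUC - 1 (up to ties). A quick cross-check (a sketch, assuming the pROC package is available):

if (require(pROC, quietly = TRUE)) {
  # should be close to the value returned by normalizedGini(test$y, pred)
  2 * as.numeric(pROC::auc(test$y, as.numeric(pred))) - 1
}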

MOB (model-based recursive partitioning)

if (require(partykit, quietly = TRUE)) {
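  # In partykit's MOB formula, the variables left of "|" enter the leaf-level
  # logistic regressions; the variables right of "|" are the candidate splitting variables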
  mob_data = partykit::glmtree(formula = y ~ x1 + x2 | x1 + x2, data = data, family = binomial)
  plot(mob_data)
  normalizedGini(test$y, predict(mob_data,test))
}

glmtree approach

tree = glmtree::glmtree(x = data[,c("x1", "x2")], y = data$y)

plot(unlist(tree@performance$criterionEvolution), type="l")

# Maximum a posteriori leaf assignment from the fitted partitioning tree
data$c_map <- factor(apply(predict(tree@best.tree$tree, data, type = "prob"), 1, function(p) names(which.max(p))))
test$c_map <- factor(apply(predict(tree@best.tree$tree, test, type = "prob"), 1, function(p) names(which.max(p))))

table(data$c_map)

plot(data[,1],data[,2],pch=2+data[,3],col=as.numeric(data$c_map),xlab="First coordinate",ylab="Second coordinate")

plot(tree@best.tree$tree)

pred = matrix(0, nrow = nrow(test), ncol = 1)

# Refit one logistic regression per glmtree leaf and score the test set
for (j in levels(data$c_map)) {
  modele = glm(y ~ x1 + x2, data = data[data$c_map==j,], family = binomial(link = "logit"))
  pred[test$c_map==j] = predict(modele, test[test$c_map==j,], type = "response")
}

normalizedGini(test$y,pred)
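
Equivalently, instead of refitting a logistic regression in each leaf, the per-leaf models stored in the fitted object can be reused directly (a sketch, mirroring the code used in the second scenario below):

pred_stored = matrix(0, nrow = nrow(test), ncol = 1)
for (j in 1:nlevels(data$c_map)) {
  leaf = levels(data$c_map)[j]
  pred_stored[test$c_map == leaf] = predict(tree@best.tree$glms[[j]], test[test$c_map == leaf,], type = "response")
}
normalizedGini(test$y, pred_stored)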

One "cluster", three predictive laws

Simulation

data = generateData(n = 1000, scenario = "tree", visualize = TRUE)
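
The "tree" scenario adds a third predictor x3 and the true segment label c, which is compared with the recovered segmentation further below; a quick structural check (sketch):

str(data)
table(data$c)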

int_train = sample.int(n = 1000, size = 0.2*1000)

test = data[-int_train,]
data = data[int_train,]

PCA

mixed = FAMD(data[,c("x1","x2","x3")])

# Two-group segmentation on the sign of the first FAMD dimension,
# computed separately on the training and test sets
dim_famd = predict(mixed, data)$coord[,"Dim 1"] < 0
dim_famd_test = predict(mixed, test)$coord[,"Dim 1"] < 0

pred = matrix(0, nrow = nrow(test), ncol = 1)

for (j in c(TRUE,FALSE)) {
  modele = glm(y ~ x1 + x2 + x3, data = data[dim_famd==j,], family = binomial(link = "logit"))
  pred[dim_famd_test==j] = predict(modele, test[dim_famd_test==j,], type = "response")
}

normalizedGini(test$y,pred)

MOB

if (require(partykit, quietly = TRUE)) {
  mob_data = partykit::glmtree(formula = y ~ x1 + x2 + x3 | x1 + x2 + x3, data = data, family = binomial)
  plot(mob_data)
  normalizedGini(test$y, predict(mob_data,test))
}

glmtree approach

tree = glmtree::glmtree(x = data[,c("x1", "x2", "x3")], y = data$y)

plot(unlist(tree@performance$criterionEvolution), type="l")

data$c_map <- factor(apply(predict(tree@best.tree$tree,data,type="prob"),1,function(p) names(which.max(p))))
test$c_map <- factor(apply(predict(tree@best.tree$tree,test,type="prob"),1,function(p) names(which.max(p))))

table(data$c,data$c_map)

plot(data[,1],data[,2],pch=2+data[,3],col=as.numeric(data$c_map),xlab="First coordinate",ylab="Second coordinate")

plot(tree@best.tree$tree)

pred = matrix(0, nrow = nrow(test), ncol = 1)

# Score the test set with the per-leaf logistic regressions stored in the fitted object
for (j in 1:nlevels(data$c_map)) {
  leaf = levels(data$c_map)[j]
  pred[test$c_map==leaf] = predict(tree@best.tree$glms[[j]], test[test$c_map==leaf,], type = "response")
}

normalizedGini(test$y,pred)
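
The same comparison between true and recovered segments can be made on the test set (assuming, as above, that the split retains the simulated label c):

table(test$c, test$c_map)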

