Nothing
## ----setup, include = FALSE---------------------------------------------------
knitr::opts_chunk$set(
collapse = TRUE,
comment = "#>",
fig.width = 7,
fig.align = "center",
fig.height = 4
)
## ---- echo=FALSE, results='asis'----------------------------------------------
knitr::kable(data.frame(Job=c("Craftsman","Technician","Executive","Office employee"),Habitation = c("Owner","Renter","Starter","By family"),Time_in_job = c(10,20,5,2), Children = c(0,1,2,3), Family_status= c("Divorced","Widower","Single","Married"),Default = c("No","No","Yes","No")))
## ---- echo=TRUE, results='asis'-----------------------------------------------
x = matrix(runif(1000), nrow = 1000, ncol = 1)
p = 1/(1+exp(-3*x^5))
y = rbinom(1000,1,p)
modele_lin <- glm(y ~ x, family = binomial(link="logit"))
pred_lin <- predict(modele_lin,as.data.frame(x),type="response")
pred_lin_logit <- predict(modele_lin,as.data.frame(x))
## ---- echo=FALSE--------------------------------------------------------------
knitr::kable(head(data.frame(True_prob = p,Pred_lin = pred_lin)))
## ---- echo=TRUE, results='asis'-----------------------------------------------
x_disc <- factor(cut(x,c(-Inf,0.5,0.7,0.8,0.9,+Inf)),labels = c(1,2,3,4,5))
modele_disc <- glm(y ~ x_disc, family = binomial(link="logit"))
pred_disc <- predict(modele_disc,as.data.frame(x_disc),type="response")
pred_disc_logit <- predict(modele_disc,as.data.frame(x_disc))
## ---- echo=FALSE--------------------------------------------------------------
knitr::kable(head(data.frame(True_prob = p,Pred_lin = pred_lin,Pred_disc = pred_disc)))
plot(x,3*x^5,main = "Estimated logit transform of p(Y|X)", ylab = "p(Y|X) under different models")
lines(x,pred_lin_logit,type="p",col="red")
lines(x,pred_disc_logit,type="p",col="blue")
## ---- echo=TRUE, results='asis'-----------------------------------------------
x_disc_bad_idea <- factor(cut(x,c(-Inf,0.1,0.2,0.3,0.4,0.5,0.6,0.7,0.8,0.9,+Inf)),labels = c(1,2,3,4,5,6,7,8,9,10))
## ---- echo=FALSE, results='asis'----------------------------------------------
liste_coef <- list()
for (k in 1:10) {
x_part <- factor(x_disc_bad_idea[((k-1)*nrow(x)/10 +1) : (k/10*nrow(x))])
y_part <- y[((k-1)*length(y)/10 +1) : (k/10*length(y))]
modele_part <- glm(y_part ~ x_part, family=binomial(link = "logit"))
liste_coef[[k]] <- (modele_part$coefficients)
}
estim_coef <- matrix(NA, nrow = nlevels(x_disc_bad_idea), ncol = 10)
for (i in 1:nlevels(x_disc_bad_idea)) {
estim_coef[i,] <- unlist(lapply(liste_coef,function(batch) batch[paste0("x_part",levels(factor(x_disc_bad_idea))[i])]))
}
stats_coef <- matrix(NA, nrow = nlevels(x_disc_bad_idea), ncol = 3)
for (i in 1:nlevels(x_disc_bad_idea)) {
stats_coef[i,1] <- mean(estim_coef[i,], na.rm = TRUE)
stats_coef[i,2] <- sd(estim_coef[i,], na.rm = TRUE)
stats_coef[i,3] <- sum(is.na(estim_coef[i,]))
}
stats_coef <- stats_coef[-1,]
row.names(stats_coef) <- levels(x_disc_bad_idea)[2:nlevels(x_disc_bad_idea)]
plot (row.names(stats_coef), stats_coef[,1],ylab="Estimated coefficient",xlab="Factor value of x", ylim = c(-1,8))
segments(as.numeric(row.names(stats_coef)), stats_coef[,1]-stats_coef[,2],as.numeric(row.names(stats_coef)),stats_coef[,1]+stats_coef[,2])
lines(row.names(stats_coef),rep(0,length(row.names(stats_coef))),col="red")
## ---- echo=TRUE, results='asis'-----------------------------------------------
x = matrix(runif(300), nrow = 100, ncol = 3)
cuts = seq(0,1,length.out= 4)
xd = apply(x,2, function(col) as.numeric(cut(col,cuts)))
theta = t(matrix(c(0,0,0,2,2,2,-2,-2,-2),ncol=3,nrow=3))
log_odd = rowSums(t(sapply(seq_along(xd[,1]), function(row_id) sapply(seq_along(xd[row_id,]),
function(element) theta[xd[row_id,element],element]))))
y = rbinom(100,1,1/(1+exp(-log_odd)))
## ---- echo=TRUE,warning=FALSE, message=FALSE, results='hide',eval=FALSE-------
# library(glmdisc)
# set.seed(123)
# discretization <- glmdisc(x,y,iter=50,m_start=5,test=FALSE,validation=FALSE,criterion="aic",interact=FALSE)
## ---- echo=FALSE,warning=FALSE, message=FALSE, results='hide',eval=TRUE-------
library(glmdisc)
set.seed(1)
discretization <- glmdisc(x,y,iter=50,m_start=5,test=FALSE,validation=FALSE,criterion="aic",interact=FALSE)
## ---- echo=TRUE,warning=FALSE, message=FALSE, results='hide',eval=TRUE--------
all_formula <- list()
for (l in 1:10) {
set.seed(l)
x = matrix(runif(300), nrow = 100, ncol = 3)
cuts = seq(0,1,length.out= 4)
xd = apply(x,2, function(col) as.numeric(cut(col,cuts)))
theta = t(matrix(c(0,0,0,2,2,2,-2,-2,-2),ncol=3,nrow=3))
log_odd = rowSums(t(sapply(seq_along(xd[,1]), function(row_id) sapply(seq_along(xd[row_id,]),
function(element) theta[xd[row_id,element],element]))))
y = rbinom(100,1,1/(1+exp(-log_odd)))
discretization <- glmdisc(x,y,iter=50,m_start=5,test=FALSE,validation=FALSE,criterion="aic",interact=TRUE)
all_formula[[l]] <- discretization@best.disc$formulaOfBbestestLogisticRegression
}
#barplot(table(grepl(":",all_formula)),names.arg=c("No interaction","One interaction"),xlim = 2)
## ---- echo=TRUE,warning=FALSE, message=FALSE, results='hide'------------------
all_formula <- list()
for (l in 1:10) {
set.seed(100+l)
x = matrix(runif(3000), nrow = 1000, ncol = 3)
cuts = seq(0,1,length.out= 4)
xd = apply(x,2, function(col) as.numeric(cut(col,cuts)))
theta = t(matrix(c(0,0,0,2,2,2,-2,-2,-2),ncol=3,nrow=3))
log_odd = matrix(0,1000,1)
for (i in 1:1000) {
log_odd[i] = 2*(xd[i,1]==1)+
(-2)*(xd[i,1]==2)+
2*(xd[i,1]==1)*(xd[i,2]==1)+
4*(xd[i,1]==2)*(xd[i,2]==2)+
(-2)*(xd[i,2]==1)*(xd[i,3]==2)+
(-4)*(xd[i,2]==2)*(xd[i,3]==1)+
1*(xd[i,2]==3)*(xd[i,3]==1)+
(-1)*(xd[i,1]==1)*(xd[i,3]==3)+
3*(xd[i,1]==3)*(xd[i,3]==2)+
(-1)*(xd[i,1]==2)*(xd[i,3]==3)
}
y = rbinom(1000,1,1/(1+exp(-log_odd)))
discretization <- glmdisc(x,y,iter=50,m_start=5,test=FALSE,validation=FALSE,criterion="aic",interact=TRUE)
all_formula[[l]] <- discretization@best.disc$formulaOfBbestestLogisticRegression
}
#barplot(table(grepl(":",all_formula)),names.arg=c("No interaction","One interaction"),xlim = 2)
## ---- echo=TRUE---------------------------------------------------------------
discretization@parameters
## ---- echo=TRUE---------------------------------------------------------------
discretization@best.disc[[1]]
# The first link function is:
discretization@best.disc[[2]][[1]]
## ---- echo=TRUE---------------------------------------------------------------
discretization@performance[[1]]
## ---- echo=TRUE---------------------------------------------------------------
discretization@performance[[2]][1:5]
## ---- echo=TRUE, eval=FALSE---------------------------------------------------
# discretization@disc.data
## ---- echo=FALSE--------------------------------------------------------------
knitr::kable(head(discretization@disc.data))
## ---- echo=TRUE,eval=FALSE----------------------------------------------------
# discretization@cont.data
## ---- echo=FALSE--------------------------------------------------------------
knitr::kable(head(discretization@cont.data))
## ---- echo=FALSE--------------------------------------------------------------
plot(x[,1],xd[,1])
plot(discretization@cont.data[,1],discretization@disc.data[,1])
## ---- echo=TRUE---------------------------------------------------------------
print(discretization)
## ---- echo=TRUE---------------------------------------------------------------
show(discretization)
## ---- echo=TRUE, results='asis'-----------------------------------------------
x_new <- discretize(discretization,x)
## ---- echo=FALSE, warning=FALSE-----------------------------------------------
knitr::kable(head(x_new))
## ---- echo=TRUE---------------------------------------------------------------
# pred_new <- predict(discretization, data.frame(x))
## ---- echo=FALSE, warning=FALSE-----------------------------------------------
# knitr::kable(head(pred_new))
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.