# inst/india_rqssResults.R In hofnerb/mboost: Model-Based Boosting

```#-----------------------------------------------------------
# Explore results for empirical risk with all algorithms
# including rqss
#-----------------------------------------------------------

#-----------------------------------------------------------
# Path, Packages, source functions
#-----------------------------------------------------------

library("quantreg")
library("lattice")
library("mboost")

source("india_rqss_lambdaOptFunc.R")

#-----------------------------------------------------------
# Quantiles and Indices
#-----------------------------------------------------------
inds <- paste("w", 1:50, sep="")
quantiles <- c(0.05, 0.1, 0.5)

pred1 <- pred2 <- pred3 <- matrix(0, ncol=5, nrow=length(inds))
colnames(pred1) <- colnames(pred2) <- colnames(pred3) <- c("rqss", "add","vcm","stump","tree")

lambda1rqss <- lambda2rqss <- lambda3rqss <- matrix(NA, nrow=length(inds), ncol=6)

predVCM1 <- predVCM2 <-predVCM3 <- matrix(NA, ncol=50, nrow=nrow(india)/3)
nTest <- rep(0, length(inds))

# write cvRisks into a vector
for(i in 1:length(inds)){
print(i)

indiaTrain <- india[india[,inds[i]]==1,]

indiaTestOrg <- india[india[,inds[i]]==3,]

# Add variable for prediction with mboost
indiaTestOrg <- cbind(indiaTestOrg, ff = 0)
names(indiaTestOrg)[ncol(indiaTestOrg)] <- "FALSE"

# Indices of convex hull of the test data
indiaTestInd <- convexHullInd(dataOrg=indiaTestOrg, dataRef=indiaTrain)
indiaTest <- convexHull(dataOrg=indiaTestOrg, dataRef=indiaTrain)
nTest[i] <- nrow(indiaTest)

#----------------
# Prediction rqss
pred <- predict(model, newdata=indiaTest)
pred1[i,1] <- loss(indiaTest\$stunting, pred, tau=0.05)
lambda1rqss[i,] <- model\$lambdas

pred <- predict(model, newdata=indiaTest)
pred2[i,1] <- loss(indiaTest\$stunting, pred, tau=0.1)
lambda2rqss[i,] <- model\$lambdas

pred <- predict(model, newdata=indiaTest)
pred3[i,1] <- loss(indiaTest\$stunting, pred, tau=0.5)
lambda3rqss[i,] <- model\$lambdas

#---------------

#---------------
# Prediction VCM
predVCM1[,i] <- predict(model, newdata=indiaTestOrg)
pred1[i,3] <- loss(indiaTest\$stunting, predVCM1[indiaTestInd==0,i], tau=0.05)

predVCM2[,i] <- predict(model, newdata=indiaTestOrg)
pred2[i,3] <- loss(indiaTest\$stunting, predVCM2[indiaTestInd==0,i], tau=0.1)

predVCM3[,i] <- predict(model, newdata=indiaTestOrg)
pred3[i,3] <- loss(indiaTest\$stunting, predVCM3[indiaTestInd==0,i], tau=0.5)

#---------------
# Prediction Stumps

# -> The prediction on the test data was directly calculated after
# calculating the model. Therefore not any more necessary here
pred1[i,4] <- loss(indiaTest\$stunting, pred[indiaTestInd==0], tau=0.05)

pred2[i,4] <- loss(indiaTest\$stunting, pred[indiaTestInd==0], tau=0.1)

pred3[i,4] <- loss(indiaTest\$stunting, pred[indiaTestInd==0], tau=0.5)

pred1[i,5] <- loss(indiaTest\$stunting, pred[indiaTestInd==0], tau=0.05)

pred2[i,5] <- loss(indiaTest\$stunting, pred[indiaTestInd==0], tau=0.1)

pred3[i,5] <- loss(indiaTest\$stunting, pred[indiaTestInd==0], tau=0.5)

}

save(pred1, pred2, pred3, file="CVRisks.Rdata")
save(predVCM1, predVCM2, predVCM3, file="PredVCM.Rdata")

# Change the indices according to performance
pred1 <- pred1[,c(2,1,5,4,3)]
pred2 <- pred2[,c(2,1,5,4,3)]
pred3 <- pred3[,c(2,1,5,4,3)]

#-----------------------------------------------------------
# Boxplots for Empirical Risks
#-----------------------------------------------------------

#pdf("IndiaRisksAll.pdf", width=8, height=6.5)

cv <- data.frame(error = c(as.vector(pred1),
as.vector(pred2),
as.vector(pred3)),
algo = rep(factor(colnames(pred1), levels = colnames(pred1),
labels = c("additive", "rqss", "trees", "stumps", "VCM")),
rep(nrow(pred1), ncol(pred1))),
quant = rep(factor(c("5% Quantile", "10% Quantile", "Median"),
levels = c("5% Quantile", "10% Quantile", "Median"),
labels = c("5% Quantile", "10% Quantile", "Median")),
rep(length(pred1), 3)))

mypanel <- function(x, y, ...) {
panel.bwplot(x = x, y = y, ...)
z <- as.data.frame(matrix(y, ncol = 5))
apply(z, 1, function(x)
llines(x = 1:5, y = x, col = rgb(0.1, 0.1, 0.1, 0.1)))
}
print(bwplot(error ~ algo | quant, data = cv, ylab = "CV risk",
scales = list(y = list(relation = "sliced"), x = list(rot = 25)),
layout = c(3, 1), panel = mypanel, fill="grey", pch = "|",
par.settings = list(plot.symbol = list(col=1,pch=20, cex=0.7),
box.rectangle = list(col=1),
box.umbrella = list(lty=1, col=1)),
strip= strip.custom(bg="white")))
#dev.off()

#-----------------------------------------------------------
# Boxplots for Optimized Lambda Parameters rqss
#-----------------------------------------------------------

lims <- range(c(lambda1rqss, lambda2rqss, lambda3rqss))

#pdf("IndiaLambdas.pdf", width=8, height=6)
par(mfrow=c(1,3), las=3, mar=c(8,4,2,1))
boxplot(lambda1rqss[,1:6],axes=FALSE, ylab="lambda", xlab="",  main="0.05", ylim=range(lims))
box()
axis(2)
axis(1, at=1:6, labels=c("cage", "breastfeeding", "mbmi", "mage", "medu", "edupartner"))

boxplot(lambda2rqss[,1:6],axes=FALSE, ylab="lambda", xlab="",  main="0.1", ylim=range(lims))
box()
axis(2)
axis(1, at=1:6, labels=c("cage", "breastfeeding", "mbmi", "mage", "medu", "edupartner"))

boxplot(lambda3rqss[,1:6],axes=FALSE, ylab="lambda", xlab="",  main="0.5", ylim=range(lims))
box()
axis(2)
axis(1, at=1:6, labels=c("cage", "breastfeeding", "mbmi", "mage", "medu", "edupartner"))

#dev.off()

# Number of observations in the test data
#pdf("IndiaNTest.pdf")
boxplot(nTest)
#dev.off()

print(summary(nTest))

#-----------------------------------------------------------
# Boxplots for empirical risks without rqss
#-----------------------------------------------------------

inds <- paste("w", 1:50, sep="")
quantiles <- c(0.05, 0.1, 0.5)

pred1 <- pred2 <- pred3 <- matrix(0, ncol=4, nrow=length(inds))
colnames(pred1) <- colnames(pred2) <- colnames(pred3) <- c("add","vcm","stump","tree")

for(i in 1:length(inds)){
print(i)

indiaTrain <- india[india[,inds[i]]==1,]

indiaTest <- india[india[,inds[i]]==3,]

indiaTest <- cbind(indiaTest, ff = 0)
names(indiaTest)[ncol(indiaTest)] <- "FALSE"

#---------------

#---------------
# Prediction VCM
pred1[i,2] <- loss(indiaTest\$stunting, predVCM1[,i], tau=0.05)
pred2[i,2] <- loss(indiaTest\$stunting, predVCM2[,i], tau=0.1)
pred3[i,2] <- loss(indiaTest\$stunting, predVCM3[,i], tau=0.5)

#---------------
# Prediction Stumps
pred1[i,3] <- loss(indiaTest\$stunting, pred, tau=0.05)

pred2[i,3] <- loss(indiaTest\$stunting, pred, tau=0.1)

pred3[i,3] <- loss(indiaTest\$stunting, pred, tau=0.5)

pred1[i,4] <- loss(indiaTest\$stunting, pred, tau=0.05)

pred2[i,4] <- loss(indiaTest\$stunting, pred, tau=0.1)

pred3[i,4] <- loss(indiaTest\$stunting, pred, tau=0.5)

}

save(pred1, pred2, pred3, file="CVRisksTotal.Rdata")

pred1 <- pred1[,c(1,4,3,2)]
pred2 <- pred2[,c(1,4,3,2)]
pred3 <- pred3[,c(1,4,3,2)]

#pdf("IndiaRisksWithoutRqss.pdf", width=8, height=6.5)

cv <- data.frame(error = c(as.vector(pred1),
as.vector(pred2),
as.vector(pred3)),
algo = rep(factor(colnames(pred1), levels = colnames(pred1),
labels = c("additive", "trees", "stumps", "VCM")),
rep(nrow(pred1), ncol(pred1))),
quant = rep(factor(c("5% Quantile", "10% Quantile", "Median"),
levels = c("5% Quantile", "10% Quantile", "Median"),
labels = c("5% Quantile", "10% Quantile", "Median")),
rep(length(pred1), 3)))

mypanel <- function(x, y, ...) {
panel.bwplot(x = x, y = y, ...)
z <- as.data.frame(matrix(y, ncol = 4))
apply(z, 1, function(x)
llines(x = 1:4, y = x, col = rgb(0.1, 0.1, 0.1, 0.1)))
}
print(bwplot(error ~ algo | quant, data = cv, ylab = "CV risk",
scales = list(y = list(relation = "sliced"), x = list(rot = 25)),
layout = c(3, 1), panel = mypanel, fill="grey", pch = "|",
par.settings = list(plot.symbol = list(col=1,pch=20, cex=0.7),
box.rectangle = list(col=1),
box.umbrella = list(lty=1, col=1)),
strip= strip.custom(bg="white")))
#dev.off()
```
hofnerb/mboost documentation built on Nov. 13, 2019, 6:05 a.m.