Nothing
## ----setup, include=FALSE-----------------------------------------------------
knitr::opts_chunk$set(
collapse = TRUE,
comment = "#>"
)
## ----read the dataset---------------------------------------------------------
data(Boston, package='MASS')
dd <- Boston[, c(1, 3, 5:8, 10:14)]
dd[, names(dd) != 'medv'] <- log( dd[, names(dd) != 'medv'] )
## ----loadpckg-----------------------------------------------------------------
library(RBF)
## ----bandwidths---------------------------------------------------------------
bandw <- apply(dd[, names(dd) != 'medv'], 2, sd) / 2
## ----robustfit----------------------------------------------------------------
robust.fit <- backf.rob(medv ~ ., data = dd, degree = 0, type = 'Huber',
windows = bandw)
## ----summary------------------------------------------------------------------
summary(robust.fit)
## ----plot, out.width = "45%"-------------------------------------------------
plot(robust.fit)
## ----preds--------------------------------------------------------------------
po <- colMeans(dd[, names(dd) != 'medv'])
robust.fit1 <- backf.rob(medv ~ ., data = dd, degree = 0, type = 'Huber',
windows = bandw, point = po)
## ----showpred-----------------------------------------------------------------
robust.fit1$prediction
## ----outliers-----------------------------------------------------------------
dd2 <- dd
dd2$medv[1:5]<- rep(400, 5)
## ----robustplotswithoutliers--------------------------------------------------
robust.fit.new <- backf.rob(medv ~ ., data = dd2, degree = 0, type = 'Huber',
windows = bandw, point = po)
summary(robust.fit.new)
robust.fit.new$prediction
## ----robustplots2, warning=FALSE, out.width = "45%"--------------------------
for(j in 1:10) {
name.x <- names(dd)[j]
name.y <- bquote(paste(hat('g')[.(j)]))
oo <- order(dd2[,j])
plot(dd2[oo,j], robust.fit.new$g.matrix[oo,j], type="l", lwd=2, col='blue', lty=1,
xlab=name.x, ylab=name.y)
lines(dd2[oo,j], robust.fit$g.matrix[oo,j], lwd=2, col='green', lty=2)
}
## ----gam, warning=FALSE-------------------------------------------------------
library(gam)
fit.gam <- gam(medv ~ lo(crim, span=1.62) + lo(indus, span=0.58) +
lo(nox, span=0.15) + lo(rm, span=0.08) +
lo(age, span=0.46) + lo(dis, span=0.40) +
lo(tax, span=0.30) + lo(ptratio, span=0.09) +
lo(black, span=0.58) + lo(lstat, span=0.45), data=dd)
fits <- predict(fit.gam, type='terms')
fit.gam.new <- gam(medv ~ lo(crim, span=1.62) + lo(indus, span=0.58) +
lo(nox, span=0.15) + lo(rm, span=0.08) +
lo(age, span=0.46) + lo(dis, span=0.40) +
lo(tax, span=0.30) + lo(ptratio, span=0.09) +
lo(black, span=0.58) + lo(lstat, span=0.45), data=dd2)
fits.new <- predict(fit.gam.new, type='terms')
## ----gamplots, out.width = "45%"---------------------------------------------
for(j in 1:10) {
oo <- order(dd2[,j])
name.x <- names(dd)[j]
name.y <- bquote(paste(hat('g')[.(j)]))
plot(dd2[oo,j], fits.new[oo,j], type="l", lwd=2, col='purple', lty=1,
xlab=name.x, ylab=name.y)
lines(dd2[oo,j], fits[oo,j], lwd=2, col='darkorange2', lty=2)
}
## ----scatterplot, out.width = "75%"------------------------------------------
data(airquality)
ccs <- complete.cases(airquality)
aircomplete <- airquality[ccs, c('Ozone', 'Solar.R', 'Wind', 'Temp')]
pairs(aircomplete[, c('Ozone', 'Solar.R', 'Wind', 'Temp')], pch=19, col='gray30')
## ----robustcv, warning=FALSE, eval=FALSE--------------------------------------
# library(RBF)
#
# # Bandwidth selection with leave-one-out cross-validation
# ## Without outliers
# # This takes a long time to compute (approx 380 minutes running
# # R 3.6.1 on an Intel(R) Core(TM) i7-4790 CPU @ 3.60GHz)
# a <- c(1/2, 1, 1.5, 2, 2.5, 3)
# h1 <- a * sd(aircomplete[,2])
# h2 <- a * sd(aircomplete[,3])
# h3 <- a * sd(aircomplete[,4])
# hh <- expand.grid(h1, h2, h3)
# nh <- nrow(hh)
# rmspe <- rep(NA, nh)
# jbest <- 0
# cvbest <- +Inf
# n <- nrow(aircomplete)
# for(i in 1:nh) {
# # leave-one-out CV loop
# preds <- rep(NA, n)
# for(j in 1:n) {
# tmp <- try( backf.rob(Ozone ~ Solar.R + Wind + Temp, point = aircomplete[j, -1],
# windows = hh[i, ], epsilon = 1e-6, data = aircomplete,
# degree = 1, type = 'Tukey', subset = c(-j) ))
# if (class(tmp)[1] != "try-error") {
# preds[j] <- rowSums(tmp$prediction) + tmp$alpha
# }
# }
# tmp.re <- RobStatTM::locScaleM(preds - aircomplete$Ozone, na.rm=TRUE)
# rmspe[i] <- tmp.re$mu^2 + tmp.re$disper^2
# if( rmspe[i] < cvbest ) {
# jbest <- i
# cvbest <- rmspe[i]
# }
# }
# (bandw <- hh[jbest,])
## ----bandw--------------------------------------------------------------------
bandw <- c(136.7285, 10.67314, 4.764985)
## ----fitfull------------------------------------------------------------------
fit.full <- backf.rob(Ozone ~ Solar.R + Wind + Temp, windows = bandw,
epsilon = 1e-6, degree = 1, type = 'Tukey',
subset = ccs, data = airquality)
## ----plotfitfull, out.width = "45%"------------------------------------------
plot(fit.full)
## ----gamcv, warning=FALSE, eval=FALSE-----------------------------------------
# library(gam)
# a <- c(.3, .4, .5, .6, .7, .8, .9)
# hh <- expand.grid(a, a, a)
# nh <- nrow(hh)
# jbest <- 0
# cvbest <- +Inf
# n <- nrow(aircomplete)
# for(i in 1:nh) {
# fi <- rep(0, n)
# for(j in 1:n) {
# tmp <- gam(Ozone ~ lo(Solar.R, span=hh[i,1]) + lo(Wind, span=hh[i,2])
# + lo(Temp, span=hh[i,3]), data = aircomplete, subset=c(-j))
# fi[j] <- as.numeric(predict(tmp, newdata=aircomplete[j, -1], type='response'))
# }
# ss <- mean((aircomplete$Ozone - fi)^2)
# if(ss < cvbest) {
# jbest <- i
# cvbest <- ss
# }
# }
# (hh[jbest,])
# # Var1 Var2 Var3
# # 0.7 0.7 0.5
## ----fitgam-------------------------------------------------------------------
fit.gam <- gam(Ozone ~ lo(Solar.R, span=.7) + lo(Wind, span=.7)+
lo(Temp, span=.5), data = aircomplete)
## ----plotrobgam, out.width = "45%"-------------------------------------------
x <- as.matrix( aircomplete[ , c('Solar.R', 'Wind', 'Temp')] )
y <- as.vector( aircomplete[ , 'Ozone'] )
fits <- predict(fit.gam, type='terms')
for(j in 1:3) {
re <- fit.full$yp - fit.full$alpha - rowSums(fit.full$g.matrix[,-j])
plot(re ~ x[,j], type='p', pch=20, col='gray45', xlab=colnames(x)[j], ylab='')
oo <- order(x[,j])
lines(x[oo,j], fit.full$g.matrix[oo,j], lwd=2, col='blue', lty=1)
lines(x[oo,j], fits[oo,j], lwd=2, col='magenta', lty=2)
}
## ----boxplot, out.width = "50%"----------------------------------------------
re.ro <- residuals(fit.full)
ou.ro <- boxplot(re.ro, col='gray80')$out
in.ro <- (1:length(re.ro))[ re.ro %in% ou.ro ]
points(rep(1, length(in.ro)), re.ro[in.ro], pch=20, col='red')
(in.ro)
## ----scatterplotpoints, out.width = "75%"------------------------------------
cs <- rep('gray30', nrow(aircomplete))
cs[in.ro] <- 'red'
os <- 1:nrow(aircomplete)
os2 <- c(os[-in.ro], os[in.ro])
pairs(aircomplete[os2, c('Ozone', 'Solar.R', 'Wind', 'Temp')],
pch=19, col=cs[os2])
## ----plotoutred, out.width = "45%"-------------------------------------------
# Plot both fits (robust and classical)
x <- as.matrix( aircomplete[ , c('Solar.R', 'Wind', 'Temp')] )
y <- as.vector( aircomplete[ , 'Ozone'] )
fits <- predict(fit.gam, type='terms')
for(j in 1:3) {
re <- fit.full$yp - fit.full$alpha - rowSums(fit.full$g.matrix[,-j])
plot(re ~ x[,j], type='p', pch=20, col='gray45', xlab=colnames(x)[j], ylab='')
points(re[in.ro] ~ x[in.ro,j], pch=20, col='red')
oo <- order(x[,j])
lines(x[oo,j], fit.full$g.matrix[oo,j], lwd=2, col='blue', lty=1)
lines(x[oo,j], fits[oo,j], lwd=2, col='magenta', lty=2)
}
## ----cvgamclean, eval=FALSE---------------------------------------------------
# airclean <- aircomplete[-in.ro, c('Ozone', 'Solar.R', 'Wind', 'Temp')]
# a <- c(.3, .4, .5, .6, .7, .8, .9)
# hh <- expand.grid(a, a, a)
# nh <- nrow(hh)
# jbest <- 0
# cvbest <- +Inf
# n <- nrow(airclean)
# for(i in 1:nh) {
# fi <- rep(0, n)
# for(j in 1:n) {
# tmp <- gam(Ozone ~ lo(Solar.R, span=hh[i,1]) + lo(Wind, span=hh[i,2])
# + lo(Temp, span=hh[i,3]), data=airclean, subset=c(-j))
# fi[j] <- as.numeric(predict(tmp, newdata=airclean[j,], type='response'))
# }
# ss <- mean((airclean$Ozone - fi)^2)
# if(ss < cvbest) {
# jbest <- i
# cvbest <- ss
# }
# }
# (hh[jbest,])
# # Var1 Var2 Var3
# # 0.7 0.8 0.3
## ----fitgam2------------------------------------------------------------------
airclean <- aircomplete[-in.ro, c('Ozone', 'Solar.R', 'Wind', 'Temp')]
fit.gam2 <- gam(Ozone ~ lo(Solar.R, span=.7) + lo(Wind, span=.8)+
lo(Temp, span=.3), data=airclean)
## ----finalplot, out.width = "45%"--------------------------------------------
fits2 <- predict(fit.gam2, type='terms')
dd2 <- aircomplete[-in.ro, c('Solar.R', 'Wind', 'Temp')]
for(j in 1:3) {
re <- fit.full$yp - fit.full$alpha - rowSums(fit.full$g.matrix[,-j])
plot(re ~ x[,j], type='p', pch=20, col='gray45', xlab=colnames(x)[j], ylab='')
points(re[in.ro] ~ x[in.ro,j], pch=20, col='red')
oo <- order(dd2[,j])
lines(dd2[oo,j], fits2[oo,j], lwd=2, col='magenta', lty=2)
oo <- order(x[,j])
lines(x[oo,j], fit.full$g.matrix[oo,j], lwd=2, col='blue', lty=1)
}
## ----pred1--------------------------------------------------------------------
tms <- function(a, alpha=.1) {
# alpha is the proportion to trim
a2 <- sort(a^2, na.last=NA)
n0 <- floor( length(a) * (1 - alpha) )
return( mean(a2[1:n0], na.rm=TRUE) )
}
## ----pred2, warning=FALSE, eval=FALSE-----------------------------------------
# dd <- airquality
# dd <- dd[complete.cases(dd), c('Ozone', 'Solar.R', 'Wind', 'Temp')]
# # 100 runs of K-fold CV
# M <- 100
# # 5-fold
# K <- 5
# n <- nrow(dd)
# # store (trimmed) TMSPE for robust and gam, and also
# tmspe.ro <- tmspe.gam <- vector('numeric', M)
# set.seed(123)
# ii <- (1:n)%%K + 1
# for(runs in 1:M) {
# tmpro <- tmpgam <- vector('numeric', n)
# ii <- sample(ii)
# for(j in 1:K) {
# fit.full <- backf.rob(Ozone ~ Solar.R + Wind + Temp,
# point=dd[ii==j, -1], windows = bandw,
# epsilon = 1e-6, degree = 1, type = 'Tukey',
# subset = (ii!=j), data = dd)
# tmpro[ ii == j ] <- rowSums(fit.full$prediction) + fit.full$alpha
# fit.gam <- gam(Ozone ~ lo(Solar.R, span=.7) + lo(Wind, span=.7)+
# lo(Temp, span=.5), data = dd[ii!=j, ])
# tmpgam[ ii == j ] <- predict(fit.gam, newdata=dd[ii==j, ], type='response')
# }
# tmspe.ro[runs] <- tms( dd$Ozone - tmpro, alpha=0.05)
# tmspe.gam[runs] <- tms( dd$Ozone - tmpgam, alpha=0.05)
# }
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.