inst/doc/cellMCD_examples.R

## ----echo = FALSE-------------------------------------------------------------
knitr::opts_chunk$set(
 fig.width = 8 ,
 fig.height = 12,
 fig.align ='center'
)

## -----------------------------------------------------------------------------
library("cellWise")

## -----------------------------------------------------------------------------
library(robustHD)
data(TopGear)
dim(TopGear)
rownames(TopGear) = paste(TopGear[,1],TopGear[,2])
colnames(TopGear)
TopGear = TopGear[c(5,7,9:17)] # objective numerical variables
colnames(TopGear)
colnames(TopGear) = c("price", "displacement", "horsepower",
                      "torque", "acceleration", "top speed",
                      "miles/gallon", "weight", "length",
                      "width", "height")

## -----------------------------------------------------------------------------
out = checkDataSet(TopGear, silent = TRUE)
Xorig = out$remX
dim(Xorig) 
Xorig[,1] = Xorig[,1]/1000
head(Xorig)

## -----------------------------------------------------------------------------
X = Xorig
X[,c(1:4,6)] = log(Xorig[,c(1:4,6)])

## -----------------------------------------------------------------------------
X = as.matrix(X)
colSums(is.na(X))

## -----------------------------------------------------------------------------
out = estLocScale(X)
Z = scale(X,center=out$loc, scale=out$scale)
cutf = sqrt(qchisq(p=0.99,df=1))
boxplot(Z)

## -----------------------------------------------------------------------------
out <- robustbase::covMcd(X) 
plot(sqrt(out$mah))
abline(h = sqrt(qchisq(0.99, df = 11)))

## -----------------------------------------------------------------------------
cellout <- cellMCD(X)

## -----------------------------------------------------------------------------
Zres = cellout$Zres
boxplot(Zres) 
qqnorm(as.vector(Zres)); abline(0,1) 

## -----------------------------------------------------------------------------
W = cellout$W
sum(as.vector(1-W)) 
rowS = rowSums(1-W)
sum(rowS == 0) # 151 cars do not have a single flagged cell.

## -----------------------------------------------------------------------------
j = 1 # price
# Index plot of Zres:
# (ids = plot.cellMCD(cellout, j, type="index", identify=T)$ids)
ids = c(6,54,50,195,212,222,221)
Zres[ids,j,drop=F]
plot_cellMCD(cellout, j, type="index", ids=ids)

## -----------------------------------------------------------------------------
# Plot Zres versus X:
plot_cellMCD(cellout, j, type="Zres/X", ids=ids)
# Plot Zres versus predicted cell:
plot_cellMCD(cellout, j, type="Zres/pred", ids=ids)
# Plot observed cell versus predicted cell:
# (ids = plot.cellMCD(cellout, j, type="X/pred", identify=T)$ids)
ids = c(179,3,212,218,54,6,222,221,50,195)
Xorig[ids,j,drop=F]
plot_cellMCD(cellout, j, type="X/pred", vband=T, hband=T, ids=ids)

## ----fig.height=16,fig.width=8------------------------------------------------
###########
## Figure 2
###########

# pdf(file="TopGear_fig2_test.pdf",width=5.2,height=10.4)
par(mfrow=c(2,1)) # (nrows,ncols)
rn = rownames(X) # for object labels
#
######### 1. HP
par(mar = c(3.6, 3.5, 2.8, 1.0))
j = 3
ids = c(52,59,70,218)
labs = rn[ids]; labs[1] = "Caterham"
xvals = c(52, 59, 70, 218) 
yvals = c(-6.20, -8.35, -4.55, -4.8)
plot_cellMCD(cellout, j, type="index",
             ylab="standardized residual of log(horsepower)",
             main="standardized residual of log(horsepower)")
text(x = xvals, y = yvals, labels = labs, cex = 0.9)
#
######### 2. Length
par(mar = c(3.6, 3.5, 2.8, 1.0))
#
j = 9
ids = c(3,50,51,119,144,195,218,232,249)
labs = rn[ids]
labs[1] = "Aston Cygnet"; labs[3] = "Caterham"
xvals = c(2560, 5050, 3100, 5252, 4245, 4605, 2360, 2700, 3330) 
yvals = c(-4.46, -4.21, -3.91, 4.20, -4.67, -5.43,
          -5.46, -6.45, -5.26)
plot_cellMCD(cellout, j, type="Zres/X",
             main="standardized residual versus X for length",
             xlab="length (mm)", xlim=c(1970,6000), 
             ylim=c(-6.51,4.88)) #,ids=ids)
text(x = xvals, y = yvals, labels = labs, cex = 0.9)
# dev.off()

## ----fig.height=16,fig.width=8------------------------------------------------

# pdf(file="TopGear_fig3_test.pdf",width=5.2,height=10.4)
par(mfrow=c(2,1))
rn = rownames(X) # for object labels
#
######### 1. weight
par(mar = c(3.6, 3.5, 2.8, 1.0))
j = 8
ids = c(29,51,144,163,183,197,195)
pred = cellout$preds
labs = rn[ids]
labs[2] = "Caterham"
labs[4] = "Mercedes-Benz G"
xvals = c(1490, 1000, 1890, 1575, 1757,  810, 2250) 
yvals = c(5.63,-5.07,-5.10,4.52,-6.19,-6.60,-8.20)
plot_cellMCD(cellout, j, type="Zres/pred", # vband=F,
             xlab = "predicted weight (kg)") # , ids=ids)
text(x = xvals, y = yvals, labels = labs, cex = 0.9)
#
######### 2. top speed
par(mar = c(3.6, 3.5, 2.8, 1.0))
j = 6
ids = c(50,42,52,79,195,218,219,232,258)
labs = rn[ids]
labs[3] = "Caterham"
xvals = c(5.145, 4.937, 5.083, 5.065, 5.076, 4.90, 4.75, 4.63, 5.038)
yvals = c(5.53, 4.53, 4.72, 5.345, 5.44, 3.97, 4.43, 4.30, 4.61)
plot_cellMCD(cellout, j, type="X/pred", vband=F, vlines=F,
             xlab="predicted log(top speed)",
             ylab="observed log(top speed)",
             main="log(top speed) versus its prediction") #, ids=ids)
text(x = xvals, y = yvals, labels = labs, cex = 0.9)
# dev.off()

## ----fig.height=16,fig.width=8------------------------------------------------

############
### Figure 4
############

# pdf(file="TopGear_fig4_test.pdf",width=5.2,height=10.4)
par(mfrow=c(2,1)) # (nrows,ncols)
rn = rownames(X) # for object labels
#
######### 1. MPG versus torque
par(mar = c(3.6, 3.5, 2.8, 1.0))
ids=c(42,258,50) 
labs = rn[ids]
xvals = c(5.56, 6.28, 7.27) 
yvals = c(470, 235, -16)
plot_cellMCD(cellout, type = "bivariate", horizvar = 4, 
             vertivar = 7, ids = ids, 
             xlim=c(3.5,7.7), ylim = c(-27,480), 
             main = "miles/gallon versus log(torque)",
             xlab = "log(torque)",
             ylab = "miles/gallon",
             opacity=0.5, labelpoints=F)
text(x = xvals, y = yvals, labels = labs, cex = 0.9)
#
######### 2. Width versus acceleration
par(mar = c(3.6, 3.5, 2.8, 1.0))
ids = c(218,144,233,51,136,179) 
labs = rn[ids]
labs[3] = "Ssangyong" # name was too long for plot 
labs[4] = "Caterham" 
labs[5] = "Land Rover"
xvals = c(0.0,  -3.0, -3.23,  0.5, 14.2, 15.9) 
yvals = c(1200, 1850, 1915, 1688, 2039, 1445)
plot_cellMCD(cellout,  type = "bivariate", horizvar=5, 
             vertivar=10, ids=ids,
             xlab = "acceleration (seconds to 62 mph)",
             ylab = "width (mm)", xlim = c(-6.1,20),
             ylim = c(1200, 2150),
             opacity=0.5, labelpoints=F)
text(x = xvals, y = yvals, labels = labs, cex = 0.9)
# dev.off()

par(mfrow=c(1,1))

## -----------------------------------------------------------------------------
library(robustbase)

## -----------------------------------------------------------------------------
data(aircraft)
X      <- as.matrix(aircraft)[, 1:4]
pairs(X) 
X      <- transfo(X)$Y
rowout <- robustbase::covMcd(X)
rd     <- sqrt(mahalanobis(X, center = rowout$center, cov = rowout$cov))
cutoff <- sqrt(qchisq(0.975,ncol(X)))
sum(rd > cutoff)
cellout <- cellMCD(X)

cd <- sqrt(mahalanobis(X, center = cellout$mu, cov = cellout$S))
plot(rd, cd)
round(cor(rd, cd),3)

## -----------------------------------------------------------------------------
data(aircraft)
X <- as.matrix(aircraft)
nrow(X) / ncol(X)

## ----error=TRUE---------------------------------------------------------------
cellout <- cellMCD(X)

## -----------------------------------------------------------------------------
data(alcohol)
X <- as.matrix(alcohol)
X <- transfo(X)$Y
rowout <- robustbase::covMcd(X)
eigen(rowout$cov)$values
rd <- sqrt(mahalanobis(X, center = rowout$center, cov = rowout$cov))
cutoff <- sqrt(qchisq(0.975, ncol(X)))
sum(rd > cutoff)
cellout <- cellMCD(X)

cd <- sqrt(mahalanobis(X, center = cellout$mu, cov = cellout$S))
round(cor(rd, cd),3)  

## -----------------------------------------------------------------------------
data(Animals2)
X <- as.matrix(log(Animals2))
X <- transfo(X)$Y
rowout <- robustbase::covMcd(X)
rd <- sqrt(mahalanobis(X, center = rowout$center, cov = rowout$cov))
cellout <- cellMCD(X)
cd <- sqrt(mahalanobis(X, center = cellout$mu, cov = cellout$S))
plot(rd, cd)
round(cor(rd, cd), 3) 

## -----------------------------------------------------------------------------
data(bushfire)
X <- as.matrix(bushfire)
X <- transfo(X)$Y
rowout <- robustbase::covMcd(X)
rd <- sqrt(mahalanobis(X,center=rowout$center,cov=rowout$cov))
cutoff <- sqrt(qchisq(0.975, ncol(X)))
sum(rd > cutoff)
cellout <- cellMCD(X) 
cd <- sqrt(mahalanobis(X, center = cellout$mu, cov = cellout$S))
round(cor(rd, cd), 3)

## -----------------------------------------------------------------------------
data(cloud)
X <- as.matrix(cloud)
X <- transfo(X)$Y
rowout <- robustbase::covMcd(X)
rd <- sqrt(mahalanobis(X, center = rowout$center, cov = rowout$cov))
cellout <- cellMCD(X) 

cd <- sqrt(mahalanobis(X,center=cellout$mu,cov=cellout$S))
round(cor(rd,cd),3) 

## -----------------------------------------------------------------------------
data(delivery)
X <- as.matrix(delivery)[, 1:2]
X <- transfo(X)$Y
rowout <- robustbase::covMcd(X)
rd <- sqrt(mahalanobis(X, center = rowout$center, cov = rowout$cov))
cutoff <- sqrt(qchisq(0.975, ncol(X)))
sum(rd > cutoff)
cellout <- cellMCD(X)
cd <- sqrt(mahalanobis(X, center = cellout$mu, cov = cellout$S))
round(cor(rd, cd), 3) 

## -----------------------------------------------------------------------------
data(delivery)
X <- as.matrix(delivery)
X <- transfo(X)$Y
rowout <- robustbase::covMcd(X)
rd <- sqrt(mahalanobis(X, center = rowout$center, cov = rowout$cov))
cutoff <- sqrt(qchisq(0.975, ncol(X)))
sum(rd > cutoff) 
cellout <- cellMCD(X) 
cd <- sqrt(mahalanobis(X, center = cellout$mu, cov = cellout$S))
round(cor(rd, cd), 3) 

## -----------------------------------------------------------------------------
data(exAM)
X <- as.matrix(exAM)
X <- transfo(X)$Y
rowout <- robustbase::covMcd(X)
rd <- sqrt(mahalanobis(X, center = rowout$center, cov = rowout$cov))
cutoff <- sqrt(qchisq(0.975, ncol(X)))
sum(rd > cutoff) 
cellout <- cellMCD(X)
cd <- sqrt(mahalanobis(X, center = cellout$mu, cov = cellout$S))
round(cor(rd, cd), 3)

## -----------------------------------------------------------------------------
data(hbk)
X <- as.matrix(hbk)[, 1:3]
X <- transfo(X)$Y
rowout <- robustbase::covMcd(X)
rd <- sqrt(mahalanobis(X, center = rowout$center, cov = rowout$cov))
cutoff <- sqrt(qchisq(0.975, ncol(X)))
sum(rd > cutoff) 
cellout <- cellMCD(X)
cd <- sqrt(mahalanobis(X, center = cellout$mu, cov = cellout$S))
round(cor(rd, cd), 3) 

## -----------------------------------------------------------------------------
data(hbk)
X <- as.matrix(hbk)
X <- transfo(X)$Y
rowout <- robustbase::covMcd(X)
rd <- sqrt(mahalanobis(X, center = rowout$center, cov = rowout$cov))
cutoff <- sqrt(qchisq(0.975, ncol(X)))
sum(rd > cutoff)
cellout <- cellMCD(X)
cd <- sqrt(mahalanobis(X, center = cellout$mu, cov = cellout$S))
round(cor(rd, cd), 3)

## -----------------------------------------------------------------------------
data(kootenay)
X <- as.matrix(kootenay)
X <- transfo(X)$Y 
rowout <- robustbase::covMcd(X)
rd <- sqrt(mahalanobis(X, center = rowout$center, cov = rowout$cov))
cutoff <- sqrt(qchisq(0.975, ncol(X)))
sum(rd > cutoff)
cellout <- cellMCD(X)
cd = sqrt(mahalanobis(X, center = cellout$mu, cov = cellout$S))
round(cor(rd, cd), 3) 

## ----error=TRUE---------------------------------------------------------------
data(lactic)
X <- as.matrix(lactic)
X <- transfo(X)$Y

## -----------------------------------------------------------------------------
data(milk)
X <- as.matrix(milk)
X <- transfo(X)$Y
rowout <- robustbase::covMcd(X)
rd <- sqrt(mahalanobis(X, center = rowout$center, cov = rowout$cov))
cellout <- cellMCD(X)
cd <- sqrt(mahalanobis(X, center = cellout$mu, cov = cellout$S))
round(cor(rd, cd), 3) 

## ----error=TRUE---------------------------------------------------------------
data(pension)
X <- as.matrix(pension)
X <- transfo(X)$Y
rowout <- robustbase::covMcd(X)
rd <- sqrt(mahalanobis(X,center=rowout$center,cov=rowout$cov))
cellout <- cellMCD(X)
cd <- sqrt(mahalanobis(X,center=cellout$mu,cov=cellout$S))
round(cor(rd,cd),3) 

## -----------------------------------------------------------------------------
data(phosphor)
X <- as.matrix(phosphor)
X <- transfo(X)$Y
rowout <- robustbase::covMcd(X)
rd <- sqrt(mahalanobis(X, center = rowout$center, cov = rowout$cov))
cellout <- cellMCD(X)
cd <- sqrt(mahalanobis(X, center = cellout$mu, cov = cellout$S))
round(cor(rd, cd), 3) 

## -----------------------------------------------------------------------------
data(pilot)
X <- as.matrix(pilot)
X <- transfo(X)$Y
rowout <- robustbase::covMcd(X)
rd <- sqrt(mahalanobis(X, center = rowout$center, cov = rowout$cov))
cutoff <- sqrt(qchisq(0.975, ncol(X)))
sum(rd > cutoff) 
cellout <- cellMCD(X)
cd <- sqrt(mahalanobis(X, center = cellout$mu, cov = cellout$S))
round(cor(rd, cd), 3) 

## -----------------------------------------------------------------------------
cellout$raw.mu - colMeans(X) 
n <- nrow(X)
cellout$raw.S - (n - 1) * cov(X) / n

## -----------------------------------------------------------------------------
data(radarImage)
X <- as.matrix(radarImage)
X <- transfo(X)$Y
pairs(X) 
rowout <- robustbase::covMcd(X)
rd <- sqrt(mahalanobis(X, center = rowout$center, cov = rowout$cov))
cutoff <- sqrt(qchisq(0.975,ncol(X)))
sum(rd > cutoff) 
cellout <- cellMCD(X) 
cd <- sqrt(mahalanobis(X, center = cellout$mu, cov = cellout$S))
round(cor(rd,cd),3)

## -----------------------------------------------------------------------------
data(salinity)
X <- as.matrix(salinity)[, 1:3]
X <- transfo(X)$Y
rowout <- robustbase::covMcd(X)
rd <- sqrt(mahalanobis(X, center = rowout$center, cov = rowout$cov))
cutoff <- sqrt(qchisq(0.975, ncol(X)))
sum(rd > cutoff) 
cellout <- cellMCD(X)
cd <- sqrt(mahalanobis(X, center = cellout$mu, cov = cellout$S))
round(cor(rd,cd),3) 

## -----------------------------------------------------------------------------
data(salinity)
X <- as.matrix(salinity)
X <- transfo(X)$Y
rowout <- robustbase::covMcd(X)
rd <- sqrt(mahalanobis(X, center = rowout$center, cov = rowout$cov))
cutoff <- sqrt(qchisq(0.975, ncol(X)))
sum(rd > cutoff) 
cellout <- cellMCD(X) 
cd <- sqrt(mahalanobis(X, center = cellout$mu, cov = cellout$S))
round(cor(rd, cd), 3) 

## -----------------------------------------------------------------------------
data(starsCYG)
X <- as.matrix(starsCYG)
X <- transfo(X, nbsteps = 1)$Y
rowout <- robustbase::covMcd(X)
rd <- sqrt(mahalanobis(X, center = rowout$center, cov = rowout$cov))
cutoff <- sqrt(qchisq(0.975, ncol(X)))
sum(rd > cutoff)
cellout <- cellMCD(X)
cd <- sqrt(mahalanobis(X, center = cellout$mu, cov = cellout$S))
round(cor(rd,cd),3) 

## ----error = TRUE-------------------------------------------------------------
data(toxicity)
X <- as.matrix(toxicity)
X <- transfo(X)$Y
rowout <- robustbase::covMcd(X)
rd     <- sqrt(mahalanobis(X,center=rowout$center,cov=rowout$cov))
cutoff <- sqrt(qchisq(0.975,ncol(X)))
sum(rd > cutoff) 
cellout <- cellMCD(X)

## -----------------------------------------------------------------------------
data(wood)
X <- as.matrix(wood)
X <- transfo(X, nbsteps = 1)$Y
cellout <- cellMCD(X)

Try the cellWise package in your browser

Any scripts or data that you put into this service are public.

cellWise documentation built on Oct. 25, 2023, 5:07 p.m.