# Nothing
## ---- echo = TRUE-------------------------------------------------------------
# Attach mlbench, load the PimaIndiansDiabetes data set into the workspace,
# and print a compact description of its structure.
library("mlbench")
data("PimaIndiansDiabetes")
str(PimaIndiansDiabetes)
## ----echo = FALSE, fig.width=7,fig.height=6, fig.cap="Figure 1: Normal QQ plots of four Pima Indians diabetes variables.", warning = FALSE, message = FALSE----
# Draw a 2 x 2 grid of normal QQ plots, one panel per variable listed below.
# The descriptive title is handed to qqnorm() through `main`: calling title()
# after qqnorm() draws a second main title on top of qqnorm()'s default
# "Normal Q-Q Plot", leaving two overlapping strings in every panel.
par(mfrow = c(2, 2))
qqPanels <- c(
  glucose = "Plasma glucose concentration",
  pressure = "Diastolic blood pressure",
  triceps = "Triceps skinfold thickness",
  insulin = "Serum insulin"
)
for (varName in names(qqPanels)) {
  # The column name doubles as the y-axis label.
  qqnorm(PimaIndiansDiabetes[[varName]],
         ylab = varName,
         main = qqPanels[[varName]])
}
## ---- echo = FALSE------------------------------------------------------------
# Percentage of rows whose insulin value equals 0, rounded to one decimal.
# Comparisons that evaluate to NA are not counted.
MissPctInsulin <- round(
  100 * sum(PimaIndiansDiabetes$insulin == 0, na.rm = TRUE) /
    nrow(PimaIndiansDiabetes),
  digits = 1
)
## ---- echo = TRUE-------------------------------------------------------------
# Build a 0/1 indicator that is 1 where insulin equals 0, then create
# modifiedPima: the original data frame without the insulin column and with
# the indicator appended as its last column.
insulinMissing <- as.numeric(PimaIndiansDiabetes[["insulin"]] == 0)
modifiedPima <- PimaIndiansDiabetes[
  , names(PimaIndiansDiabetes) != "insulin", drop = FALSE
]
modifiedPima[["insulinMissing"]] <- insulinMissing
## ---- echo = TRUE, eval = FALSE-----------------------------------------------
# insulinProject <- StartProject(dataSource = modifiedPima,
# projectName = "InsulinProject",
# target = "insulinMissing",
# wait = TRUE)
## ---- echo = TRUE, eval = FALSE-----------------------------------------------
# insulinModelList <- ListModels(insulinProject)
## ----echo = FALSE, fig.width = 7, fig.height = 6, fig.cap = "Figure 2: Barplot of LogLoss values for the models predicting missingInsulin.", warning = FALSE, message = FALSE----
# Attach datarobot and read the saved model list from disk. The list is
# converted to a data frame (with simple = FALSE) for use by later chunks and
# then drawn in a single-panel layout.
library("datarobot")
insulinModelList <- readRDS(file = "insulinModelList.rds")
insulinModelFrame <- as.data.frame(x = insulinModelList, simple = FALSE)
par(mfrow = c(1, 1))
plot(
  insulinModelList,
  orderDecreasing = TRUE,
  xpos = 0.25,
  textSize = 0.6
)
## ---- echo = FALSE------------------------------------------------------------
# Positions of the models with the smallest and the largest validation
# LogLoss; the expandedModel entry of the smallest-LogLoss model is printed.
validationLogLoss <- insulinModelFrame$LogLoss.validation
bestIndex <- which.min(validationLogLoss)
worstIndex <- which.max(validationLogLoss)
insulinModelFrame$expandedModel[bestIndex]
## ----echo = FALSE, fig.width = 7, fig.height = 6, fig.cap = "Figure 3: Plot of AUC values for the models predicting missingInsulin.", warning = FALSE, message = FALSE----
# Plot the validation AUC of every model against its position in the frame
# and overlay a solid red point on the model at bestIndex.
par(mfrow = c(1, 1))
validationAUC <- insulinModelFrame$AUC.validation
plot(
  validationAUC,
  xlab = "Model number",
  ylab = "Area under the ROC curve"
)
points(bestIndex, validationAUC[bestIndex], pch = 16, col = "red")
## ---- echo = TRUE, eval = FALSE-----------------------------------------------
# modelList <- vector("list", 9)
# modelList[[1]] <- insulinModelList
# allVars <- colnames(modifiedPima)[1:8]
# permFile <- tempfile(fileext = "permFile.csv")
# for (i in 1:8) {
# varName <- allVars[i]
# PermuteColumn("modifiedPima.csv", varName, permFile)
# projName <- paste("PermProject",varName,sep="")
# permProject <- StartProject(permFile, projectName = projName, target = "insulinMissing", wait = TRUE)
# modelList[[i+1]] <- ListModels(permProject)
# }
## ----echo = FALSE, fig.width = 7, fig.height = 6, fig.cap="Figure 4: Beanplot summary of LogLoss shifts versus random permutation.", warning = FALSE, message = FALSE----
# Beanplot of the first eight columns of the saved LogLoss shift frame.
# The row with the smallest originalLogLoss is overlaid as green points,
# followed by a legend and a dashed reference line at zero.
par(mfrow = c(1, 1))
library("beanplot")
logLossDeltas <- readRDS(file = "insulinDeltaFrame.rds")
shiftColumns <- seq_len(8)
beanplot(
  logLossDeltas[, shiftColumns],
  las = 2,
  xlab = "",
  ylab = "LogLoss Shift",
  col = c("transparent", "red", "red", "blue"),
  what = c(0, 1, 1, 1)
)
bestRow <- which.min(logLossDeltas$originalLogLoss)
points(
  shiftColumns,
  logLossDeltas[bestRow, shiftColumns],
  pch = 16,
  col = "limegreen",
  cex = 1.5
)
# The legend is drawn before the reference line, so the line overdraws it.
legend(
  "topright",
  col = c("limegreen", "blue"),
  pch = c(16, 15),
  cex = 1.2,
  legend = c("Best", "Average")
)
abline(h = 0, lty = 2)
## ----echo = FALSE, fig.width = 7, fig.height = 6, fig.cap = "Figure 5: Plot of AUC values for the original models (open circles) and those for the models with the random permutation applied to triceps (solid red triangles)", warning = FALSE, message = FALSE----
# Plot the original AUC values, ordered by increasing originalLogLoss, and
# overlay the triceps column of the same frame as solid red triangles.
par(mfrow = c(1, 1))
AUCshiftFrame <- readRDS(file = "AUCshiftFrame.rds")
sortIndex <- order(logLossDeltas$originalLogLoss)
sortedOriginalAUC <- AUCshiftFrame$originalAUC[sortIndex]
sortedTricepsAUC <- AUCshiftFrame$triceps[sortIndex]
plot(
  sortedOriginalAUC,
  xlab = "Model number",
  ylab = "Area under ROC curve"
)
points(sortedTricepsAUC, pch = 17, col = "red")
## ---- echo = FALSE------------------------------------------------------------
# Cross-tabulate two 0/1 indicators: insulin equal to 0 versus triceps
# equal to 0.
missingInsulin <- as.numeric(PimaIndiansDiabetes[["insulin"]] == 0)
missingTriceps <- as.numeric(PimaIndiansDiabetes[["triceps"]] == 0)
table(missingInsulin, missingTriceps)
## ---- echo = TRUE-------------------------------------------------------------
# Attach insuranceData and load its dataCar data set into the workspace.
library("insuranceData")
data("dataCar")
## ---- echo = TRUE-------------------------------------------------------------
# Keep only the rows with a positive claimcst0 and the columns listed in
# keepVars. lossIndex holds the positions of those rows in dataCar.
keepVars <- c(
  "veh_value", "exposure", "claimcst0", "veh_body",
  "veh_age", "gender", "area", "agecat"
)
lossIndex <- which(dataCar$claimcst0 > 0)
lossFrame <- dataCar[lossIndex, keepVars, drop = FALSE]
## ---- echo = FALSE------------------------------------------------------------
# lossPct: share of dataCar rows indexed by lossIndex, in percent.
# anomPct: share of those rows whose claimcst0 equals 200, in percent.
# Both are rounded to one decimal place.
lossCount <- length(lossIndex)
lossPct <- round(100 * lossCount / nrow(dataCar), digits = 1)
anomIndex <- which(lossFrame$claimcst0 == 200)
anomPct <- round(100 * length(anomIndex) / lossCount, digits = 1)
## ---- echo = TRUE, eval = FALSE-----------------------------------------------
# anomaly <- as.numeric(lossFrame$claimcst0 == 200)
# anomFrame <- lossFrame
# anomFrame$claimcst0 <- NULL
# anomFrame$anomaly <- anomaly
# anomProject <- StartProject(anomFrame, projectName = "AnomalyProject", target = "anomaly", wait = TRUE)
# anomalyModelList <- ListModels(anomProject)
## ----echo = FALSE, fig.width=7,fig.height=6, fig.cap="Figure 7: Horizontal barplot LogLoss summary of the 64% models for the small loss data.", warning = FALSE, message = FALSE----
# Read the saved anomaly model list, plot the models at the largest
# samplePct, and mark the smallest validation LogLoss found in the whole
# frame with a dashed magenta vertical line.
par(mfrow = c(1, 1))
anomalyLeaderboard <- readRDS(file = "anomalyModelList.rds")
anomalyLeaderFrame <- as.data.frame(x = anomalyLeaderboard, simple = FALSE)
plotPct <- max(anomalyLeaderFrame$samplePct)
plot(
  anomalyLeaderboard,
  pct = plotPct,
  orderDecreasing = TRUE,
  xlim = c(0, 0.45)
)
bestLogLoss <- min(anomalyLeaderFrame$LogLoss.validation)
abline(v = bestLogLoss, lty = 2, lwd = 2, col = "magenta")
## ----echo = FALSE, fig.width=7,fig.height=6, fig.cap="Figure 8: Scatterplot summary of the AUC values for all models from AnomLossProject.", warning = FALSE, message = FALSE----
# Scatterplot of validation AUC by model position. Models at the three
# smallest distinct samplePct values are overdrawn as solid points: blue for
# the smallest, green for the second and red for the third.
par(mfrow = c(1, 1))
AAUC <- anomalyLeaderFrame$AUC.validation
samplePct <- anomalyLeaderFrame$samplePct
sizes <- sort(unique(samplePct))
Index16 <- which(samplePct == sizes[1])
Index32 <- which(samplePct == sizes[2])
Index64 <- which(samplePct == sizes[3])
plot(AAUC, xlab = "Model number", ylab = "Area under ROC curve")
# Overlay the groups from the third size down to the first.
highlightGroups <- list(
  list(index = Index64, colour = "red"),
  list(index = Index32, colour = "limegreen"),
  list(index = Index16, colour = "blue")
)
for (group in highlightGroups) {
  points(group$index, AAUC[group$index], pch = 16, col = group$colour)
}
legend(
  "bottomleft",
  col = c("blue", "limegreen", "red"),
  pch = 16,
  legend = c("16% data sample", "32% data sample", "64% data sample")
)
## ----echo = FALSE, fig.width=7,fig.height=6, fig.cap="Figure 9: Beanplot summary of AUC shifts versus random permutation.", warning = FALSE, message = FALSE----
# Beanplot of the first seven columns of the saved AUC shift frame. The
# row(s) whose expandedModel matches the model with the smallest validation
# LogLoss are overlaid as green points, followed by a legend and a dashed
# reference line at zero.
anomAUCDeltaFrame <- readRDS(file = "anomAUCDeltaFrame.rds")
bestIndex <- which.min(anomalyLeaderFrame$LogLoss.validation)
expandedModelNames <- as.character(anomalyLeaderFrame$expandedModel)
bestExpModel <- expandedModelNames[bestIndex]
bestRow <- which(anomAUCDeltaFrame$expandedModel == bestExpModel)
par(mfrow = c(1, 1))
shiftColumns <- seq_len(7)
beanplot(
  anomAUCDeltaFrame[, shiftColumns],
  las = 2,
  xlab = "",
  ylab = "AUC Shift",
  col = c("transparent", "red", "red", "blue"),
  what = c(0, 1, 1, 1),
  ylim = c(-0.1, 0.1)
)
points(
  shiftColumns,
  anomAUCDeltaFrame[bestRow, shiftColumns],
  pch = 16,
  col = "limegreen",
  cex = 1.5
)
# The legend is drawn before the reference line, so the line overdraws it.
legend(
  "topright",
  col = c("limegreen", "blue"),
  pch = c(16, 15),
  cex = 1.2,
  legend = c("Best", "Average")
)
abline(h = 0, lty = 2)
# Any scripts or data that you put into this service are public.
# Add the following code to your website.
# For more information on customizing the embed code, read Embedding Snippets.