Nothing
## ----setup, include=FALSE, warning=FALSE--------------------------------------
knitr::opts_chunk$set(echo = TRUE,
warning = FALSE,
message = FALSE,
fig.align = "center",
fig.width = 6,
fig.height = 5,
out.width = "60%",
collapse = TRUE,
comment = "#>",
tidy.opts = list(width.cutoff = 65),
tidy = FALSE)
library(knitr)
library(magrittr)
library(loon.tourr, quietly = TRUE)
library(tidyverse, quietly = TRUE)
library(class, quietly = TRUE)
set.seed(12314159)
imageDirectory <- "./images/classification"
dataDirectory <- "./data/classification"
path_concat <- function(path1, ..., sep="/") {
# The "/" is standard unix directory separator and so will
# work on Macs and Linux.
# In windows the separator might have to be sep = "\" or
# even sep = "\\" or possibly something else.
paste(path1, ..., sep = sep)
}
## ----table, echo = FALSE, warning=FALSE, message=FALSE, error=FALSE, fig.width=4, fig.height=3, fig.align="center"----
data.frame(
Region = c("North", "South", "Sardinia"),
Area = c("North-Apulia, South-Apulia, Calabria, Sicily",
"East-Liguria, West-Liguria, Umbria",
"Coastal-Sardinia, Inland-Sardinia")
) %>%
kable()
## ----split data, warning=FALSE, message=FALSE, error=FALSE, fig.width=4, fig.height=3, fig.align="center"----
set.seed(123)
N <- nrow(olive)
trainId <- sample(seq(N),
size = floor(0.8 * N))
testId <- setdiff(seq(N), trainId)
acids <- setdiff(colnames(olive), c("region", "area"))
trainX <- olive[trainId, acids]
testX <- olive[testId, acids]
trainY <- olive[trainId, "region"]
testY <- olive[testId, "region"]
## ----scaling, warning=FALSE, message=FALSE, error=FALSE, fig.width=4, fig.height=3, fig.align="center"----
row.names(trainX) <- NULL
kable(head(trainX))
scalingTrainX <- loon::l_getScaledData(trainX, scaling = "variable")
scalingTestX <- loon::l_getScaledData(testX, scaling = "variable")
kable(head(scalingTrainX), digits = 2)
## ----xgboost, warning=FALSE, message=FALSE, error=FALSE, fig.width=4, fig.height=3, fig.align="center"----
knn_pred <- function(trainX, trainY, testX, testY, k = c(5, 10, 20)) {
len_test <- length(testY)
vapply(k,
function(num) {
yhat <- class::knn(trainX, testX, trainY, k = num)
sum(yhat == testY)/len_test
}, numeric(1L))
}
low_dim_knn_pred <- function(dims = 2:5, fun,
trainX, trainY, testX, testY,
k = c(5, 10, 20), setNames = TRUE) {
tab <- lapply(dims,
function(d) {
knn_pred(
fun(trainX, d), trainY,
fun(testX, d), testY
)
}) %>%
as.data.frame() %>%
as_tibble()
if(setNames) {
tab <- tab %>%
setNames(nm = paste0("d = ", dims))
}
rownames(tab) <- paste0("k = ", k)
tab
}
## ----p choose k, warning=FALSE, message=FALSE, error=FALSE, fig.width=4, fig.height=3, fig.align="center"----
# the number of k
dims <- 2:5
var_names <- colnames(scalingTrainX)
low_dim_names <- c()
K <- ncol(trainX)
pChooseD <- lapply(dims,
function(d) {
com <- combn(K, d)
pred <- apply(com, 2,
function(pair) {
knn_pred(trainX[, pair], trainY,
testX[, pair], testY)
})
mean_pred <- apply(pred, 2, mean)
id <- which.max(mean_pred)
low_dim_names <<- c(low_dim_names,
paste(var_names[com[, id]],
collapse = ":"))
pred[, id]
}) %>%
as.data.frame() %>%
as_tibble() %>%
setNames(nm = paste0("d = ", dims))
rownames(pChooseD) <- paste0("k = ", c(5, 10, 20))
## ----pairs--------------------------------------------------------------------
names(low_dim_names) <- paste0("d = ", dims)
low_dim_names
## ----nav graph prediction table-----------------------------------------------
kable(pChooseD, row.names = TRUE,
digits = 3)
## ----PCA, warning=FALSE, message=FALSE, error=FALSE, fig.width=4, fig.height=3, fig.align="center"----
trainXPCA <- princomp(scalingTrainX)
testXPCA <- princomp(scalingTestX)
round(trainXPCA$sdev, 2)
## ----PCA knn, warning=FALSE, message=FALSE, error=FALSE, fig.width=4, fig.height=3, fig.align="center"----
PCA <- low_dim_knn_pred(2:5,
fun = function(princomp, d)
{princomp$scores[, seq(d)]},
trainXPCA,
trainY,
testXPCA,
testY)
kable(PCA, row.names = TRUE,
digits = 3)
## ----LLE, warning=FALSE, message=FALSE, error=FALSE, fig.width=4, fig.height=3, fig.align="center", eval = FALSE----
# library(RDRToolbox)
# lle <- low_dim_knn_pred(2:5,
# fun = function(data, d) {
# LLE(data, dim = d, k = 5)
# },
# scalingTrainX, trainY,
# scalingTestX, testY)
# kable(lle, row.names = TRUE,
# digits = 3)
## ----LLE readRDS, echo = FALSE------------------------------------------------
lle <- readRDS(path_concat(dataDirectory, "lle.RDS"))
kable(lle, row.names = TRUE,
digits = 3)
## ----tour 2D, eval = FALSE, warning=FALSE, message=FALSE, error=FALSE, fig.width=4, fig.height=3, fig.align="center"----
# p2 <- l_tour(scalingTrainX, color = trainY)
# l <- l_layer_hull(p2, group = trainY)
## ----2D projection, echo = FALSE, warning=FALSE, message=FALSE, error=FALSE, fig.width=3, fig.height=3, fig.align="center", out.width = "70%"----
include_graphics(path_concat(imageDirectory, "proj2D.PNG"))
proj2D <- readRDS(path_concat(dataDirectory, "proj2D.RDS")) %>%
as.matrix()
## ----2D show, eval = FALSE, warning=FALSE, message=FALSE, error=FALSE, fig.width=4, fig.height=3, fig.align="center", out.width = "70%"----
# proj2D <- p2["projection"]
## ----2D projection display, echo = FALSE, warning=FALSE, message=FALSE, error=FALSE, fig.width=3, fig.height=3, fig.align="center", out.width = "70%"----
kable(as.data.frame(proj2D, row.names = colnames(trainX)),
digits = 2)
## ----tour 3D, eval = FALSE, warning=FALSE, message=FALSE, error=FALSE, fig.width=4, fig.height=3, fig.align="center", out.width = "70%"----
# p3 <- l_tour(scalingTrainX,
# tour_path = tourr::grand_tour(3),
# color = trainY,
# axesLayout = "parallel")
# proj3D <- p3["projection"]
## ----tour 3D projection, echo = FALSE, warning=FALSE, message=FALSE, error=FALSE, fig.width=3, fig.height=3, fig.align="center", out.width = "70%"----
include_graphics(path_concat(imageDirectory, "proj3D.PNG"))
proj3D <- readRDS(path_concat(dataDirectory, "proj3D.RDS")) %>%
as.matrix()
## ----tour 4D, eval = FALSE, warning=FALSE, message=FALSE, error=FALSE, fig.width=4, fig.height=3, fig.align="center", out.width = "70%"----
# p4 <- l_tour(scalingTrainX,
# tour_path = tourr::grand_tour(4),
# color = trainY,
# axesLayout = "parallel")
# proj4D <- p4["projection"]
## ----tour 4D projection, echo = FALSE, warning=FALSE, message=FALSE, error=FALSE, fig.width=3, fig.height=3, fig.align="center", out.width = "70%"----
include_graphics(path_concat(imageDirectory, "proj4D.PNG"))
proj4D <- readRDS(path_concat(dataDirectory, "proj4D.RDS")) %>%
as.matrix()
## ----tour 5D, eval = FALSE, warning=FALSE, message=FALSE, error=FALSE, fig.width=4, fig.height=3, fig.align="center", out.width = "70%"----
# p5 <- l_tour(scalingTrainX,
# tour_path = tourr::grand_tour(5),
# color = trainY,
# axesLayout = "parallel")
# proj5D <- p5["projection"]
## ----tour 5D projection, echo = FALSE, warning=FALSE, message=FALSE, error=FALSE, fig.width=3, fig.height=3, fig.align="center", out.width = "70%"----
include_graphics(path_concat(imageDirectory, "proj5D.PNG"))
proj5D <- readRDS(path_concat(dataDirectory, "proj5D.RDS")) %>%
as.matrix()
## ----tour compute, warning=FALSE, message=FALSE, error=FALSE, fig.width=4, fig.height=3, fig.align="center", out.width = "70%"----
tour <- low_dim_knn_pred(list(proj2D, proj3D,
proj4D, proj5D),
fun = function(data, proj) {
data %*% as.matrix(proj)
},
scalingTrainX, trainY,
scalingTestX, testY,
setNames = FALSE)
colnames(tour) <- paste0("d = ", 2:5)
kable(tour, row.names = TRUE,
digits = 3)
## ----bind_states, warning=FALSE, message=FALSE, echo=FALSE, eval = FALSE------
# callback <- function(W) {
# # W is the widget path name
# pred <- knn_pred(scalingTrainX %*% p5["projection"],
# trainY,
# scalingTestX %*% p5["projection"],
# testY,
# k = c(5, 10, 20))
# cat(
# paste0(
# "k = 5: accuracy rate ", round(pred[1], 3), "\n",
# "k = 10: accuracy rate ", round(pred[2], 3), "\n",
# "k = 20: accuracy rate ", round(pred[3], 3), "\n"
# )
# )
# }
# l_bind_state(target = l_getPlots(p5),
# event = "all",
# callback = callback)
# # [1] "stateBinding0"
## ----graphical summary, warning=FALSE, message=FALSE, error=FALSE, fig.width=12, fig.height=6, fig.align="center", out.width = "70%"----
rbind(tour, lle, PCA, pChooseD) %>%
mutate(k = rep(c(5, 10, 20), 4),
method = rep(c("tour", "LLE", "PCA", "pChooseD"), each = 3)) %>%
pivot_longer(cols = -c(k, method),
names_to = "Dimensions",
values_to = "Accuracy") %>%
mutate(Dimensions = parse_number(Dimensions)) %>%
ggplot(mapping = aes(x = Dimensions,
y = Accuracy,
colour = method)) +
geom_path() +
facet_wrap(~k) +
ggtitle("Facet by the number of neibourhoods")
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.