Nothing
## ---- include = FALSE---------------------------------------------------------
knitr::opts_chunk$set(
collapse = TRUE,
comment = "#>",
fig.path = "vig/"
)
options(rmarkdown.html_vignette.check_title = FALSE)
## ----setup, echo = FALSE, warning=FALSE, message=FALSE------------------------
library(vivid)
## ---- warning=FALSE, message=FALSE--------------------------------------------
library(vivid)
## ---- messages = FALSE--------------------------------------------------------
set.seed(101)
genFriedman <- function(noFeatures = 10, noSamples = 100, sigma = 1) {
# Create dataframe with named columns
df <- setNames(as.data.frame(matrix(runif(noSamples * noFeatures, 0, 1), nrow = noSamples),
stringsAsFactors = FALSE),
paste0("x", 1:noFeatures))
# Equation: y = 10sin(πx1x2) + 20(x3−0.5)^2 + 10x4 + 5x5 + ε
df$y <- 10 * sin(pi * df$x1 * df$x2) +
20 * (df$x3 - 0.5)^2 +
10 * df$x4 +
5 * df$x5 +
rnorm(noSamples, sd = sigma) # error
return(df)
}
myData <- genFriedman(noFeatures = 9, noSamples = 350, sigma = 1)
## ---- eval = FALSE------------------------------------------------------------
# library(randomForest) # for model fit
# set.seed(1701)
# rf <- randomForest(y ~ ., data = myData)
## ---- eval = FALSE------------------------------------------------------------
# set.seed(1701)
# viviRf <- vivi(fit = rf,
# data = myData,
# response = "y",
# gridSize = 50,
# importanceType = "agnostic",
# nmax = 500,
# reorder = TRUE,
# predictFun = NULL,
# numPerm = 4,
# showVimpError = FALSE)
## ---- eval = FALSE------------------------------------------------------------
# viviHeatmap(mat = viviRf)
## ---- echo = F, out.width = '100%'-------------------------------------------
knitr::include_graphics("https://raw.githubusercontent.com/AlanInglis/vivid/master/vividVigplots/rfheatmap.png")
## ---- eval = FALSE------------------------------------------------------------
# viviNetwork(mat = viviRf)
## ---- echo = F, out.width = '100%'-------------------------------------------
knitr::include_graphics("https://raw.githubusercontent.com/AlanInglis/vivid/master/vividVigplots/rfnetwork.png")
## ---- eval = FALSE------------------------------------------------------------
# viviNetwork(mat = viviRf, intThreshold = 0.12, removeNode = FALSE)
# viviNetwork(mat = viviRf, intThreshold = 0.12, removeNode = TRUE)
## ---- echo = F, out.width = '100%'-------------------------------------------
knitr::include_graphics("https://raw.githubusercontent.com/AlanInglis/vivid/master/vividVigplots/rfnet_filter_comb1.png")
## ---- eval = FALSE------------------------------------------------------------
# viviNetwork(mat = viviRf,
# layout = cbind(c(1,1,1,1,2,2,2,2,2), c(1,2,4,5,1,2,3,4,5)))
## ---- echo = F, out.width = '70%', fig.align='center'------------------------
knitr::include_graphics("https://raw.githubusercontent.com/AlanInglis/vivid/master/vividVigplots/rfnet_custom_layout.png")
## ---- eval = FALSE------------------------------------------------------------
# set.seed(1701)
# # clustered and filtered network for rf
# intVals <- viviRf
# diag(intVals) <- NA
#
#
# # select VIVI values in top 20%
# impTresh <- quantile(diag(viviRf),.8)
# intThresh <- quantile(intVals,.8,na.rm=TRUE)
# sv <- which(diag(viviRf) > impTresh |
# apply(intVals, 1, max, na.rm=TRUE) > intThresh)
#
# h <- hclust(-as.dist(viviRf[sv,sv]), method="single")
#
# viviNetwork(viviRf[sv,sv],
# cluster = cutree(h, k = 3), # specify number of groups
# layout = igraph::layout_as_star)
## ---- echo = F, out.width = '80%', fig.align='center'------------------------
knitr::include_graphics("https://raw.githubusercontent.com/AlanInglis/vivid/master/vividVigplots/rfnet_cluster.png")
## ---- eval = FALSE------------------------------------------------------------
# top5 <- colnames(viviRf)[1:5]
# pdpVars(data = myData,
# fit = rf,
# response = 'y',
# vars = top5,
# nIce = 100)
## ---- echo = F, out.width = '100%', fig.align='center'-----------------------
knitr::include_graphics("https://raw.githubusercontent.com/AlanInglis/vivid/master/vividVigplots/rfpdp.png")
## ---- eval = FALSE------------------------------------------------------------
# set.seed(1701)
# pdpPairs(data = myData,
# fit = rf,
# response = "y",
# nmax = 500,
# gridSize = 10,
# vars = c("x1", "x2", "x3", "x4", "x5"),
# nIce = 100)
## ---- echo = F, out.width = '80%', fig.align='center'------------------------
knitr::include_graphics("https://raw.githubusercontent.com/AlanInglis/vivid/master/vividVigplots/rfGPGP_subset.png")
## ---- eval = FALSE------------------------------------------------------------
# set.seed(1701)
# pdpZen(data = myData, fit = rf, response = "y", nmax = 500, gridSize = 10)
## ---- echo = F, out.width = '60%', fig.align='center'------------------------
knitr::include_graphics("https://raw.githubusercontent.com/AlanInglis/vivid/master/vividVigplots/rfzen_all.png")
## ---- eval = FALSE------------------------------------------------------------
# set.seed(1701)
# pdpZen(data = myData,
# fit = rf,
# response = "y",
# nmax = 500,
# gridSize = 10,
# zpath = c("x1", "x2", "x3", "x4", "x5"))
## ---- echo = F, out.width = '70%', fig.align='center'------------------------
knitr::include_graphics("https://raw.githubusercontent.com/AlanInglis/vivid/master/vividVigplots/rfzen_subset.png")
## ---- eval = FALSE------------------------------------------------------------
# # set zpaths with different parameters
# intVals <- viviRf
# diag(intVals) <- NA
# intThresh <- quantile(intVals, .90, na.rm=TRUE)
# zpSw <- zPath(viv = viviRf, cutoff = intThresh, connect = FALSE, method = 'strictly.weighted')
#
#
#
# set.seed(1701)
# pdpZen(data = myData,
# fit = rf,
# response = "y",
# nmax = 500,
# gridSize = 10,
# zpath = zpSw)
## ---- echo = F, out.width = '70%', fig.align='center'------------------------
knitr::include_graphics("https://raw.githubusercontent.com/AlanInglis/vivid/master/vividVigplots/rfzen_SW.png")
## ---- eval = FALSE------------------------------------------------------------
# library("xgboost")
# gbst <- xgboost(data = as.matrix(myData[,1:9]),
# label = as.matrix(myData[,10]),
# nrounds = 100,
# verbose = 0)
## ---- eval = FALSE------------------------------------------------------------
# # predict function for GBM
# pFun <- function(fit, data, ...) predict(fit, as.matrix(data[,1:9]))
#
# set.seed(1701)
# viviGBst <- vivi(fit = gbst,
# data = myData,
# response = "y",
# reorder = FALSE,
# normalized = FALSE,
# predictFun = pFun,
# gridSize = 50,
# nmax = 500)
## ---- eval = FALSE------------------------------------------------------------
# viviHeatmap(mat = viviGBst)
## ---- echo = F, out.width = '100%', fig.align='center'-----------------------
knitr::include_graphics("https://raw.githubusercontent.com/AlanInglis/vivid/master/vividVigplots/gbmheat.png")
## ---- eval = FALSE------------------------------------------------------------
# set.seed(1701)
# rfEmbedded <- randomForest(y ~ ., data = myData, importance = TRUE)
## ---- eval = FALSE------------------------------------------------------------
# viviRfEmbedded <- vivi(fit = rfEmbedded,
# data = myData,
# response = "y",
# importanceType = "%IncMSE")
## ---- eval = FALSE------------------------------------------------------------
# library("ranger")
# rang <- ranger(y~., data = myData, importance = 'impurity')
## ---- eval = FALSE------------------------------------------------------------
# viviRangEmbedded <- vivi(fit = rang,
# data = myData,
# response = "y",
# importanceType = "impurity")
## ---- eval = FALSE------------------------------------------------------------
# set.seed(1701)
# rfClassif <- ranger(Species~ ., data = iris, probability = T,
# importance = "impurity")
#
# set.seed(101)
# viviClassif <- vivi(fit = rfClassif,
# data = iris,
# response = "Species",
# gridSize = 10,
# nmax = 50,
# reorder = TRUE,
# class = "setosa")
## ---- eval = FALSE------------------------------------------------------------
# viviHeatmap(mat = viviClassif)
# viviNetwork(mat = viviClassif)
## ---- echo = F, out.width = '100%', fig.align='center'-----------------------
knitr::include_graphics("https://raw.githubusercontent.com/AlanInglis/vivid/master/vividVigplots/classifVIVI.png")
## ---- eval = FALSE------------------------------------------------------------
# set.seed(1701)
# pdpPairs(data = iris,
# fit = rfClassif,
# response = "Species",
# class = "setosa",
# convexHull = T,
# gridSize = 10,
# nmax = 50)
#
## ---- echo = F, out.width = '80%', fig.align='center'------------------------
knitr::include_graphics("https://raw.githubusercontent.com/AlanInglis/vivid/master/vividVigplots/classifGPDP.png")
## ---- eval = FALSE------------------------------------------------------------
#
# # Load libraries
# library("vivid")
# library("randomForest")
#
# # Create data based on the Friedman equation
# set.seed(101)
# genFriedman <- function(noFeatures = 10, noSamples = 100, sigma = 1) {
#
# # Create dataframe with named columns
# df <- setNames(as.data.frame(matrix(runif(noSamples * noFeatures, 0, 1), nrow = noSamples),
# stringsAsFactors = FALSE),
# paste0("x", 1:noFeatures))
#
# # Equation: y = 10sin(πx1x2) + 20(x3−0.5)^2 + 10x4 + 5x5 + ε
#
# df$y <- 10 * sin(pi * df$x1 * df$x2) +
# 20 * (df$x3 - 0.5)^2 +
# 10 * df$x4 +
# 5 * df$x5 +
# rnorm(noSamples, sd = sigma) # error
#
# return(df)
# }
#
# myData <- genFriedman(noFeatures = 9, noSamples = 350, sigma = 1)
#
# -------------------------------------------------------------------
#
# # Fit random forest using randomForest package
# set.seed(1701)
# rf <- randomForest(y ~ ., data = myData)
#
# -------------------------------------------------------------------
#
# # Run vivid
# set.seed(1701)
# viviRf <- vivi(fit = rf,
# data = myData,
# response = "y",
# gridSize = 50,
# importanceType = "agnostic",
# nmax = 500,
# reorder = TRUE,
# predictFun = NULL,
# numPerm = 4,
# showVimpError = FALSE)
#
# -------------------------------------------------------------------
#
# # Visualisations:
#
# # 1.0: Heatmap
# viviHeatmap(mat = viviRf)
#
# # 1.1: Heatmap with custom colour palettes and a small border around the diagonal importance values
# viviHeatmap(mat = viviRf,
# intPal = colorspace::sequential_hcl(palette = "Oslo", n = 100),
# impPal = rev(colorspace::sequential_hcl(palette = "Reds 3", n = 100)),
# border = T)
#
# # 2.0: Network
# viviNetwork(mat = viviRf)
#
# # 2.1: Network with interactions below 0.12 are filtered.
# # By default, unconnected nodes are displayed, however, they can be removed by setting `removeNode = T`
# viviNetwork(mat = viviRf, intThreshold = 0.12, removeNode = FALSE)
# viviNetwork(mat = viviRf, intThreshold = 0.12, removeNode = TRUE)
#
# # 2.3: Network clustered and with interactions thresholded
# set.seed(1701)
# # clustered and filtered network for rf
# intVals <- viviRf
# diag(intVals) <- NA
#
#
# # select VIVI values in top 20%
# impTresh <- quantile(diag(viviRf),.8)
# intThresh <- quantile(intVals,.8,na.rm=TRUE)
# sv <- which(diag(viviRf) > impTresh |
# apply(intVals, 1, max, na.rm=TRUE) > intThresh)
#
# h <- hclust(-as.dist(viviRf[sv,sv]), method="single")
#
# # plot
# viviNetwork(viviRf[sv,sv],
# cluster = cutree(h, k = 3), # specify number of groups
# layout = igraph::layout_as_star)
#
# # 3.0: PDP of the top 5 variables extracted from the vivi matrix and number of ICe curves set to 100
# top5 <- colnames(viviRf)[1:5]
# pdpVars(data = myData,
# fit = rf,
# response = 'y',
# vars = top5,
# nIce = 100)
#
# # 4.0: GPDP of the variables x1 to x5, with 100 ICE curves shown.
# set.seed(1701)
# pdpPairs(data = myData,
# fit = rf,
# response = "y",
# nmax = 500,
# gridSize = 10,
# vars = c("x1", "x2", "x3", "x4", "x5"),
# nIce = 100)
#
# # 5.0: ZPDP of all variables
# set.seed(1701)
# pdpZen(data = myData, fit = rf, response = "y", nmax = 500, gridSize = 10)
#
# # 5.1: ZPDP where the `zpath` argument specifies the variables to be plotted. In this case, x1 to x5.
# set.seed(1701)
# pdpZen(data = myData,
# fit = rf,
# response = "y",
# nmax = 500,
# gridSize = 10,
# zpath = c("x1", "x2", "x3", "x4", "x5"))
#
# # 5.2: ZPDP with the zpaths set with different parameters using the `zPath` function.
# intVals <- viviRf
# diag(intVals) <- NA
# intThresh <- quantile(intVals, .90, na.rm=TRUE)
# zpSw <- zPath(viv = viviRf, cutoff = intThresh, connect = FALSE, method = 'strictly.weighted')
#
# set.seed(1701)
# pdpZen(data = myData,
# fit = rf,
# response = "y",
# nmax = 500,
# gridSize = 10,
# zpath = zpSw)
## ---- eval = FALSE------------------------------------------------------------
# library("vivid")
# library("xgboost")
# gbst <- xgboost(data = as.matrix(myData[,1:9]),
# label = as.matrix(myData[,10]),
# nrounds = 100,
# verbose = 0)
#
# # predict function for GBM
# pFun <- function(fit, data, ...) predict(fit, as.matrix(data[,1:9]))
#
# # run vivid
# set.seed(1701)
# viviGBst <- vivi(fit = gbst,
# data = myData,
# response = "y",
# reorder = FALSE,
# normalized = FALSE,
# predictFun = pFun,
# gridSize = 50,
# nmax = 500)
#
# # plot heatmap
# viviHeatmap(mat = viviGBst)
#
## ---- eval = FALSE------------------------------------------------------------
# library("vivid")
# library("randomForest")
# library("ranger")
#
# # randomForest
# # Note importance must be set to TRUE to use embedded importance scores.
# set.seed(1701)
# rfEmbedded <- randomForest(y ~ ., data = myData, importance = TRUE)
#
# # Using % increase in MSE as the importance metric in vivid
# viviRfEmbedded <- vivi(fit = rfEmbedded,
# data = myData,
# response = "y",
# importanceType = "%IncMSE")
#
# # Plot Heatmap
# viviHeatmap(mat = viviRfEmbedded)
#
# # ranger
# # Note the importance metric is selected directly in ranger
# rang <- ranger(y~., data = myData, importance = 'impurity')
#
# # Run vivid
# viviRangEmbedded <- vivi(fit = rang,
# data = myData,
# response = "y",
# importanceType = "impurity")
#
# # Plot Heatmap
# viviHeatmap(mat = viviRangEmbedded)
## ---- eval = FALSE------------------------------------------------------------
# library("vivid")
# library("randomForest")
#
# set.seed(1701)
# rfClassif <- ranger(Species~ ., data = iris, probability = T,
# importance = "impurity")
#
# set.seed(101)
# viviClassif <- vivi(fit = rfClassif,
# data = iris,
# response = "Species",
# gridSize = 10,
# nmax = 50,
# reorder = TRUE,
# class = "setosa")
#
# viviHeatmap(mat = viviClassif)
#
# set.seed(1701)
# pdpPairs(data = iris,
# fit = rfClassif,
# response = "Species",
# class = "setosa",
# convexHull = T,
# gridSize = 10,
# nmax = 50)
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.