Nothing
## ---- include = FALSE---------------------------------------------------------
is_check <- ("CheckExEnv" %in% search()) || any(c("_R_CHECK_TIMINGS_",
"_R_CHECK_LICENSE_") %in% names(Sys.getenv()))
knitr::opts_chunk$set(
collapse = TRUE,
comment = "",
warning = FALSE,
message = FALSE,
eval = !is_check
)
## ----table, echo = FALSE------------------------------------------------------
library(dplyr)
functions <- data.frame("Purpose" = c(rep("Model", 2),
rep("Summarize", 5),
rep("Tune", 2),
rep("Graph", 3),
rep("Calculate efficiency scores", 3),
rep("Graph efficiency scores", 2),
rep("Predict", 1),
rep("Rank", 2),
rep("Simulation", 2)),
"Function name" = c("EAT", "RFEAT",
"print", "summary", "EAT_size", "EAT_frontier_levels", "EAT_leaf_stats",
"bestEAT", "bestRFEAT",
"frontier", "plotEAT", "plotRFEAT",
"efficiencyEAT", "efficiencyCEAT", "efficiencyRFEAT",
"efficiencyDensity", "efficiencyJitter",
"predict",
"rankingEAT", "rankingRFEAT",
"Y1.sim", "X2Y2.sim"),
"Usage" = c("It generates a pruned Efficiency Analysis Trees model and returns an `EAT` object.",
"It generates a Random Forest for Efficiency Analysis Trees model and returns a `RFEAT` object",
"Print method for an `EAT` or a `RFEAT` object.",
"Summary method for an `EAT` object.",
"For an `EAT` object. It returns the number of leaf nodes.",
"For an `EAT` object. It returns the frontier output levels at the leaf nodes.",
"For an `EAT` object. It returns a descriptive summary statistics table for each output variable calculated from the leaf nodes observations.",
"For an EAT model. Hyperparameter tuning.",
"For an RFEAT model Hyperparameter tuning.",
"For an `EAT` object. It plots the estimated frontier in a two-dimensional scenario (1 input and 1 output).",
"For an `EAT` object. It plots the tree structure.",
"For an `RFEAT` object. It plots a line plot graph with the Out-of-Bag (OOB) error for a forest consisting of k trees.",
"It calculates the efficiency scores through an EAT (and FDH) model.",
"It calculates the efficiency scores through a convexified EAT (and DEA) model.",
"It calculates the efficiency scores through a RFEAT (and FDH) model.",
"Density plot for a `data.frame` of efficiency scores (EAT, FDH, CEAT, DEA and RFEAT are available).",
"For an `EAT` object. Jitter plot for a vector of efficiency scores calculated through an EAT /CEAT model. ",
"Predict method for an `EAT` or a `RFEAT` object.",
"For an `EAT` object. It calculates variable importance scores.",
"For an `RFEAT` object. It calculates variable importance scores.",
"It simulates a data set in 1 output scenario. 1, 3, 6, 9, 12 and 15 inputs can be generated.",
"It simulates a data set in 2 outputs and 2 inputs scenario.")
)
kableExtra::kable(functions) %>%
kableExtra::kable_styling("striped", full_width = F) %>%
kableExtra::collapse_rows(columns = 1, valign = "middle")
## ----seed---------------------------------------------------------------------
# We save the seed for reproducibility of the results
set.seed(120)
## ----library------------------------------------------------------------------
library(eat)
data("PISAindex")
## ----EAT, eval = FALSE--------------------------------------------------------
# EAT(data, x, y,
# fold = 5,
# numStop = 5,
# max.depth = NULL,
# max.leaves = NULL,
# na.rm = TRUE)
## ----single.output, collapse = FALSE------------------------------------------
single_model <- EAT(data = PISAindex,
x = 15, # input
y = 3) # output
## ----print.single.output, collapse = FALSE------------------------------------
print(single_model)
## ----summary.single.output, collapse = FALSE----------------------------------
summary(single_model)
## ----size.single.output, collapse = FALSE-------------------------------------
EAT_size(single_model)
## ----frt.single.output, collapse = FALSE--------------------------------------
EAT_frontier_levels(single_model)
## ----perf.single.output, collapse = FALSE-------------------------------------
EAT_leaf_stats(single_model)
## ----node.charac, collapse = FALSE--------------------------------------------
single_model[["tree"]][[5]]
## ----table2, echo = FALSE-----------------------------------------------------
types <- data.frame("Variable" = c("Independent variables (inputs)", "Dependent variables (outputs)"),
"Integer" = c("x", "x"),
"Numeric" = c("x", "x"),
"Factor" = c("", ""),
"Ordered factor" = c("x", ""))
kableExtra::kable(types, align = rep("c", 5)) %>%
kableExtra::kable_styling("striped", full_width = F)
## ----continent----------------------------------------------------------------
# Transform Continent to Factor
PISAindex_factor_Continent <- PISAindex
PISAindex_factor_Continent$Continent <- as.factor(PISAindex_factor_Continent$Continent)
## ----GDP_PPP_category, collapse = FALSE---------------------------------------
# Cateogirze GDP_PPP into 4 groups: Low, Medium, High, Very High.
PISAindex_GDP_PPP_cat <- PISAindex
PISAindex_GDP_PPP_cat$GDP_PPP_cat <- cut(PISAindex_GDP_PPP_cat$GDP_PPP,
breaks = c(0, 16.686, 31.419, 47.745, Inf),
include.lowest = T,
labels = c("Low", "Medium", "High", "Very high"))
class(PISAindex_GDP_PPP_cat$GDP_PPP_cat) # "factor" --> error
# It is necessary to indicate order = TRUE, before applying the EAT function
PISAindex_GDP_PPP_cat$GDP_PPP_cat <- factor(PISAindex_GDP_PPP_cat$GDP_PPP_cat,
order = TRUE)
class(PISAindex_GDP_PPP_cat$GDP_PPP_cat) # "ordered" "factor" --> correct
## ----categorized_model--------------------------------------------------------
categorized_model <- EAT(data = PISAindex_GDP_PPP_cat,
x = c(15, 19),
y = 3)
## ----frontier, eval = FALSE---------------------------------------------------
# frontier(object,
# FDH = FALSE,
# observed.data = FALSE,
# observed.color = "black",
# pch = 19,
# size = 1,
# rwn = FALSE,
# max.overlaps = 10)
## ----single.output.frontier, fig.width = 7.2, fig.height = 6------------------
frontier <- frontier(object = single_model,
FDH = TRUE,
observed.data = TRUE,
rwn = TRUE)
plot(frontier)
## ----single.output.max.depth, collapse = FALSE--------------------------------
single_model_md <- EAT(data = PISAindex,
x = 15,
y = 3,
max.leaves = 5)
## ----size.single.output_md, collapse = FALSE----------------------------------
EAT_size(single_model_md)
## ----pred.single.output_md, collapse = FALSE----------------------------------
single_model_md[["model"]][["y"]]
## ----single.output.frontier_md, fig.width = 7.2, fig.height = 6---------------
frontier_md <- frontier(object = single_model_md,
observed.data = TRUE)
plot(frontier_md)
## ----multioutput.scenario, collapse = FALSE-----------------------------------
multioutput_model <- EAT(data = PISAindex,
x = 6:18,
y = 3:5
)
## ----ranking, eval = FALSE----------------------------------------------------
# rankingEAT(object,
# barplot = TRUE,
# threshold = 70,
# digits = 2)
## ----multioutput.importance, fig.width = 7.2, fig.height = 6------------------
rankingEAT(object = multioutput_model,
barplot = TRUE,
threshold = 70,
digits = 2)
## ----plotEAT, eval = FALSE----------------------------------------------------
# plotEAT(object)
## ----model.graph1, collapse = FALSE-------------------------------------------
reduced_model1 <- EAT(data = PISAindex,
x = c(6, 7, 8, 12, 17),
y = 3:5,
numStop = 9)
## ----graph1, fig.dim = c(8.4, 7.5)--------------------------------------------
plotEAT(object = reduced_model1)
# Leaf nodes: 8
# Depth: 6
## ----model.graph2, collapse = FALSE-------------------------------------------
reduced_model2 <- EAT(data = PISAindex,
x = c(6, 7, 8, 12, 17),
y = 3:5,
numStop = 9,
max.depth = 5)
## ----graph2, fig.dim = c(8.4, 7.5)--------------------------------------------
plotEAT(object = reduced_model2)
# Leaf nodes: 6
# Depth: 5
## ----model.graph3, collapse = FALSE-------------------------------------------
reduced_model3 <- EAT(data = PISAindex,
x = c(6, 7, 8, 12, 17),
y = 3:5,
numStop = 9,
max.leaves = 4)
## ----graph3, fig.dim = c(8.4, 7.5)--------------------------------------------
plotEAT(object = reduced_model3)
# Leaf nodes: 4
# Depth: 3
## ----training_test------------------------------------------------------------
n <- nrow(PISAindex) # Observations in the dataset
selected <- sample(1:n, n * 0.7) # Training indexes
training <- PISAindex[selected, ] # Training set
test <- PISAindex[- selected, ] # Test set
## ----bestEAT, eval = FALSE----------------------------------------------------
# bestEAT(training, test,
# x, y,
# numStop = 5,
# fold = 5,
# max.depth = NULL,
# max.leaves = NULL,
# na.rm = TRUE)
## ----eat.tuning, collapse = FALSE---------------------------------------------
bestEAT(training = training,
test = test,
x = c(6, 7, 8, 12, 17),
y = 3:5,
numStop = c(3, 5, 7),
fold = c(5, 7))
## ----bestEAT_model, collapse = FALSE------------------------------------------
bestEAT_model <- EAT(data = PISAindex,
x = c(6, 7, 8, 12, 17),
y = 3:5,
numStop = 7,
fold = 5)
## ----summary.bestEAT_model, collapse = FALSE----------------------------------
summary(bestEAT_model)
## ----efficiencyEAT, eval = FALSE----------------------------------------------
# efficiencyEAT(data, x, y,
# object,
# score_model,
# digits = 3,
# FDH = TRUE,
# print.table = FALSE,
# na.rm = TRUE)
## ----scoresEAT, collapse = FALSE----------------------------------------------
# single_model <- EAT(data = PISAindex, x = 15, y = 3)
scores_EAT <- efficiencyEAT(data = PISAindex,
x = 15,
y = 3,
object = single_model,
scores_model = "BCC.OUT",
digits = 3,
FDH = TRUE,
print.table = TRUE,
na.rm = TRUE)
scores_EAT
## ----scoresEAT2, collapse = FALSE---------------------------------------------
scores_EAT2 <- efficiencyEAT(data = PISAindex,
x = 15,
y = 3,
object = single_model,
scores_model = "BCC.INP",
digits = 3,
FDH = TRUE,
print.table = FALSE,
na.rm = TRUE)
scores_EAT2
## ----efficiencyCEAT, eval = FALSE---------------------------------------------
# efficiencyCEAT(data, x, y,
# object,
# score_model,
# digits = 3,
# DEA = TRUE,
# print.table = FALSE,
# na.rm = TRUE)
## ----scoresCEAT, collapse = FALSE---------------------------------------------
scores_CEAT <- efficiencyCEAT(data = PISAindex,
x = 15,
y = 3,
object = single_model,
scores_model = "BCC.INP",
digits = 3,
DEA = TRUE,
print.table = TRUE,
na.rm = TRUE)
scores_CEAT
## ----efficiency_jitter, eval = FALSE------------------------------------------
# efficiencyJitter(object, df_scores,
# scores_model,
# lwb = NULL, upb = NULL)
## ----jitter_single, collapse = FALSE, fig.width = 7.2, fig.height = 5---------
efficiencyJitter(object = single_model,
df_scores = scores_EAT$EAT_BCC_OUT,
scores_model = "BCC.OUT",
lwb = 1.2)
## ----jitter_single2, collapse = FALSE, fig.width = 7.2, fig.height = 5--------
efficiencyJitter(object = single_model,
df_scores = scores_EAT2$EAT_BCC_INP,
scores_model = "BCC.INP",
upb = 0.65)
## ----frontier_comparar, fig.width = 7.2, fig.height = 6, fig.align = 'center'----
# frontier <- frontier(object = single_model, FDH = TRUE,
# observed.data = TRUE, rwn = TRUE)
plot(frontier)
## ----efficiency_density, eval = FALSE-----------------------------------------
# efficiencyDensity(df_scores,
# model = c("EAT", "FDH"))
#
## ----density_single, collapse = FALSE, fig.width = 7.2, fig.height = 6, fig.align = 'center'----
efficiencyDensity(df_scores = scores_EAT,
model = c("EAT", "FDH"))
efficiencyDensity(df_scores = scores_CEAT,
model = c("CEAT", "DEA"))
## ----cursed.scores, collapse = FALSE------------------------------------------
# multioutput_model <- EAT(data = PISAindex, x = 6:18, y = 3:5)
cursed_scores <- efficiencyEAT(data = PISAindex,
x = 6:18,
y = 3:5,
object = multioutput_model,
scores_model = "BCC.OUT",
digits = 3,
print.table = TRUE,
FDH = TRUE)
## ----cursed.density, collapse = FALSE, fig.width = 7.2, fig.height = 6, fig.align = 'center'----
efficiencyDensity(df_scores = cursed_scores, model = c("EAT", "FDH"))
## ----RF, eval = FALSE---------------------------------------------------------
# RFEAT(data, x, y,
# numStop = 5, m = 50,
# s_mtry = "BRM",
# na.rm = TRUE)
## ----RFmodel------------------------------------------------------------------
forest <- RFEAT(data = PISAindex,
x = 6:18,
y = 3:5,
numStop = 5,
m = 30,
s_mtry = "BRM",
na.rm = TRUE)
## ----print.RFEAT, collapse = FALSE--------------------------------------------
print(forest)
## ----plot.RFEAT, collapse = FALSE, fig.width = 7.2, fig.height = 6------------
plotRFEAT(forest)
## ----rankingRFEAT, eval = FALSE-----------------------------------------------
# rankingRFEAT(object,
# barplot = TRUE,
# digits = 2)
## ----RFmodel2-----------------------------------------------------------------
forestReduced <- RFEAT(data = PISAindex,
x = c(6, 7, 8, 12, 17),
y = 3:5,
numStop = 5,
m = 30,
s_mtry = "BRM",
na.rm = TRUE)
## ----rankingRFEAT_forestReduced, fig.width = 7.2, fig.height = 6--------------
rankingRFEAT(object = forestReduced,
barplot = TRUE,
digits = 2)
## ----bestRFEAT, eval = FALSE--------------------------------------------------
# bestRFEAT(training, test,
# x, y,
# numStop = 5,
# m = 50,
# s_mtry = c("5", "BRM"),
# na.rm = TRUE)
## ----tuning.bestRFEAT, collapse = FALSE---------------------------------------
# n <- nrow(PISAindex)
# selected <- sample(1:n, n * 0.7)
# training <- PISAindex[selected, ]
# test <- PISAindex[- selected, ]
bestRFEAT(training = training,
test = test,
x = c(6, 7, 8, 12, 17),
y = 3:5,
numStop = c(5, 10), # set of possible numStop
m = c(20, 30), # set of possible m
s_mtry = c("1", "BRM")) # set of possible s_mtry
## ----bestModelRFEAT, collapse = FALSE-----------------------------------------
bestRFEAT_model <- RFEAT(data = PISAindex,
x = c(6, 7, 8, 12, 17),
y = 3:5,
numStop = 5,
m = 20,
s_mtry = "BRM")
## ----eff_scores, eval = FALSE-------------------------------------------------
# efficiencyRFEAT(data, x, y,
# object,
# digits = 2,
# FDH = TRUE,
# print.table = FALSE,
# na.rm = TRUE)
## ----scores_RF----------------------------------------------------------------
scoresRF <- efficiencyRFEAT(data = PISAindex,
x = c(6, 7, 8, 12, 17),
y = 3:5,
object = bestRFEAT_model,
FDH = TRUE,
print.table = TRUE)
## ----predict, eval = FALSE----------------------------------------------------
# predict(object, newdata, x, ...)
## ----predictions, collapse = FALSE--------------------------------------------
# bestEAT_model <- EAT(data = PISAindex, x = c(6, 7, 8, 12, 17), y = 3:5,
# numStop = 5, fold = 5)
# bestRFEAT_model <- RFEAT(data = PISAindex, x = c(6, 7, 8, 12, 17), y = 3:5,
# numStop = 3, m = 30, s_mtry = 'BRM')
predictions_EAT <- predict(object = bestEAT_model,
newdata = PISAindex,
x = c(6, 7, 8, 12, 17))
predictions_RFEAT <- predict(object = bestRFEAT_model,
newdata = PISAindex,
x = c(6, 7, 8, 12, 17))
## ----EAT_vs_RFEAT, collapse = FALSE, echo = FALSE-----------------------------
predictions <- data.frame(
"S_PISA" = PISAindex[, 3],
"R_PISA" = PISAindex[, 4],
"M_PISA" = PISAindex[, 5],
"S_EAT" = predictions_EAT[, 1],
"R_EAT" = predictions_EAT[, 2],
"M_EAT" = predictions_EAT[, 3],
"S_RFEAT" = predictions_RFEAT[, 1],
"R_RFEAT" = predictions_RFEAT[, 2],
"M_RFEAT" = predictions_RFEAT[, 3]
)
kableExtra::kable(predictions) %>%
kableExtra::kable_styling("striped", full_width = F)
## ----newDF, collapse = FALSE--------------------------------------------------
new <- data.frame(WS = c(87, 92, 99), S = c(93, 90, 90), NBMC = c(90, 95, 93),
HW = c(90, 91, 92), AAE = c(88, 91, 89))
predictions_EAT <- predict(object = bestEAT_model,
newdata = new,
x = 1:5)
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.