# Nothing
## ----include = FALSE----------------------------------------------------------
# Document-wide knitr chunk defaults: show the source of every chunk and
# prefix printed output with "#>".
# Options deliberately left disabled: eval = FALSE, collapse = TRUE.
knitr::opts_chunk$set(echo = TRUE, comment = "#>")
## ----loading data-------------------------------------------------------------
library(lionfish)
library(data.table)

# Load the ChemicalManufacturingProcess dataset and work on a data.table of it.
data(ChemicalManufacturingProcess)
data <- data.table(ChemicalManufacturingProcess)

# Pick out the two families of predictor columns by their name prefix.
col_names <- names(data)
mp_cols <- col_names[startsWith(col_names, "ManufacturingProcess")]
bm_cols <- col_names[startsWith(col_names, "BiologicalMaterial")]

# Shorten the prefixes ("ManufacturingProcess" -> "ManPr",
# "BiologicalMaterial" -> "BioMat"); the rest of each name is kept.
new_mp_names <- sub("^ManufacturingProcess", "ManPr", mp_cols)
new_bm_names <- sub("^BiologicalMaterial", "BioMat", bm_cols)

# Rename both families by reference in a single call.
setnames(
  data,
  old = c(mp_cols, bm_cols),
  new = c(new_mp_names, new_bm_names)
)
## ----removing incomplete entries----------------------------------------------
# Step 1: keep only the observations with at most two missing values.
data <- data[rowSums(is.na(data)) <= 2]

# Step 2: keep only the variables with at most three missing values.
data <- data[, names(data)[colSums(is.na(data)) <= 3], with = FALSE]

# Step 3: drop six further observations, identified by their row position in
# the table as it stands after steps 1 and 2.
data <- data[-c(99, 153, 154, 158, 159, 160)]

# Step 4: keep only the variables that have no missing values left.
data <- data[, names(data)[colSums(is.na(data)) == 0], with = FALSE]
## -----------------------------------------------------------------------------
# Discretise Yield into ten equal-width bins spanning its observed range.
min_yield <- min(data[["Yield"]], na.rm = TRUE)
max_yield <- max(data[["Yield"]], na.rm = TRUE)

# Eleven break points delimit ten bins.
intervals <- seq(from = min_yield, to = max_yield, length.out = 11)

# Bin index per observation; rightmost.closed = TRUE puts the maximum into the
# last bin instead of a bin of its own.
yield_values <- findInterval(
  data[["Yield"]],
  intervals,
  rightmost.closed = TRUE
)

# One "Yield <lower>-<upper>" label per bin.
yield_labels <- paste0(
  "Yield ",
  intervals[-length(intervals)],
  "-",
  intervals[-1]
)
## ----eval = FALSE-------------------------------------------------------------
# library(ggplot2)
# library(viridis)
#
# yield_values_factor <- factor(yield_values, labels = yield_labels)
#
# ggplot(data, aes(x = Yield, fill = yield_values_factor)) +
# geom_histogram(binwidth = (max_yield - min_yield) / 30, color = "black") +
# scale_fill_viridis(option = "plasma", discrete = TRUE) +
# theme_minimal() +
# labs(title = "Histogram of the process yield",
# x = "Yield",
# y = "Count",
# fill = "Yield Intervals")
## -----------------------------------------------------------------------------
# Standardise every column (Yield included) to zero mean and unit standard
# deviation.
data <- data[, lapply(.SD, function(column) {
  centered <- column - mean(column, na.rm = TRUE)
  centered / sd(column, na.rm = TRUE)
})]

# Predictor-only table: all standardised columns except the response Yield.
data_wo_y <- data[, setdiff(names(data), "Yield"), with = FALSE]
## -----------------------------------------------------------------------------
# Fix the random seed, then attach tourr, which provides the tour functions
# used by the later chunks.
set.seed(42)
library(tourr)
# NOTE(review): display_size is not referenced by any interactive_tour() call
# visible in this file; those calls pass display_size=8 directly.
display_size <- 5 # Adjust to fit your display
## -----------------------------------------------------------------------------
# Initialise the "r-lionfish" environment through whichever backend is
# detected: the virtual-environment check runs first and the conda check only
# if that fails. Nothing is initialised when neither check succeeds.
env_backend <- if (check_venv()) {
  "virtual_env"
} else if (check_conda_env()) {
  "anaconda"
} else {
  NULL
}
if (!is.null(env_backend)) {
  init_env(env_name = "r-lionfish", virtual_env = env_backend)
}
## ----results="hide", message=FALSE--------------------------------------------
# Record 1D and 2D guided tours over all predictors, both steered by the LDA
# projection-pursuit index computed on the ten yield bins.
lda_tour_history_1d <- save_history(
  data_wo_y,
  tour_path = guided_tour(lda_pp(yield_values), d = 1)
)
lda_tour_history_2d <- save_history(
  data_wo_y,
  tour_path = guided_tour(lda_pp(yield_values), d = 2)
)

# Largest Euclidean norm of any observation (square root of the largest
# row-wise sum of squares).
half_range <- sqrt(max(rowSums(data_wo_y^2)))
feature_names <- colnames(data_wo_y)

# Plot descriptors in the list(type, obj) form passed to interactive_tour().
obj1 <- list(type = "1d_tour", obj = lda_tour_history_1d)
obj2 <- list(type = "2d_tour", obj = lda_tour_history_2d)
## -----------------------------------------------------------------------------
# Open the interactive viewer only in an interactive R session. Only the 2D
# tour (obj2) is shown, with the ten yield bins preselected and labelled.
if (interactive()) {
  interactive_tour(
    data = data_wo_y,
    plot_objects = list(obj2),
    feature_names = feature_names,
    half_range = half_range / 2,
    n_plot_cols = 2,
    preselection = yield_values,
    preselection_names = yield_labels,
    display_size = 8,
    color_scale = "plasma"
  )
}
## ----results = 'hide', warning=FALSE, message=FALSE---------------------------
# randomForest is an optional dependency: when it is installed, re-seed the
# RNG and attach the package; otherwise skip silently.
if (requireNamespace("randomForest")) {
  set.seed(42)
  library(randomForest)
}
## -----------------------------------------------------------------------------
# Rank the predictors by random-forest importance and keep the ten best.
# Runs only when the optional randomForest package is installed.
if (requireNamespace("randomForest")) {
  # Regress Yield on every other column of the standardised data.
  rf <- randomForest(Yield ~ ., data)

  # Importance matrix as a data.table; its row names (the predictor names)
  # become the "Feature" column.
  importance_df <- as.data.table(rf$importance, keep.rownames = "Feature")

  # Most important first, by decreasing IncNodePurity.
  sorted_importance <- importance_df[order(-IncNodePurity)]

  # head() returns at most ten names and never pads with NA when fewer than
  # ten predictors are available.
  top_10_features <- head(sorted_importance[["Feature"]], 10)
  print(top_10_features)
}
## ----results = 'hide', warning=FALSE, message=FALSE---------------------------
# Repeat the guided LDA tours on the ten predictors selected by the random
# forest. Runs only when the optional randomForest package is installed.
if (requireNamespace("randomForest")) {
  data_rf_sel <- data[, top_10_features, with = FALSE]

  # Despite its name, grand_tour_history_1d holds a guided (LDA) tour.
  grand_tour_history_1d <- save_history(
    data_rf_sel,
    tour_path = guided_tour(lda_pp(yield_values), d = 1)
  )
  lda_tour_history_2d <- save_history(
    data_rf_sel,
    tour_path = guided_tour(lda_pp(yield_values), d = 2)
  )

  # Largest Euclidean norm of any observation in the reduced data.
  half_range <- sqrt(max(rowSums(data_rf_sel^2)))
  feature_names <- colnames(data_rf_sel)

  # Plot descriptors in the list(type, obj) form passed to interactive_tour().
  obj1 <- list(type = "1d_tour", obj = grand_tour_history_1d)
  obj2 <- list(type = "2d_tour", obj = lda_tour_history_2d)
}
## -----------------------------------------------------------------------------
# Show the 1D and 2D tours of the random-forest-selected predictors.
# data_rf_sel and the matching obj1/obj2 are only created when randomForest is
# installed, so the viewer must be skipped without it; otherwise the call
# fails on the missing data_rf_sel object.
if (interactive() && requireNamespace("randomForest", quietly = TRUE)) {
  # NOTE(review): n_subsets is 11 here although yield_values holds bin indices
  # 1..10 -- confirm whether the extra subset is intended.
  interactive_tour(
    data = data_rf_sel,
    plot_objects = list(obj1, obj2),
    feature_names = feature_names,
    half_range = half_range / 2,
    n_plot_cols = 2,
    preselection = yield_values,
    preselection_names = yield_labels,
    n_subsets = 11,
    display_size = 8,
    color_scale = "plasma"
  )
}
## -----------------------------------------------------------------------------
# Coarser discretisation: three equal-width yield bins.
# The labels use min_yield/max_yield, which were computed before the data was
# standardised, so they are expressed in the original Yield units.
intervals <- seq(from = min_yield, to = max_yield, length.out = 4)
new_yield_labels <- paste0(
  "Yield ",
  intervals[-length(intervals)],
  "-",
  intervals[-1]
)

# The bin indices are computed on the current (standardised) Yield column.
new_min_yield <- min(data[["Yield"]], na.rm = TRUE)
new_max_yield <- max(data[["Yield"]], na.rm = TRUE)
new_intervals <- seq(from = new_min_yield, to = new_max_yield, length.out = 4)
new_yield_values <- findInterval(
  data[["Yield"]],
  new_intervals,
  rightmost.closed = TRUE
)
## ----results = 'hide', warning=FALSE, message=FALSE---------------------------
# Re-run the guided LDA tours on the random-forest-selected predictors, now
# steered by the three-bin yield grouping. Runs only when the optional
# randomForest package is installed.
if (requireNamespace("randomForest")) {
  # Despite its name, grand_tour_history_1d holds a guided (LDA) tour.
  grand_tour_history_1d <- save_history(
    data_rf_sel,
    tour_path = guided_tour(lda_pp(new_yield_values), d = 1)
  )
  lda_tour_history_2d <- save_history(
    data_rf_sel,
    tour_path = guided_tour(lda_pp(new_yield_values), d = 2)
  )

  # Largest Euclidean norm of any observation in the reduced data.
  half_range <- sqrt(max(rowSums(data_rf_sel^2)))
  feature_names <- colnames(data_rf_sel)

  # Plot descriptors in the list(type, obj) form passed to interactive_tour().
  obj1 <- list(type = "1d_tour", obj = grand_tour_history_1d)
  obj2 <- list(type = "2d_tour", obj = lda_tour_history_2d)
}
## -----------------------------------------------------------------------------
# Show the 1D and 2D tours for the three-bin yield grouping.
# data_rf_sel and the matching obj1/obj2 are only created when randomForest is
# installed, so the viewer must be skipped without it; otherwise the call
# fails on the missing data_rf_sel object.
if (interactive() && requireNamespace("randomForest", quietly = TRUE)) {
  interactive_tour(
    data = data_rf_sel,
    plot_objects = list(obj1, obj2),
    feature_names = feature_names,
    half_range = half_range / 2,
    n_plot_cols = 2,
    preselection = new_yield_values,
    preselection_names = new_yield_labels,
    n_subsets = 3,
    display_size = 8,
    color_scale = "plasma"
  )
}
# Any scripts or data that you put into this service are public.
# Add the following code to your website.
# For more information on customizing the embed code, read Embedding Snippets.