Nothing
## ----include=FALSE------------------------------------------------------------
knitr::opts_chunk$set(
collapse = TRUE,
comment = "#>",
fig.width = 7,
fig.height = 5,
fig.align = "center"
)
## ----echo = FALSE, results='hide', message=FALSE, warning=FALSE---------------
library(VIM)
library(data.table)
## ----setup_2, message = FALSE-------------------------------------------------
data <- as.data.table(VIM::sleep)
a <- aggr(sleep, plot = FALSE)
plot(a, numbers = TRUE, prop = FALSE)
## ----message = FALSE----------------------------------------------------------
dataDS <- sleep[, c("Dream", "Sleep")]
marginplot(dataDS, main = "Missing Values")
## ----include=TRUE, results='hide', message=FALSE, warning=FALSE---------------
result <- vimpute(
data = data,
pred_history = TRUE)
## -----------------------------------------------------------------------------
print(head(result$data, 3))
## -----------------------------------------------------------------------------
dataDS <- as.data.frame(result$data[, c("Dream", "Sleep", "Dream_imp", "Sleep_imp")])
marginplot(dataDS, delimiter = "_imp", main = "Imputation with Default Model")
## ----include=TRUE, results='hide', message=FALSE, warning=FALSE---------------
result_mixed <- vimpute(
data = data,
method = list(NonD = "robust", Dream = "ranger", Sleep = "xgboost", Span = "ranger" , Gest = "regularized"),
pred_history = TRUE
)
## -----------------------------------------------------------------------------
dataDS <- as.data.frame(result_mixed$data[, c("Dream", "Sleep", "Dream_imp", "Sleep_imp")])
marginplot(dataDS, delimiter = "_imp", main = "Imputation with different Models for each Variable")
## ----include=TRUE, results='hide', message=FALSE, warning=FALSE, echo=F-------
result_xgboost <- vimpute(
data = data,
method = setNames(as.list(rep("xgboost", ncol(data))), names(data)),
pred_history = TRUE, verbose = FALSE
)
dataDS_xgboost <- as.data.frame(result_xgboost$data[, c("Dream", "Sleep", "Dream_imp", "Sleep_imp")])
result_regularized <- vimpute(
data = data,
method = setNames(as.list(rep("regularized", ncol(data))), names(data)),
pred_history = TRUE
)
dataDS_regularized <- as.data.frame(result_regularized$data[, c("Dream", "Sleep", "Dream_imp", "Sleep_imp")])
## ----echo=F, warning=F--------------------------------------------------------
par(mfrow = c(1, 2))
marginplot(dataDS_xgboost, delimiter = "_imp", main = "Imputation with xgboost")
marginplot(dataDS_regularized, delimiter = "_imp", main = "Imputation with Regularized")
par(mfrow = c(1, 1))
## ----eval = FALSE-------------------------------------------------------------
# result <- vimpute(
# data = data,
# method = list(NonD = "robust",
# Dream = "ranger",
# Sleep = "xgboost",
# Span = "ranger" ,
# Gest = "regularized"),
# pmm = list(NonD = FALSE, Dream = TRUE, Sleep = FALSE, Span = FALSE , Gest = TRUE)
# )
## ----eval = FALSE-------------------------------------------------------------
# result <- vimpute(
# data = data,
# method = setNames(as.list(rep("regularized", ncol(data))), names(data))
# formula = list(
# NonD ~ Dream + Sleep, # Linear combination
# Span ~ Dream:Sleep + Gest, # With interaction term
# log(Gest) ~ Sleep + exp(Span) # With transformations
# )
# )
## ----eval = FALSE-------------------------------------------------------------
# result <- vimpute(
# data = data,
# tune = TRUE
# )
## ----eval = FALSE-------------------------------------------------------------
# result <- vimpute(
# data = data,
# nseq = 20,
# eps = 0.01
# )
## ----eval = FALSE-------------------------------------------------------------
# result <- vimpute(
# data = data,
# imp_var = TRUE
# )
## -----------------------------------------------------------------------------
print(tail(result$pred_history, 9))
## -----------------------------------------------------------------------------
library(reactable)
data(iris)
df <- as.data.table(iris)
colnames(df) <- c("S.Length","S.Width","P.Length","P.Width","Species")
# randomly produce some missing values in the data
set.seed(1)
nbr_missing <- 50
y <- data.frame(row=sample(nrow(iris),size = nbr_missing,replace = T),
col=sample(ncol(iris)-1,size = nbr_missing,replace = T))
y<-y[!duplicated(y),]
df[as.matrix(y)]<-NA
aggr(df)
## -----------------------------------------------------------------------------
sapply(df, function(x)sum(is.na(x)))
## ----results='hide', message=FALSE, warning=FALSE,include=FALSE---------------
library(reactable)
data(iris)
df <- as.data.table(iris)
colnames(df) <- c("S.Length","S.Width","P.Length","P.Width","Species")
# Create complete copy before introducing NAs
complete_data <- df
# Randomly produce missing values
set.seed(1)
nbr_missing <- 50
y <- data.frame(row = sample(nrow(df), size = nbr_missing, replace = TRUE),
col = sample(ncol(df), size = nbr_missing, replace = TRUE))
y <- y[!duplicated(y),]
df[as.matrix(y)] <- NA
# Perform imputation
result <- vimpute(data = df, pred_history = TRUE)
# Extracting the imputed columns from result$data
imputed_columns <- grep("_imp$", names(result$data), value = TRUE)
# Create a function to compare true and imputed values
compare_values <- function(true_data, pred_data, imputed_data, col_name) {
comparison <- data.frame(
True_Value = true_data[[col_name]],
Imputed_Value = ifelse(imputed_data, pred_data[[col_name]], NA)
)
comparison <- comparison[!is.na(comparison$Imputed_Value), ]
return(comparison)
}
# Initialize an empty list to store the comparison tables
comparison_list <- list()
# Loop through each imputed column and create a comparison table
for (imputed_col in imputed_columns) {
col_name <- sub("_imp$", "", imputed_col)
comparison_list[[col_name]] <- compare_values(complete_data, result$data, result$data[[imputed_col]], col_name)
}
# Prepare the results in a combined wide format, ensuring equal row numbers
results <- cbind(
"TRUE1" = head(comparison_list[["S.Length"]][, "True_Value"], 5),
"IMPUTED1" = head(comparison_list[["S.Length"]][, "Imputed_Value"], 5),
"TRUE2" = head(comparison_list[["S.Width"]][, "True_Value"], 5),
"IMPUTED2" = head(comparison_list[["S.Width"]][, "Imputed_Value"], 5),
"TRUE3" = head(comparison_list[["P.Length"]][, "True_Value"], 5),
"IMPUTED3" = head(comparison_list[["P.Length"]][, "Imputed_Value"], 5),
"TRUE4" = head(comparison_list[["P.Width"]][, "True_Value"], 5),
"IMPUTED4" = head(comparison_list[["P.Width"]][, "Imputed_Value"], 5)
)
# Print the combined wide format table
print(results)
## ----echo=F,warning=F---------------------------------------------------------
# Load the reactable library
library(reactable)
# Create the reactable
reactable(results, columns = list(
TRUE1 = colDef(name = "True"),
IMPUTED1 = colDef(name = "Imputed"),
TRUE2 = colDef(name = "True"),
IMPUTED2 = colDef(name = "Imputed"),
TRUE3 = colDef(name = "True"),
IMPUTED3 = colDef(name = "Imputed"),
TRUE4 = colDef(name = "True"),
IMPUTED4 = colDef(name = "Imputed")
),
columnGroups = list(
colGroup(name = "S.Length", columns = c("TRUE1", "IMPUTED1")),
colGroup(name = "S.Width", columns = c("TRUE2", "IMPUTED2")),
colGroup(name = "P.Length", columns = c("TRUE3", "IMPUTED3")),
colGroup(name = "P.Width", columns = c("TRUE4", "IMPUTED4"))
),
striped = TRUE,
highlight = TRUE,
bordered = TRUE
)
## ----results='hide', message=FALSE, warning=FALSE,include=FALSE---------------
library(reactable)
library(VIM)
data(iris)
# Create complete copy before introducing NAs
complete_data <- iris
colnames(complete_data) <- c("S.Length","S.Width","P.Length","P.Width","Species")
df <- copy(complete_data)
# Randomly produce missing values
set.seed(1)
nbr_missing <- 50
y <- data.frame(row = sample(nrow(df), size = nbr_missing, replace = TRUE),
col = sample(ncol(df), size = nbr_missing, replace = TRUE))
y <- y[!duplicated(y),]
df[as.matrix(y)] <- NA
# Perform imputation with proper method specification
result <- vimpute(
data = df,
method = setNames(lapply(names(df), function(x) "xgboost"),names(df)),
pred_history = TRUE
)
# Extracting the imputed columns from result$data
imputed_columns <- grep("_imp$", names(result$data), value = TRUE)
# Create a function to compare true and imputed values
compare_values <- function(true_data, pred_data, imputed_data, col_name) {
comparison <- data.frame(
True_Value = true_data[[col_name]],
Imputed_Value = ifelse(imputed_data, pred_data[[col_name]], NA)
)
comparison <- comparison[!is.na(comparison$Imputed_Value), ]
return(comparison)
}
# Initialize an empty list to store the comparison tables
comparison_list <- list()
# Loop through each imputed column and create a comparison table
for (imputed_col in imputed_columns) {
col_name <- sub("_imp$", "", imputed_col)
comparison_list[[col_name]] <- compare_values(complete_data, result$data, result$data[[imputed_col]], col_name)
}
# Prepare the results in a combined wide format, ensuring equal row numbers
results <- cbind(
"TRUE1" = head(comparison_list[["S.Length"]][, "True_Value"], 5),
"IMPUTED1" = head(comparison_list[["S.Length"]][, "Imputed_Value"], 5),
"TRUE2" = head(comparison_list[["S.Width"]][, "True_Value"], 5),
"IMPUTED2" = head(comparison_list[["S.Width"]][, "Imputed_Value"], 5),
"TRUE3" = head(comparison_list[["P.Length"]][, "True_Value"], 5),
"IMPUTED3" = head(comparison_list[["P.Length"]][, "Imputed_Value"], 5),
"TRUE4" = head(comparison_list[["P.Width"]][, "True_Value"], 5),
"IMPUTED4" = head(comparison_list[["P.Width"]][, "Imputed_Value"], 5)
)
# Print the combined wide format table
print(results)
## ----echo=F,warning=F---------------------------------------------------------
# Load the reactable library
library(reactable)
# Create the reactable
reactable(results, columns = list(
TRUE1 = colDef(name = "True"),
IMPUTED1 = colDef(name = "Imputed"),
TRUE2 = colDef(name = "True"),
IMPUTED2 = colDef(name = "Imputed"),
TRUE3 = colDef(name = "True"),
IMPUTED3 = colDef(name = "Imputed"),
TRUE4 = colDef(name = "True"),
IMPUTED4 = colDef(name = "Imputed")
),
columnGroups = list(
colGroup(name = "S.Length", columns = c("TRUE1", "IMPUTED1")),
colGroup(name = "S.Width", columns = c("TRUE2", "IMPUTED2")),
colGroup(name = "P.Length", columns = c("TRUE3", "IMPUTED3")),
colGroup(name = "P.Width", columns = c("TRUE4", "IMPUTED4"))
),
striped = TRUE,
highlight = TRUE,
bordered = TRUE
)
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.