#' Dashboard generation for logistic regression models
#'
#' @param config list of configuration elements
#' @param data dataframe
#' @param model model object - should be one of
#' glm - with binomial family
#' glmnet
#' cv.glmnet
#' @param cv_metrics metrics returned by runCrossValidationLogReg
#' @return dashboard object for rendering
#' @export
#' @author Todd Morley
interactive_lr <- function(
config,
data,
model,
cv_metrics = NULL
){
if(config$`Use Weights`) {
data <- data[,-NCOL(data)]
}
requireNamespace("flightdeck")
# optimal cutoff probability from ROC analysis,
# weighing sensitivity and specificity equally;
# returns a named vector: sensitivity, specificity,
# and optimal_cutoff
optimal_cutoff <- function(
perf,
pred
){
cut.ind = mapply(FUN = function(x, y, p){
d = (x - 0)^2 + (y - 1)^2
ind = which(d == min(d))
c(
sensitivity = y[[ind]],
specificity = 1 - x[[ind]],
optimal_cutoff = p[[ind]])
},
perf@x.values,
perf@y.values,
pred@cutoffs
)
}
# UI layout constants
totalWidth <- 12
halfWidth <- 6
digits <- 3
# Prep and test inputs.
glm_b <- FALSE
regularized_b <- FALSE
cv_b <- FALSE
if('glm' %in% class(model)){
glm_b <- TRUE
title <- XMSG(in.targetString_sc = 'Classical Logistic Regression')
} else if(any(c('reg_glm', 'glmnet') %in% class(model))){
regularized_b <- TRUE
title <- XMSG(in.targetString_sc = 'Regularized Logistic Regression')
} else if(any(c('cv.glmt', 'cv.glmnet') %in% class(model))){
cv_b <- TRUE
title <- XMSG(in.targetString_sc = 'Cross-Validated Logistic Regression')
} else{
return(
badDash(
XMSG(
in.targetString_sc = 'Interactive visualization not available for models of class @1.',
in.firstBindVariable_sc = class(model)
)
)
)
}
logistic_b <- FALSE
probit_b <- FALSE
log_log_b <- FALSE
if (glm_b) {
if(
model$family$family == 'binomial'
){
if(model$family$link == 'logit'){
logistic_b <- TRUE
link_function <- 'logit'
} else if(model$family$link == 'probit'){
probit_b <- TRUE
link_function <- 'probit'
} else if(model$family$link == 'cloglog'){
log_log_b <- TRUE
link_function <- 'complementary log log'
} else{
return(badDash(
XMSG(
in.targetString_sc = 'An invalid link function was passed to interactive_lr. Please contact Alteryx support!')
))
}
} else{
return(badDash(
XMSG(
in.targetString_sc = 'An invalid model family was passed to interactive_lr. Please contact Alteryx support!'
)
))
}
}
the_actual_values <- data[, 1]
fitted_intercept <- !config$`Omit Constant`
alpha <- config$alpha
use_cv_lambda_1se <- config$lambda_1se
lambda <- config$lambda_no_cv
n <- nrow(data)
p <- ncol(data) - 1 - as.numeric(config$`Use Weights`)
# model-summary numbers
if(glm_b){
the_fitted_values <- unname(model$fitted.values)
} else{
independent_variable_m <- df2NumericMatrix(
x = data[ , -1, drop = FALSE],
filtering_message = XMSG(
in.targetString_sc = "Non-numeric variables are among the predictors. They are now being removed."
),
convertVectorToDataFrame = TRUE
)
if(regularized_b){
lambda <- config$lambda_no_cv
} else{
if(use_cv_lambda_1se){
lambda <- model$lambda.1se
} else{
lambda <- model$lambda.min
}
}
if(all(unlist(model$coefficients[-1] == 0))){
msg1 <- XMSG(
in.targetString_sc = "All model coefficients were zero. Cannot generate dashboard."
)
if(regularized_b) {
msg2 <- XMSG(in.targetString_sc = "Consider using a smaller value of lambda.")
} else { # cv_b is true
if (config$lambda_1se) {
msg2 <- XMSG(
in.targetString_sc = "Consider using lambda.min instead of lambda for simple model."
)
} else {
msg2 <- ""
}
}
return(badDash(paste0(msg1,msg2)))
}
the_fitted_values <- unname(
predict(
object = model,
newx = independent_variable_m,
s = lambda,
type = 'response'
)
)
}
use_sampling_weights_b <- config$`Use Weights`
n <- nrow(data)
p <- ncol(data) - 1
actual_values <- data[, 1]
if (is.factor(actual_values)) {
actual_values <- as.numeric(actual_values) - 1
}
actual_values_f <- factor(
actual_values,
levels = 0:1,
labels = c('no', 'yes')
)
probability_v <- the_fitted_values
# ROCR computations
prediction_object <- ROCR::prediction(
predictions = probability_v,
labels = actual_values
)
roc_performance <- ROCR::performance(
prediction.obj = prediction_object,
measure = 'tpr',
x.measure = 'fpr'
)
optimal_cutoff_nv <- optimal_cutoff(
perf = roc_performance,
pred = prediction_object
)
fitted_values <- as.integer(probability_v >= optimal_cutoff_nv[3])
if(length(unique(fitted_values)) == 1) {
msg1 <- XMSG(in.targetString_sc = "All values are being fitted to the same class. ")
if(regularized_b) {
msg2 <- XMSG(in.targetString_sc = "Consider using a smaller value of lambda. ")
}
else if(cv_b) {
msg2 <- XMSG(in.targetString_sc = "Consider using a different value of lambda. ")
}
msg3 <- XMSG(in.targetString_sc = "Interactive dashboard could not be generated.")
return(badDash(paste0(msg1,msg2,msg3)))
}
if(is.null(x = cv_metrics)){
true_positive_count <- length(
intersect(
which(fitted_values == 1),
which(actual_values == 1)
)
)
true_negative_count <- length(
intersect(
which(fitted_values == 0),
which(actual_values == 0)
)
)
false_positive_count <- length(which(fitted_values > actual_values))
false_negative_count <- length(which(fitted_values < actual_values))
accuracy <- (true_positive_count + true_negative_count) / n
precision <- true_positive_count / (true_positive_count + false_positive_count)
recall <- true_positive_count / (true_positive_count + false_negative_count)
# This should be a harmonic mean. It could be NaN, which for now is OK.
f1 <- 1 / mean(1 / c(precision, recall))
} else{
true_positive_count <- cv_metrics['pred_pos_actual_pos']
true_negative_count <- cv_metrics['pred_neg_actual_neg']
false_positive_count <- cv_metrics['pred_pos_actual_neg']
false_negative_count <- cv_metrics['pred_neg_actual_pos']
accuracy <- cv_metrics['accuracy']
precision <- cv_metrics['precision']
recall <- cv_metrics['recall']
f1 <- cv_metrics['f1']
}
confusion_matrix_m <- matrix(
data = c(
round(x = true_positive_count, digits = digits),
round(x = false_negative_count, digits = digits),
round(x = false_positive_count, digits = digits),
round(x = true_negative_count, digits = digits)
),
nrow = 2,
ncol = 2
)
rownames(confusion_matrix_m) <- c(
XMSG(in.targetString_sc = 'Predicted Positive'),
XMSG(in.targetString_sc = 'Predicted Negative')
)
colnames(confusion_matrix_m) <- c(
XMSG(in.targetString_sc = 'Actual Positive'),
XMSG(in.targetString_sc = 'Actual Negative')
)
# Prepare UI elements.
# page 1: model summary
row_1_1 <- fdRow(
fdInfoBox(
title = XMSG('Accuracy'),
value = round(
x = accuracy,
digits = digits
),
icon = fdIcon(
name = 'check',
lib = 'font-awesome'
),
color = 'blue',
width = halfWidth
),
fdInfoBox(
title = XMSG(in.targetString_sc = 'Precision'),
value = round(
x = precision,
digits = digits
),
icon = fdIcon(
name = 'check',
lib = 'font-awesome'
),
color = 'blue',
width = halfWidth
)
)
row_1_2 <- fdRow(
fdInfoBox(
title = XMSG(in.targetString_sc = 'Recall'),
value = round(
x = recall,
digits = digits
),
icon = fdIcon(
name = 'check',
lib = 'font-awesome'
),
color = 'blue',
width = halfWidth
),
fdInfoBox(
title = XMSG(in.targetString_sc = 'F1'),
value = round(
x = f1,
digits = digits
),
icon = fdIcon(
name = 'check',
lib = 'font-awesome'
),
color = 'blue',
width = halfWidth
)
)
row_1_3 <- fdRow(
fdInfoBox(
title = XMSG(in.targetString_sc = 'Optimal Probability Cutoff'),
value = round(
x = optimal_cutoff_nv[3],
digits = digits
),
icon = fdIcon(
name = 'check',
lib = 'font-awesome'
),
color = 'blue',
width = totalWidth
)
)
row_1_4 <- fdRow(
fdBox(
fdPlotConfusionMatrix(x = confusion_matrix_m),
width = totalWidth
)
)
page_1 <- fdPage(
row_1_1,
row_1_2,
row_1_3,
row_1_4,
id = 'page_1',
display = TRUE
)
# page 2: conditional-density plots
independent_variables <- names(data[, -1])
cd_plots <- lapply(
independent_variables,
function(x){
if(is.numeric(data[[x]])){
plt <- fdPlotConditionalDensity(
x = data[[x]],
y = actual_values_f,
xlab = x,
showlegend = F
)
fdColumn(
plt,
width = halfWidth
)
} else {
print(
XMSG(
in.targetString_sc = "Conditional-density plot not generated for variable @1' because it was categorical.",
in.firstBindVariable_sc = x
)
)
}
}
)
row_2_1 <- fdRow(
fdBox(
cd_plots,
width = totalWidth
)
)
page_2 <- fdPage(
row_2_1,
id = 'page_2',
display = FALSE
)
# page 3: advanced diagnostics
roc_chart <- fdPlotClassificationPerformance(
performance(
prediction_object,
"tpr",
"fpr"
),
digits = digits
)
precision_recall_chart <- fdPlotClassificationPerformance(
performance(
prediction_object,
"prec",
"rec"
),
digits = digits
)
row_3_1 <- fdRow(
fdBox(
fdTabsetPanel(
selected = makeHtmlId('ROC Chart'),
fdTabPanel(
XMSG(in.targetString_sc = 'ROC Chart'),
roc_chart
),
fdTabPanel(
XMSG(in.targetString_sc = 'Precision vs. Recall'),
precision_recall_chart
)
),
width = totalWidth
)
)
page_3 <- fdPage(
row_3_1,
id = 'page_3',
display = FALSE
)
# render
the_header <- fdHeader(title = title)
sidebar <- fdSidebarMenu(
fdMenuItem(
text = XMSG(in.targetString_sc = 'Summary'),
icon = fdIcon(
name = 'caret-right',
lib = "font-awesome"
),
pageName = 'page_1'
),
fdMenuItem(
text = XMSG(in.targetString_sc = 'Conditional-Density Plots'),
icon = fdIcon(
name = 'caret-right',
lib = "font-awesome"
),
pageName = 'page_2'
),
fdMenuItem(
text = XMSG(in.targetString_sc = 'Performance'),
icon = fdIcon(
name = 'caret-right',
lib = "font-awesome"
),
pageName = 'page_3'
)
)
body <- fdBody(
page_1,
page_2,
page_3
)
titleWidth <- computeWidth(title)
fdBoard(
fdHeader(
title = title,
titleWidth = titleWidth
),
fdSidebar(
sidebar,
sidebarWidth = titleWidth
),
body,
fixed = TRUE
)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.