#' Visualizing Maximum Likelihood Estimation Shiny Application
#'
#' This will start a shiny app explore maximum likelihood estimation visually.
#'
#' @references http://rstudio.com/shiny
#' @param df a data.frame to explore.
#' @param default_x default variable for the x-axis.
#' @param default_y default variable for the y-axis.
#' @param ... other parameters passed to [shiny::shinyApp]
#' @export
#' @rdname mle_shiny
mle_shiny <- function(df, default_x, default_y, ...) {
shiny_env <- new.env()
if(!missing(df)) {
assign('thedata', df, shiny_env)
}
if(!missing(default_x)) {
assign('default_x', default_x, shiny_env)
}
if(!missing(default_y)) {
assign('default_y', default_y, shiny_env)
}
environment(mle_shiny_ui) <- shiny_env
environment(mle_shiny_server) <- shiny_env
shiny::shinyApp(
ui = mle_shiny_ui,
server = mle_shiny_server,
...
)
}
#' Shiny UI for maximum likelihood estimation
#'
#' @return a Shiny UI object.
#' @export
#' @rdname mle_shiny
mle_shiny_ui <- function() {
fluidPage(
sidebarLayout(
sidebarPanel(
width = 3,
uiOutput('outcome_ui'),
uiOutput('predictor_ui'),
uiOutput('highlight_ui'),
hr(),
uiOutput('iteration_ui'),
checkboxInput('showOLSRegression',
'Show Regression',
value = TRUE)
),
mainPanel(
width = 9,
tabsetPanel(
tabPanel(
'Plots',
fluidRow(
column(6, plotOutput("scatter_plot", click = "scatter_plot_click")),
# column(6, plotOutput('likelihood_plot'))
column(6, plotOutput("parameter_plot"))
)
# plotOutput("parameter_plot")
),
tabPanel(
'Summary',
h4('MLE Results'),
verbatimTextOutput('mle_summary'),
h4('lm or glm Results'),
verbatimTextOutput('summary'),
h4('Point Info'),
verbatimTextOutput("click_info")
),
tabPanel(
'Likelihood Plots',
plotOutput('likelihood_plots', height = '600px')
),
tabPanel(
'Data',
shiny::dataTableOutput('datatable')
)
)
)
)
)
}
#' Shiny server for maximum likelihood estimation
#'
#' @param input Shiny input object.
#' @param output Shiny output object.
#' @param session Shiny session object.
#' @return a function with Shiny server logic.
#' @export
#' @importFrom reshape2 melt
#' @rdname mle_shiny
mle_shiny_server <- function(input, output, session) {
# Can use a different data set if desired
if(!exists('thedata')) {
message('No data specified, using mtcars...')
data('mtcars', envir = environment())
thedata <- mtcars
default_y <- 'mpg' # Default variable selected for the dependent variable
default_x <- 'wt' # Default variable selected for the independent variable
}
# Keep only numeric and logical columns
numeric.cols <- sapply(thedata, FUN = function(x) { return(is.numeric(x) | is.logical(x)) })
thedata <- thedata[,numeric.cols]
default_x <- mget('default_x', ifnotfound = names(thedata)[2])
default_y <- mget('default_y', ifnotfound = names(thedata)[1])
# NOTE: This app will use row.names to identify and highlight specific points.
# Make sure the row.names are interpretable
row_highlights <- c('None', row.names(thedata))
observe({
row <- nearPoints(thedata, input$scatter_plot_click, maxpoints = 1, addDist = FALSE)
if(nrow(row) > 0) {
updateSelectInput(session, 'highlight', selected = rownames(row)[1])
}
})
getData <- reactive({ # Continuous outcome
req(input$outcome)
req(input$predictor)
mle <- NULL
if(!isBinary(thedata[,input$outcome])) {
mle <- optim_save(
runif(3), # Random initial values
loglikelihood_normal, # likelihood function
lower = c(-Inf, -Inf, 1.e-5), # The lower bounds for the values, note sigma (error), cannot be negative
upper = c(Inf, Inf, sd(thedata[,input$predictor]) ),
method = "L-BFGS-B", # https://en.wikipedia.org/wiki/Nelder–Mead_method
control = list(fnscale = -1), # Indicates that the maximum is desired rather than the minimum
predictor = thedata[,input$predictor],
outcome = thedata[,input$outcome]
)
} else { # Logistic Regression
mle <- optim_save(
c(0, 1), # Initial values
loglikelihood_binomial,
method = "L-BFGS-B",
control = list(fnscale = -1),
predictor = thedata[,input$predictor],
outcome = thedata[,input$outcome]
)
}
return(mle)
})
output$outcome_ui <- renderUI({
selectInput('outcome',
'Outcome (dependent) variable:',
choices = names(thedata),
selected = default_y)
})
output$predictor_ui <- renderUI({
selectInput('predictor',
'Predictor (independent) variable:',
choices = names(thedata),
selected = default_x)
})
output$highlight_ui <- renderUI({
selectInput('highlight',
'Highlight data point:',
choices = row_highlights,
selected = '')
})
output$datatable <- shiny::renderDataTable({ return(thedata) })
output$iteration_ui <- renderUI({
optim_run <- getData()
sliderInput("iteration",
"Iteration:",
min = 1,
max = length(optim_run$iterations),
value = 1,
animate = animationOptions(
interval = 2000, # TODO: need to find a good pause
loop = FALSE
))
})
output$scatter_plot <- renderPlot({
req(input$iteration)
p <- NULL
optim_run <- getData()
if(!isBinary(thedata[,input$outcome])) {
intercept <- optim_run$iterations[[input$iteration]][1]
slope <- optim_run$iterations[[input$iteration]][2]
sigma <- optim_run$iterations[[input$iteration]][3]
p <- ggplot(thedata, aes_string(x = input$predictor,
y = input$outcome)) +
geom_point()
if(input$showOLSRegression) {
p <- p + geom_smooth(method = 'lm',
formula = y ~ x,
color = 'grey30',
se = FALSE, size = 2, alpha = 0.5)
}
p <- p + geom_abline(intercept = intercept,
slope = slope,
color = 'blue')
if(input$highlight != 'None') {
row <- thedata[input$highlight,,drop = FALSE]
p <- p + geom_point(data = row, color = 'red')
heightFactor <- 1 # Increase the multiplier to make the distribution higher
k <- 4 # How many standard deviations to plot
x <- seq(-k * sigma, k * sigma, length.out = 50)
y <- dnorm(x, 0, sigma) / dnorm(0, 0, sigma) * heightFactor
x0 <- row[input$highlight, input$predictor]
y0 <- row[input$highlight, input$outcome]
yhat <- slope * x0 + intercept
path <- data.frame(x = y + x0, y = x + yhat)
segment2 <- data.frame(x = x0,
y = y0,
xend = dnorm(yhat - y0, 0, sigma) / dnorm(0, 0, sigma) * heightFactor + x0,
yend = y0)
segment3 <- data.frame(x = x0,
y = yhat,
xend = dnorm(0, 0, sigma) / dnorm(0, 0, sigma) * heightFactor + x0,
yend = yhat)
p <- p +
geom_segment(data = segment3, aes(x = x, y = y, xend = xend, yend = yend), alpha = 0.3) +
geom_segment(data = segment2, aes(x = x, y = y, xend = xend, yend = yend), color = 'red') +
geom_point(data = segment2, aes(x = x, y = y), color = '#e31a1c', size = 2) +
geom_point(data = segment2, aes(x = xend, y = y), color = '#e31a1c', size = 2) +
geom_vline(xintercept = x0) +
geom_path(data = path, aes(x,y), color = "blue")
}
} else {
beta0 <- optim_run$iterations[[input$iteration]][1]
beta1 <- optim_run$iterations[[input$iteration]][2]
p <- ggplot(thedata, aes_string(x = input$predictor,
y = input$outcome)) +
geom_point(aes(color = logistic(thedata[,input$predictor], beta0, beta1) > 0.5))
if(input$showOLSRegression) {
p <- p + geom_smooth(method = 'glm', formula = y ~ x, se = FALSE,
method.args = list(family = binomial(link = 'logit')))
}
if(input$highlight != 'None') {
row <- thedata[input$highlight,]
segment <- data.frame(x = row[,input$predictor],
y = row[,input$outcome],
xend = row[,input$predictor],
yend = logistic(row[,input$predictor], beta0, beta1))
p <- p +
geom_point(data = row, color = 'red', size = 2) +
geom_segment(data = segment, aes(x = x, xend = xend, y = y, yend = yend),
color = 'red')
}
p <- p + stat_function(fun = logistic, n = 101,
args = list(beta0 = beta0, beta1 = beta1) ) +
scale_color_hue('Predicted Pass > 0.5') +
theme(legend.position = c(0.85, 0.15))
}
return(p)
})
output$parameter_plot <- renderPlot({
optim_run <- getData()
df <- optim_run$iterations_df
if(!isBinary(thedata[,input$outcome])) {
names(df) <- c('Intercept', 'Slope', 'Sigma', 'LogLikelihood', 'Iteration')
} else {
names(df) <- c('Intercept', 'Slope', 'LogLikelihood', 'Iteration')
}
df <- reshape2::melt(df, id.var = 'Iteration')
p <- ggplot(df, aes(x = Iteration, y = value, color = variable)) +
geom_vline(xintercept = input$iteration)
p <- p + geom_path()
p <- p + facet_wrap(~ variable, scales = "free_y", ncol = 1) +
xlab('Iteration') + ylab('Parameter Estimate') +
theme(legend.position = 'none')
return(p)
})
output$likelihood_plot <- renderPlot({
optim_run <- getData()
p <- NULL
if(input$highlight != 'None') {
row <- thedata[input$highlight,]
df <- optim_run$iterations_df
if(!isBinary(thedata[,input$outcome])) {
tmp <- df %>% dplyr::filter(Iteration == input$iteration)
a <- tmp[1,1]
b <- tmp[1,2]
sigma <- tmp[1,3]
predictor <- row[1,input$predictor]
predicted.out <- a + b * predictor
outcome <- row[1,input$outcome]
d <- dnorm(outcome, predicted.out, sigma)
p <- ggplot() +
stat_function(fun = dnorm,
n = 101,
args = list(mean = predicted.out, sd = sigma)) +
annotate(geom = 'segment', x = outcome, y = 0, xend = outcome, yend = d, color = 'red') +
annotate(geom = 'point', x = outcome, y = d, color = 'red', size = 2) +
xlim(c(min(thedata[,input$predictor], predicted.out - 4 * sigma, outcome),
max(thedata[,input$predictor], predicted.out + 4 * sigma, outcome))) +
xlab('Outcome') + ylab('Density') +
coord_flip()
} else {
}
}
return(p)
})
output$summary <- renderPrint({
if(!isBinary(thedata[,input$outcome])) {
lm.out <- lm(as.formula(paste(input$outcome, ' ~ ', input$predictor)),
data = thedata)
summary(lm.out)
} else {
glm.out <- glm(as.formula(paste(input$outcome, ' ~ ', input$predictor)),
data = thedata,
family = binomial(link = 'logit'))
summary(glm.out)
}
})
output$mle_summary <- renderPrint({
optim_run <- getData()
cat('Final parameters from MLE:', optim_run$par,
'\nParameters from iteration', input$iteration, ':',
optim_run$iterations[[input$iteration]], '\n')
})
output$click_info <- renderPrint({
row <- 'Nothing selected.'
if(input$highlight != 'None') {
row <- thedata[input$highlight,]
}
return(row)
})
output$likelihood_plots <- renderPlot({
df <- getData()$iterations_df
tmp <- df %>% dplyr::filter(Iteration == input$iteration)
plots <- list()
nplots <- nrow(thedata)
if(!isBinary(thedata[,input$outcome])) {
for(i in 1:min(nplots, nrow(thedata))) {
a <- tmp[1,1]
b <- tmp[1,2]
sigma <- tmp[1,3]
predictor <- thedata[i,input$predictor]
predicted.out <- a + b * predictor
outcome <- thedata[i,input$outcome]
d <- dnorm(outcome, predicted.out, sigma)
plots[[i]] <- ggplot() +
stat_function(fun = dnorm,
n = 101,
args = list(mean = predicted.out, sd = sigma)) +
annotate(geom = 'segment', x = outcome, y = 0, xend = outcome, yend = d, color = 'red') +
annotate(geom = 'point', x = outcome, y = d, color = 'red', size = 2) +
xlim(c(min(thedata[,input$outcome], predicted.out - 3 * sigma),
max(thedata[,input$outcome], predicted.out + 3 * sigma))) +
# ylim(c(0, .2)) +
ggtitle(row.names(thedata)[i]) +
xlab('Outcome') + ylab('Density')
}
} else {
for(i in 1:min(nplots, nrow(thedata))) {
beta0 <- tmp[1,1]
beta1 <- tmp[1,2]
row <- thedata[i,]
segment <- data.frame(x = row[,input$predictor],
y = row[,input$outcome],
xend = row[,input$predictor],
yend = logistic(row[,input$predictor], beta0, beta1))
plots[[i]] <- ggplot(thedata, aes_string(x = thedata[,input$predictor],
y = thedata[,input$outcome])) +
geom_point(data = segment, aes(x = x, y = y), color = 'red', size = 2) +
geom_segment(data = segment, aes(x = x, xend = xend, y = y, yend = yend),
color = 'red') +
stat_function(fun = logistic, n = 101,
args = list(beta0 = beta0, beta1 = beta1) ) +
ggtitle(row.names(thedata)[i]) +
xlab('Predictor') + ylab('Outcome')
}
}
title <- ggdraw() +
draw_label(paste0('Likelihood Distribution Plots for Iteration ', input$iteration), fontface='bold')
p <- plot_grid(plotlist = plots)
plot_grid(title, p, ncol=1, rel_heights=c(0.1, 1))
})
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.