## ----setup, include=FALSE-----------------------------------------------------
knitr::opts_chunk$set(echo = TRUE, warning = FALSE, message = FALSE, error = FALSE,
fig.align = 'center', fig.height = 4)
set.seed(2112)
library(tidyverse)
library(visualMLE)
library(psych)
library(car)
## -----------------------------------------------------------------------------
study <- data.frame(
Hours=c(0.50,0.75,1.00,1.25,1.50,1.75,1.75,2.00,2.25,2.50,2.75,3.00,
3.25,3.50,4.00,4.25,4.50,4.75,5.00,5.50),
Pass=c(0,0,0,0,0,0,1,0,1,0,1,0,1,0,1,1,1,1,1,1)
)
ggplot(study, aes(x = Hours, y = Pass)) +
geom_point(aes(color = factor(Pass))) +
scale_color_brewer('Pass', type = 'qual', palette = 6)
## -----------------------------------------------------------------------------
linear_regression <- lm(Pass ~ Hours, data = study)
linear_regression
## -----------------------------------------------------------------------------
ggplot(study, aes(x = Hours, y = Pass)) +
geom_point(aes(color = factor(Pass))) +
geom_abline(slope = linear_regression$coefficients[2],
intercept = linear_regression$coefficients[1]) +
scale_color_brewer('Pass', type = 'qual', palette = 6)
## -----------------------------------------------------------------------------
study$linear_resid <- resid(linear_regression)
ggplot(study, aes(x = Hours, y = linear_resid)) + geom_point()
## -----------------------------------------------------------------------------
ols_linear <- function(parameters, predictor, outcome) {
a <- parameters[1] # Intercept
b <- parameters[2] # beta coefficient
predicted <- a + b * predictor
residuals <- outcome - predicted
ss <- sum(residuals^2)
return(ss)
}
## -----------------------------------------------------------------------------
optim_ols_linear <- optim(
c(0, 1), # Initial values
ols_linear,
method = "L-BFGS-B",
predictor = study$Hours,
outcome = study$Pass
)
optim_ols_linear$par
## -----------------------------------------------------------------------------
logit <- function(x, beta0, beta1) {
return( 1 / (1 + exp(-beta0 - beta1 * x)) )
}
ggplot() +
stat_function(fun = logit, args = list(beta0 = 0, beta1 = 1)) +
xlim(-5, 5)
## -----------------------------------------------------------------------------
min_abs_resid <- function(parameters, predictor, outcome) {
a <- parameters[1] # Intercept
b <- parameters[2] # beta coefficient
p <- logit(predictor, a, b)
resid <- outcome - p
return(sum(abs(resid)))
}
## -----------------------------------------------------------------------------
optim_min_abs_resid <- optim(
c(0, 1), # Initial values
min_abs_resid,
method = "L-BFGS-B",
predictor = study$Hours,
outcome = study$Pass
)
optim_min_abs_resid$par
## -----------------------------------------------------------------------------
ggplot(data = study, aes(x = Hours, y = Pass)) +
geom_point(aes(color = factor(Pass))) +
geom_function(fun = logit, args = list(beta0 = optim_min_abs_resid$par[1],
beta1 = optim_min_abs_resid$par[2])) +
scale_color_brewer('Pass', type = 'qual', palette = 6)
## -----------------------------------------------------------------------------
ols_logistic <- function(parameters, predictor, outcome) {
a <- parameters[1] # Intercept
b <- parameters[2] # beta coefficient
p <- logit(predictor, a, b)
resid <- outcome - p
return(sum(resid^2))
}
## -----------------------------------------------------------------------------
optim_ols_logistic <- optim(
c(0, 1), # Initial values
ols_logistic,
method = "L-BFGS-B",
predictor = study$Hours,
outcome = study$Pass
)
optim_ols_logistic$par
## -----------------------------------------------------------------------------
ggplot(data = study, aes(x = Hours, y = Pass)) +
geom_point(aes(color = factor(Pass))) +
geom_function(fun = logit, args = list(beta0 = optim_ols_logistic$par[1],
beta1 = optim_ols_logistic$par[2])) +
scale_color_brewer('Pass', type = 'qual', palette = 6)
## -----------------------------------------------------------------------------
study$predict_ols_logistic <- logit(study$Hours,
beta0 = optim_ols_logistic$par[1],
beta1 = optim_ols_logistic$par[2])
study$resid_ols_logistc <- study$Pass - study$predict_ols_logistic
ggplot(study, aes(x = Hours, y = resid_ols_logistc)) + geom_point()
## -----------------------------------------------------------------------------
loglikelihood.binomial <- function(parameters, predictor, outcome) {
a <- parameters[1] # Intercept
b <- parameters[2] # beta coefficient
p <- logit(predictor, a, b)
ll <- sum( outcome * log(p) + (1 - outcome) * log(1 - p))
return(ll)
}
## -----------------------------------------------------------------------------
optim_binomial <- optim_save(
c(0, 1), # Initial values
loglikelihood.binomial,
method = "L-BFGS-B",
control = list(fnscale = -1),
predictor = study$Hours,
outcome = study$Pass
)
optim_binomial$par
## -----------------------------------------------------------------------------
ggplot(data = study, aes(x = Hours, y = Pass)) +
geom_point(aes(color = factor(Pass))) +
geom_function(fun = logit, args = list(beta0 = optim_ols_logistic$par[1],
beta1 = optim_ols_logistic$par[2]),
linetype = 2) +
geom_function(fun = logit, args = list(beta0 = optim_binomial$par[1],
beta1 = optim_binomial$par[2])) +
scale_color_brewer('Pass', type = 'qual', palette = 6)
## -----------------------------------------------------------------------------
ggplot() + geom_function(fun = log) + xlim(0, 1)
## -----------------------------------------------------------------------------
study$likelihood <- logit(study$Hours,
beta0 = optim_binomial$par[1],
beta1 = optim_binomial$par[2])
study$log_likelihood <- study$Pass * log(study$likelihood) + # If Pass == 1
(1 - study$Pass) * log(1 - study$likelihood) # If Pass == 0
## -----------------------------------------------------------------------------
ggplot(data = study, aes(x = Hours, y = Pass)) +
geom_smooth(method = 'glm', method.args = list(family=binomial(link='logit')), se = FALSE, alpha = 0.2) +
geom_hline(yintercept = 0) +
geom_function(fun = logit, color = 'grey50', size = 1,
args = list(beta0 = optim_binomial$par[1], beta1 = optim_binomial$par[2])) +
geom_segment(aes(xend = Hours, y = 1 - Pass, yend = likelihood, color = factor(Pass)), alpha = 0.5) +
geom_point(aes(y = likelihood, color = factor(Pass), shape = 'Predicted'), size = 3) +
geom_point(aes(color = factor(Pass), shape = 'Observed'), size = 3) +
geom_point(aes(y = log_likelihood, color = factor(Pass), shape = 'Log Likelihood'), size = 3) +
scale_color_brewer('Pass', type = 'qual', palette = 6)
## -----------------------------------------------------------------------------
q <- quantile(study$Hours, probs = seq(0, 1, 0.2))
study$quantile <- cut(study$Hours, breaks = q, include.lowest = TRUE)
probs <- prop.table(table(study$quantile, study$Pass), 1)[,2]
# Since we have probablies of exactly 0 and 1, the log will be undefined unless we fix.
probs[which(probs == 0)] <- .01
probs[which(probs == 1)] <- .99
logits <- log(probs / (1 - probs))
medians <- psych::describeBy(study$Hours, group = study$quantile, mat = TRUE)$median
plot(medians, logits, ylab = 'Log-Odds', xlab = 'Median Hours Studied')
## -----------------------------------------------------------------------------
lr.out <- glm(Pass ~ Hours, data = study, family = binomial(link='logit'))
study$logodds <- lr.out$linear.predictors
ggplot(study, aes(x = logodds, y = Hours)) + geom_point()
car::boxTidwell(logodds ~ Hours, data = study) # Looking for a non-significat p-value
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.