ml-practice-r: Practices of ML

Documented in logit_example_01

#' Logistic Regression example 01
#'
#' based on http://www.ats.ucla.edu/stat/r/dae/logit.htm
#' @export

logit_example_01 <- function() {
  flog.info("Logistic Regression example 01")
  data <- read.csv("./data/logit/logit-binary.csv")
  flog.info("read data from csv file.")
  h <- head(data)
  summary(data)
  sapply(data, sd)
  xtabs(~ admit + rank, data = data)
  
  # prepare data
  data$rank <- factor(data$rank)
  is.factor(data$rank)
  
  mylogit <- glm(admit ~ gre + gpa + rank, data = data, family = "binomial")
  summary(mylogit)

  ## CIs using profiled log-likelihood
  confint(mylogit)
  
  ## CIs using standard errors
  confint.default(mylogit)
  
  aod::wald.test(b = coef(mylogit), Sigma = vcov(mylogit), Terms = 4:6)
  
  l <- cbind(0,0,0,1,-1,0)
  aod::wald.test(b = coef(mylogit), Sigma = vcov(mylogit), L = l)
  
  ## odds ratios only
  exp(coef(mylogit))
  
  ## odds ratios and 95% CI
  exp(cbind(OR = coef(mylogit), confint(mylogit)))
  
  # predict
  newdata1 <- with(data, data.frame(gre = mean(gre), gpa = mean(gpa), rank = factor(1:4)))
  
  ## view data frame
  newdata1
  
  newdata1$rankP <- predict(mylogit, newdata = newdata1, type = "response")
  newdata1
  
  newdata2 <- with(data,
                   data.frame(gre = rep(seq(from = 200, to = 800, length.out = 100), 4),
                              gpa = mean(gpa), rank = factor(rep(1:4, each = 100))))
  typeof(newdata2)
  attributes(newdata2)
  nrow(newdata2)
  
  newdata3 <- cbind(newdata2, predict(mylogit, newdata = newdata2, type="link", se=TRUE))
  newdata3 <- within(newdata3, {
    PredictedProb <- plogis(fit)
    LL <- plogis(fit - (1.96 * se.fit))
    UL <- plogis(fit + (1.96 * se.fit))
  })
  
  ggplot(newdata3, aes(x = gre, y = PredictedProb)) +
    geom_ribbon(aes(ymin = LL, ymax = UL, fill = rank), alpha = .2) +
    geom_line(aes(colour = rank), size=1)
}