# Document-wide chunk default: do not echo chunk source code in the output.
knitr::opts_chunk$set(echo = FALSE)
# rmdhelp::show_knit_hook_call()
# Register rmdhelp's hook function under the name `hook_convert_odg`; knitr
# runs a chunk hook for chunks that set a chunk option of the same name.
# NOTE(review): presumably the hook converts .odg drawings into an image
# format that can be included -- confirm in the rmdhelp documentation.
knitr::knit_hooks$set(hook_convert_odg = rmdhelp::hook_convert_odg)

Why Model Selection

# produce output for two models
# The commented-out statements below are kept for reference only and are not
# executed: they read the data set and fit two single-predictor regressions
# of `Body Weight` (on `Breast Circumference` and on Breed).
# s_ex04p01_data_path <- "https://charlotte-ngs.github.io/asmss2022/data/asm_bw_flem.csv"
# s_ex04p01_data_path <- file.path(here::here(), "docs", "data", "asm_bw_flem.csv")
# tbl_ex04p01_data <- readr::read_csv(file = s_ex04p01_data_path)
# lm_ex04p01_bwbc <- lm(formula = `Body Weight` ~ `Breast Circumference`, data = tbl_ex04p01_data)
# summary(lm_ex04p01_bwbc)
# lm_ex04p01_bwbreed <- lm(formula = `Body Weight` ~ Breed, data = tbl_ex04p01_data)
# Only the existing PNG file is included in the rendered document.
# NOTE(review): the PNG is presumably derived from odg/comp-two-models.odg
# (see the disabled call below) -- confirm.
#rmdhelp::use_odg_graphic(ps_path = "odg/comp-two-models.odg")
knitr::include_graphics(path = "odg/comp-two-models.png")

Full Model

# Read the exercise data and print the summary of the full model.
#
# The data file is taken from the course website when the document parameter
# `isonline` is true, otherwise from `docs/data` below the project root.
# `isTRUE(as.logical(...))` gives the same result as a bare
# `if (params$isonline)` for every value that worked before, but no longer
# stops with "argument is of length zero" when the parameter is not declared
# (NULL) or with a missing-value error when it is NA; in those cases the
# local copy is used.
if (isTRUE(as.logical(params$isonline))) {
  s_ex04p01_data_path <- "https://charlotte-ngs.github.io/asmss2022/data/asm_bw_flem.csv"
} else {
  s_ex04p01_data_path <- here::here("docs", "data", "asm_bw_flem.csv")
}
tbl_ex04p01_data <- readr::read_csv(file = s_ex04p01_data_path)
# Full model: all four predictors enter the regression of `Body Weight`.
summary(
  lm(
    formula = `Body Weight` ~ `Breast Circumference` + BCS + HEI + Breed,
    data = tbl_ex04p01_data
  )
)
# Include the existing PNG of the full-model diagram in the output.
#rmdhelp::use_odg_graphic(ps_path = "odg/show-full-model.odg")
knitr::include_graphics(path = "odg/show-full-model.png")

Best Model

$$R^2 = \frac{||\hat{y} - \bar{y}||^2}{||y - \bar{y}||^2}$$

$$R_{adj}^2 = 1 - (1 - R^2) \frac{n-1}{n-p-1}$$

Finding the Best Model

Alternative Model Selection Criteria

$$C_p(\mathcal{M}) = \frac{SSE(\mathcal{M})}{\hat{\sigma}^2} - n + 2 |\mathcal{M}|$$

Forward Selection

  1. Start with the smallest model $\mathcal{M}_0$.
  2. Include the predictor variable which reduces the residual sum of squares the most.
  3. Continue with step 2 until all predictor variables have been chosen.
  4. Choose the model with the smallest $C_p$ value.

Backward Elimination

  1. Start with the full model.
  2. Exclude the predictor variable which increases the residual sum of squares the least.
  3. Continue with step 2 until all predictor variables have been deleted.
  4. Choose the model which has the smallest $C_p$ value.

Example



charlotte-ngs/asmss2022 documentation built on June 7, 2022, 1:33 p.m.