vignette.R
In GAReg: Genetic Algorithms in Regression

## ----include = FALSE----------------------------------------------------------
knitr::opts_chunk$set(
  collapse = TRUE,
  comment = "#>",
  warning = FALSE,
  message = FALSE
)

set.seed(12345)

## ----setup--------------------------------------------------------------------
library(GAReg)

## -----------------------------------------------------------------------------
sim_subset_data <- function(n = 60, p = 50, s0 = 25, sigma = 1.5,
                            magnitudes_range = c(0.5, 2),
                            rho = NULL,
                            seed = NULL) {
  stopifnot(n > 0, p > 0, s0 >= 0, s0 <= p, sigma >= 0)
  if (!is.null(seed)) set.seed(seed)

  X <- matrix(rnorm(n * p), n, p)

  # Active set and coefficients
  true_idx <- if (s0 > 0) sort(sample.int(p, s0)) else integer(0)
  signs <- if (s0 > 0) sample(c(-1, 1), s0, replace = TRUE) else numeric(0)
  magnitudes <- if (s0 > 0) runif(s0, magnitudes_range[1], magnitudes_range[2]) else numeric(0)

  beta_true <- numeric(p)
  if (s0 > 0) beta_true[true_idx] <- magnitudes * signs

  if (is.null(rho)) {
    e <- rnorm(n, sd = sigma)
  } else {
    sd_innov <- sigma * sqrt(1 - rho^2)
    burn_in <- 100
    z <- rnorm(n + burn_in, sd = sd_innov)
    e_full <- numeric(n + burn_in)
    for (t in 2:(n + burn_in)) e_full[t] <- rho * e_full[t - 1] + z[t]
    e <- e_full[(burn_in + 1):(burn_in + n)]
  }

  y <- as.numeric(X %*% beta_true + e)

  DF <- data.frame(y = y, as.data.frame(X))
  colnames(DF)[-1] <- paste0("X", seq_len(p))

  list(
    X = X,
    y = y,
    beta_true = beta_true,
    true_idx = true_idx,
    DF = DF,
    rho = if (is.null(rho)) NULL else rho,
    args = list(
      n = n, p = p, s0 = s0, sigma = sigma,
      magnitudes_range = magnitudes_range,
      rho = rho, seed = seed
    )
  )
}

## -----------------------------------------------------------------------------
sim <- sim_subset_data(n = 100, p = 50, s0 = 25, sigma = 1.5, rho = NULL, seed = 123)
y <- sim$y
X <- sim$X
ga <- gareg_subset(
  y = y, X = X, gaMethod = "GA", monitor = FALSE,
  gacontrol = list(
    popSize = 120,
    maxiter = 20000,
    run = 4000,
    pmutation = 0.02
  )
)
summary(ga)

res <- FDRCalc(truelabel = sim$true_idx, predlabel = ga@bestsol, N = 50)
# FALSE Discover Rate
res$fdr
# TRUE Positive Rate
res$tpr

## -----------------------------------------------------------------------------
BIC.cpt <- function(chromosome, Xt) {
  m <- chromosome[1]
  tau <- chromosome[2:(2 + m - 1)]
  N <- length(Xt)

  if (m == 0) {
    mu.hat <- mean(Xt)
    sigma.hatsq <- sum((Xt - mu.hat)^2) / N
    BIC.obj <- 0.5 * N * log(sigma.hatsq) + 2 * log(N)
  } else {
    tau.ext <- c(1, tau, N + 1)
    seg.len <- diff(tau.ext)
    ff <- rep(0:m, times = seg.len)
    Xseg <- split(Xt, ff)
    mu.seg <- unlist(lapply(Xseg, mean), use.names = F)
    mu.hat <- rep(mu.seg, seg.len)
    sigma.hatsq <- sum((Xt - mu.hat)^2) / N
    BIC.obj <- 0.5 * N * log(sigma.hatsq) + (2 * m + 2) * log(N)
  }
  return(BIC.obj)
}

# IID data
set.seed(1234)
n <- 200
et <- rnorm(n)
Xt <- et + rep(c(0, 2, 0, 2), each = n / 4)

library(changepointGA)
GA.res <- cptga(
  ObjFunc = BIC.cpt, N = n, popSize = 500,
  pcrossover = 0.95, pmutation = 0.3, pchangepoint = 10 / n,
  Xt = Xt
)
summary(GA.res)

## -----------------------------------------------------------------------------
library(MASS)
library(splines)

data(mcycle)
head(mcycle)

## -----------------------------------------------------------------------------
g1 <- gareg_knots(
  y = mcycle$accel, x = mcycle$times,
  minDist = 5,
  gaMethod = "cptga",
  cptgactrl = cptgaControl(popSize = 200, pcrossover = 0.9, pmutation = 0.3),
  ic_method = "BIC"
)
summary(g1)

# knots location
g1@bestsol

## -----------------------------------------------------------------------------
g2 <- gareg_knots(
  y = mcycle$accel, x = mcycle$times,
  minDist = 5,
  gaMethod = "cptgaisl",
  cptgactrl = cptgaControl(
    numIslands = 5, popSize = 200, maxMig = 250,
    pcrossover = 0.9, pmutation = 0.3
  ),
  ic_method = "BIC"
)
summary(g2)

## -----------------------------------------------------------------------------
g3 <- gareg_knots(
  y = mcycle$accel, x = mcycle$times,
  fixedknots = 3,
  minDist = 5,
  gaMethod = "cptga",
  cptgactrl = cptgaControl(popSize = 200, pcrossover = 0.9, pmutation = 0.3),
  ic_method = "BIC"
)
summary(g3)

## -----------------------------------------------------------------------------
g4 <- gareg_knots(
  y = mcycle$accel, x = mcycle$times,
  fixedknots = 4,
  minDist = 5,
  gaMethod = "cptgaisl",
  cptgactrl = cptgaControl(
    numIslands = 5, popSize = 200, maxMig = 250,
    pcrossover = 0.9, pmutation = 0.3
  ),
  ic_method = "BIC"
)
summary(g4)

## ----fig.width=7, fig.height=5, out.width="90%", fig.align='center'-----------
y <- mcycle$accel
x <- mcycle$times
x_unique <- unique(x)

tBIC.vary.ga <- g1@bestsol
tBIC.vary.gaisl <- g2@bestsol
tBIC.fix.3.ga <- g3@bestsol
tBIC.fix.4.gaisl <- g4@bestsol

bsfit.vary.ga <- lm(y ~ bs(x, knots = x_unique[g1@bestsol], Boundary.knots = range(x)))
bsfit.vary.gaisl <- lm(y ~ bs(x, knots = x_unique[g2@bestsol], Boundary.knots = range(x)))
bsfit.fix.3.ga <- lm(y ~ bs(x, knots = x_unique[g3@bestsol], Boundary.knots = range(x)))
bsfit.fix.4.gaisl <- lm(y ~ bs(x, knots = x_unique[g4@bestsol], Boundary.knots = range(x)))

plot(x, y, xlab = "Time (ms)", ylab = "Acceleration (g)")
ht <- seq(min(x), max(x), length.out = 200)
lines(ht, predict(bsfit.vary.ga, data.frame(x = ht)), col = "blue", lty = 5, lwd = 2)
lines(ht, predict(bsfit.vary.gaisl, data.frame(x = ht)), col = "orange", lty = 4, lwd = 2)
lines(ht, predict(bsfit.fix.3.ga, data.frame(x = ht)), col = "purple", lty = 3, lwd = 2)
lines(ht, predict(bsfit.fix.4.gaisl, data.frame(x = ht)), col = "#D55E00", lty = 2, lwd = 2)
legend("bottomright",
  legend = c(
    "Varying knots GA",
    "Varying knots island model GA",
    "Fixed 3 knots GA",
    "Fixed 4 knots island model GA"
  ),
  lty = 5:2, lwd = 2,
  col = c("blue", "orange", "purple", "#D55E00"), bty = "n"
)

## ----vary-ppolys--------------------------------------------------------------
g_pp3 <- gareg_knots(
  y = y, x = x,
  minDist = 5,
  gaMethod = "cptga",
  ObjFunc = NULL, # use default varyknotsIC
  type = "ppolys",
  degree = 3, # degree-3 piecewise polynomial
  intercept = TRUE,
  ic_method = "BIC"
)
summary(g_pp3)

## ----vary-ns------------------------------------------------------------------
g_ns <- gareg_knots(
  y = y, x = x,
  minDist = 5,
  gaMethod = "cptga",
  type = "ns", # natural cubic (degree ignored)
  degree = 3, # ignored for "ns"
  intercept = TRUE,
  ic_method = "BIC"
)
summary(g_ns)

## ----vary-bs------------------------------------------------------------------
g_bs1 <- gareg_knots(
  y = y, x = x,
  minDist = 5,
  gaMethod = "cptga",
  type = "bs",
  degree = 1, # linear B-splines
  intercept = TRUE,
  ic_method = "BIC"
)
summary(g_bs1)

## -----------------------------------------------------------------------------
sessionInfo()