lab4sidjac: Linear regression

Documented in ridgereg

#' Fitting a ridge regression
#' @description \code{ridgereg} is used to fit models ridge regression..
#' @param formula an object of class \code{\link[stats]{formula}}.
#' @param data a data frame containing all the variables in the regression model stated in the argument \code{formula}.
#' @param lambda a scalar or vector of ridge constants.
#' @param QR a boolean if TRUE use QR decomposition, default = FALSE
#' @return \code{ridgereg} return a named list of class "ridgereg".
#' \itemize{
#' \item{\code{call_arg}}{ return the arguments used to perform the ridge regression}
#' \item{\code{y}}{ a vector with the response variable data used}
#' \item{\code{X}}{ the dependent variables expressed with \code{\link[stats]{model.matrix}}}
#' \item{\code{beta_hat}}{ a one column matrix with the coefficients}
#' \item{\code{sigma_2_hat}}{ the residual variance in a 1x1 matrix}
#' \item{\code{y_hat}}{ a one column matrix with the fitted values}
#' \item{\code{e_hat}}{ a one column matrix with the residuals}
#' \item{\code{var_beta_hat}}{ the covariance matrix for the coefficients}
#' \item{\code{t_beta}}{ a matrix with t-values for each coefficient}
#' \item{\code{p_value}}{ a matrix with p-values for each coefficient}
#' \item{\code{data}}{ return the data.frame stated in the argument \code{data}}
#' \item{\code{formula}}{ return the \code{formula} given to the function}}
#' @examples
#' ridgereg(formula = Petal.Length~Species, data = iris, lambda =0, QR = FALSE)
#' @references https://en.wikipedia.org/wiki/Tikhonov_regularization
#' @export

ridgereg<- function(formula, data, lambda, QR = FALSE){
  call_arg <- match.call()
  # Data pre-processing
  data <- stats::model.frame(formula, data)
  y <- stats::model.response(data)
  X <- stats::model.matrix(attr(data,"terms"),data)

  if(QR==FALSE){
    beta_hat <- solve(t(X) %*% X +lambda*diag(ncol(X))) %*% t(X) %*% y
    } else if(QR==TRUE ){ # QR-decomposition
      A <- X
      n <- nrow(A)
      p <- ncol(A)
      Q <- diag(n)

      vec_norm <- function(x){ sqrt(sum(x^2))}

      for(i in 1:p){
        v <- matrix(0,nrow=n,ncol=1)
        v[i:n] <- A[i:n,i]
        v[i] <- v[i] + vec_norm(v) *sign(v[i])

        H <- diag(n) - 2*(v %*% t(v))/c((t(v) %*% v))
        A <- H %*% A
        Q <- Q %*% H
        }

      Q <- Q[,1:p]
      R <- matrix(0, ncol=p,nrow=p)
      R[upper.tri(R,diag=TRUE)] <- A[1:p,][upper.tri(R,diag=TRUE)]

      beta_hat <- (solve(R) %*% t(Q) %*% y)
      rownames(beta_hat) <- colnames(X)
      }

  y_hat <-  X %*% beta_hat
  e_hat <- y - y_hat
  df <- nrow(data) - ncol(X)
  sigma_2_hat <- (t(e_hat) %*% e_hat) / df
  var_beta_hat <- sigma_2_hat[1] * solve(t(X) %*% X)
  t_beta <- beta_hat / sqrt(diag(var_beta_hat))
  p_value <- 2*stats::pt(abs(t_beta), 147, lower.tail = FALSE)
  ridge <- list(call_arg = call_arg, y = y, X = X, beta_hat = beta_hat,
                y_hat = y_hat, e_hat = e_hat, df = df, sigma_2_hat = sigma_2_hat,
                var_beta_hat = var_beta_hat, t_beta = t_beta, p_value = p_value,
                data = data, formula=formula)

  class(ridge) <- "ridgereg"
  invisible(ridge)
}