R/lm_ridge_lambda_best.R

Defines functions lm_ridge_lambda_best

Documented in lm_ridge_lambda_best

#'choose the best lambda in ridge regression (self-defined)
#'
#'This function is used to select the optimal lambda in ridge regression using my own method.
#'
#'@param formula a symbolic description of the model to be fitted.
#'@param data a dataframe containing the variables in the model.
#'
#'@return the best lambda in ridge regression given by my own method.
#'
#'@examples
#'lm_ridge_lambda_best(Sepal.Width~.,iris)
#'
#'@export

lm_ridge_lambda_best <-
  function(formula,data)
  {
    #extract X and y from the formula and data
    mf<-model.frame(formula,data)
    mt<-attr(mf,"terms")
    X <- model.matrix(mt, mf)
    y <- as.matrix(model.response(mf))
    #split 70% of the data as train and 30% as test
    set.seed(1)
    smp_siz = floor(0.70*nrow(data))
    train = sample(seq_len(nrow(data)),size = smp_siz)
    X_train<-X[train,]
    y_train<-y[train,]
    X_test<-X[-train,]
    y_test<-y[-train,]
    #set a sequence of lambda as candidates for the best one
    lambda_vals<- seq(0, (0.70*nrow(data))*2, length.out = 500)
    #use the train data to estimate the parameter
    svd_obj <- svd(X_train)
    U <- svd_obj$u
    V <- svd_obj$v
    svals <- svd_obj$d
    k <- length(lambda_vals)
    ridge_beta <- matrix(NA_real_, nrow = k, ncol = ncol(X))
    for (j in seq_len(k))
    {
      D <- diag(svals / (svals^2 + lambda_vals[j]))
      ridge_beta[j,] <- V %*% D %*% t(U) %*% (y_train)
    }
    #use the test data to choose the optimal lambda
    y_hat <- tcrossprod(X_test, ridge_beta)
    mse <- apply((y_hat - y_test)^2, 2, mean)
    lambda_best<-lambda_vals[which.min(mse)]
    lambda_best
  }
Jiachen1027/bis557 documentation built on Oct. 30, 2019, 7:41 p.m.