#' Fit a linear model with elastic net penalty using the coordinate descent algorithm.
#'
#' @description Fit a linear model via penalized maximum likelihood. The regularization path is computed for the elastic net penalty at a grid of values for the regularization parameter lambda, using the coordinate descent algorithm.
#' @param X input matrix, of dimension nobs x nvars; each row is an observation vector.
#' @param y response variable.
#' @param alpha The elasticnet mixing parameter, with \eqn{0\le \alpha \le 1}. The penalty is defined as \deqn{(1-\alpha)/2||\beta||_2^2+\alpha ||\beta||_1} \eqn{\alpha=1} is the lasso penalty, and \eqn{\alpha=0} the ridge penalty.
#' @param maxit Maximum number of coordinate-descent passes for each lambda value; if reached, the remaining path is not computed. Default is 10^4.
#' @param eps Convergence threshold for coordinate descent. Each coordinate-descent loop continues until the maximum absolute change in the coefficients after a full pass is less than \code{eps}. Default value is 1E-5.
#' @param lambda.length The length of the \code{lambda} sequence. Default is 100. The largest \code{lambda} is slightly above the smallest value that shrinks all coefficients to zero. The smallest \code{lambda} is the largest \code{lambda} times 0.001. The \code{lambda} sequence decreases linearly from the largest to the smallest value and has length \code{lambda.length}.
#' @details The sequence of models implied by \code{lambda} is fit by coordinate descent, with each solution used as a warm start for the next \code{lambda} value. The objective function is \deqn{1/2 RSS/nobs + \lambda*penalty.} Each coordinate update has the closed form \deqn{\beta_j \leftarrow S(x_j^T r/n + \beta_j, \lambda\alpha)/(1 + \lambda(1-\alpha)),} where \eqn{r} is the current residual and \eqn{S(z,\gamma) = sign(z)(|z|-\gamma)_+} is the soft-thresholding operator.
#' @return \code{beta} a matrix of fitted coefficients on the original scale of \code{X}; each column corresponds to one value of the decreasing \code{lambda} sequence.
#'
#' \code{lambda} the actual sequence of \code{lambda} values used. When alpha=0, the largest lambda reported does not quite give the zero coefficients reported (lambda=inf would in principle). Instead, the largest lambda for alpha=0.01 is used, and the sequence of lambda values is derived from this.
#' @export
#' @examples
#' # Gaussian
#' x=matrix(rnorm(100*20),100,20)
#' y=rnorm(100)
#' fit1=elnet_coord(x,y)
#' plotpath(fit1,x)
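#' # illustrative sanity check: the first (largest) lambda should shrink
#' # every coefficient to zero, so this prints 0
#' max(abs(fit1$beta[, 1]))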
elnet_coord = function(X, y, alpha=1, lambda.length=100, maxit = 10000, eps = 1e-5){
  # standardization
  n = length(y)
  p = dim(X)[2]
  X_sd = apply(X, 2, stats::sd)
  y = scale(y, scale = FALSE)
  X = scale(X)
  # lambda sequence: lambda_max (inflated slightly for safety) is the smallest
  # value that shrinks all coefficients to zero; for ridge (alpha = 0) it would
  # be infinite, so the value for alpha = 0.01 is used instead
  if (alpha > 0) lambda_max = max(abs(crossprod(X, y))) / (n * alpha) * 1.1
  else lambda_max = max(abs(crossprod(X, y))) / (n * 0.01) * 1.1
  lambda_min = lambda_max * 0.001
  lambda_seq = seq(from = lambda_max, to = lambda_min, length.out = lambda.length)
  beta_list = list()
  max_iter_logic = FALSE
  for (m in 1:lambda.length){
    if (max_iter_logic) break
    lambda = lambda_seq[m]
    # warm start from the solution at the previous lambda value
    if (m == 1) beta = rep(0, p) else beta = beta_list[[m-1]]
    k = 1
    repeat{
      # a copy to hold the new update
      beta1 = beta
      for (j in 1:p){
        # current residual
        r = y - X %*% beta1
        term1 = sum(r * X[,j])/n + beta1[j]
        # soft-thresholding update for the j-th coefficient
        beta1[j] = S_opera(term1, lambda * alpha)/(1 + lambda * (1 - alpha))
      }
      k = k + 1
      if (max(abs(beta1 - beta)) < eps) break
      if (k > maxit){
        warning(paste('max iterations reached after', m, 'lambda values'))
        max_iter_logic = TRUE
        break
      }
      # update the parameter
      beta = beta1
    }
    # store the estimate for the m-th lambda value
    beta_list[[m]] = beta1
  }
  # back-transform coefficients to the original scale of X
  beta_list_org = sapply(beta_list, function(x){x / X_sd})
  return(list(beta = beta_list_org, lambda = lambda_seq))
}
#' Leveraging methods for linear regression.
#' @description Implements algorithmic leveraging for linear regression using uniform and leverage-score-based subsampling of rows.
#' @param x one-dimensional predictor observations.
#' @param y response variable.
#' @param r the size of each random subsample.
#' @param draws number of subsample draws.
#' @param seed seed number for reproducible experiments.
#' @details This algorithm approximates the linear regression coefficients in a dataset of sample size \eqn{n} using only a randomly selected subset of size \eqn{r \ll n}. Selecting \eqn{r} samples uniformly at random often produces highly variable estimates, while selecting samples with probability proportional to their leverage scores largely alleviates this problem.
#' @return \code{beta_unif} matrix of estimated coefficients from all draws using uniform subsampling.
#'
#' \code{beta_blev} matrix of estimated coefficients from all draws using leverage-score-based subsampling.
#' @export
#' @examples
#' x = rt(500, df=6)
#' y = -x + rnorm(500)
#' est = algo_average(x,y,r=50)
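#' # illustrative comparison: across draws, leverage-based subsampling
#' # typically yields less variable estimates than uniform subsampling
#' apply(est$beta_unif, 2, var)
#' apply(est$beta_blev, 2, var)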
algo_average = function(x, y, r, draws=500, seed=101){
  beta_unif = array(0, dim = c(draws, 2))
  colnames(beta_unif) = c('intercept', 'slope')
  beta_blev = array(0, dim = c(draws, 2))
  colnames(beta_blev) = c('intercept', 'slope')
  l = length(x)
  # leverage scores of the one-column design, normalized to sampling probabilities
  hii = x^2 / sum(x^2)
  set.seed(seed)
  for (j in 1:draws){
    # uniform subsampling
    sample_unif = sample(l, r)
    beta_unif[j,] = stats::lm(y[sample_unif] ~ x[sample_unif])$coefficients
    # leverage-score-based subsampling with inverse-probability weights
    sample_blev = sample(l, r, prob = hii)
    W = 1/hii[sample_blev]
    W = W/sum(W)
    beta_blev[j,] = stats::lm(y[sample_blev] ~ x[sample_blev], weights = W)$coefficients
  }
  return(list(beta_unif = beta_unif, beta_blev = beta_blev))
}
#' Iteratively solve a linear system of equations.
#' @description Solves a linear system of equations using the Gauss-Seidel or Jacobi method.
#' @param A a numeric matrix containing the coefficients of the linear system.
#' @param b a numeric vector giving the right-hand side of the linear system.
#' @param x0 the initial solution vector.
#' @param method a character string indicating which iterative method to use. Either 'Gauss-Seidel' or 'Jacobi'.
#' @param parallelize Logical. Only used when \code{method == 'Jacobi'}. Should the Jacobi method be run in parallel (TRUE) or not (default FALSE).
#' @param cores The number of cores to use for parallel execution. Default is 1.
#' @param maxit Maximum number of iterations; default is 15000.
#' @param eps Convergence threshold on the maximum absolute change between successive iterates. Default value is 1E-5.
#' @return \code{x.last} solution vector found in the last iteration.
#'
#' \code{x.all} a list of solutions of all iterations.
#'
#' \code{time} a vector recording the cumulative elapsed time at each iteration.
#' @import foreach
#' @export
#' @examples
#' A = matrix(0, nrow = 100, ncol = 100)
#' diag(A) = rep(2, 100)
#' for (i in 1:100){
#'   for (j in 1:100){
#'     if (abs(i-j)==1) A[i,j] = -1
#'   }
#' }
#' v = rep(c(1,0), 50)
#' est = solve_ols(A = A, b = A%*%v, method = 'Gauss-Seidel')
#' print(max(abs(est$x.last - v)))
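#' # a minimal illustrative check of the Jacobi method on a small, strictly
#' # diagonally dominant system (matrix B below is made up for illustration)
#' B = matrix(c(4,-1,0, -1,4,-1, 0,-1,4), 3, 3)
#' est2 = solve_ols(A = B, b = B %*% c(1,2,3), method = 'Jacobi')
#' print(max(abs(est2$x.last - c(1,2,3))))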
solve_ols = function(A, b, x0=rep(0,length(b)), method=c('Gauss-Seidel', 'Jacobi'),
                     maxit=15000, eps=1e-5, parallelize = FALSE, cores = 1){
  method = match.arg(method)
  if (any(diag(A) == 0)) stop("0 can't appear in diagonal entries of A")
  p = dim(A)[1]
  a = proc.time()[3]
  # split A = D - L - U into its diagonal, strictly lower and strictly upper parts
  L = -A
  L[upper.tri(A, diag = TRUE)] = 0
  U = -A
  U[lower.tri(A, diag = TRUE)] = 0
  if (method == 'Gauss-Seidel'){
    M = solve(diag(diag(A)) - L)
    T1 = M %*% U
    c = M %*% b
  } else {
    Dinv = diag(1/diag(A))
    T1 = Dinv %*% (L + U)
    c = Dinv %*% b
  }
  # the fixed-point iteration x <- T1 x + c converges if and only if the
  # spectral radius of the iteration matrix T1 is strictly less than 1
  rho = max(abs(eigen(T1, only.values = TRUE)$values))
  print(paste("the relevant spectral radius is", rho))
  if (rho >= 1){
    print("the spectral radius is at least 1, the algorithm will not converge")
    return(NULL)
  }
  if (method == 'Jacobi' && parallelize){
    doParallel::registerDoParallel(cores)
    k = 1
    x.list = list()
    elapse.time = NULL
    repeat{
      # each Jacobi coordinate update depends only on the previous iterate,
      # so the p updates can be computed in parallel
      xnew <- foreach::foreach(i = 1:p, .combine='c') %dopar% {
        (b[i] - sum(A[i,-i] * x0[-i]))/A[i,i]
      }
      elapse.time = c(elapse.time, proc.time()[3] - a)
      x.list[[k]] = xnew
      if (max(abs(xnew - x0)) < eps) break
      if (k > maxit){
        warning("max iterations reached")
        break
      }
      k = k + 1
      x0 = xnew
    }
  } else {
    k = 1
    x.list = list()
    x.list[[1]] = T1 %*% x0 + c
    elapse.time = proc.time()[3] - a
    repeat{
      k = k + 1
      x.list[[k]] = T1 %*% x.list[[k-1]] + c
      elapse.time = c(elapse.time, proc.time()[3] - a)
      if (max(abs(x.list[[k]] - x.list[[k-1]])) < eps) break
      if (k > maxit){
        warning("max iterations reached")
        break
      }
    }
  }
  return(list(x.last = x.list[[length(x.list)]], x.all = x.list, time = elapse.time))
}
# soft-thresholding operator S(z, r) = sign(z) * max(|z| - r, 0), the
# closed-form solution of the one-dimensional lasso problem;
# e.g. S_opera(0.8, 0.3) = 0.5, S_opera(-0.8, 0.3) = -0.5, S_opera(0.2, 0.3) = 0
S_opera = function(z, r){
  sign(z) * max(abs(z) - r, 0)
}
#' Plot coefficient paths from an \code{elnet_coord} fit.
#'
#' @param fit fitted object returned by \code{elnet_coord}.
#' @param x the original input matrix.
#'
#' @export
#'
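#' @examples
#' # illustrative: draw the solution path of an elastic net fit
#' x = matrix(rnorm(100*20), 100, 20)
#' y = rnorm(100)
#' plotpath(elnet_coord(x, y), x)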
plotpath <- function(fit, x){
  est = fit
  p = dim(x)[2]
  ylim_beta = range(est$beta)
  graphics::plot(y = est$beta[1,], x = log(est$lambda), ylim = ylim_beta + c(-0.1, 0.1),
                 col = 1, type = 'l', xlab = "Log Lambda", ylab = "Coefficients",
                 main = "Solution path")
  # add the path of each remaining coefficient in a different color
  for (i in 2:p){
    graphics::points(y = est$beta[i,], x = log(est$lambda), col = i, type = 'l')
  }
}