knitr::opts_chunk$set(
  collapse = TRUE,
  comment = "#>"
)
With the gradient package it is possible to fit linear models using steepest descent or gradient descent, with the same flexibility as the default \code{lm} function in R. First of all, we load the library.
library(gradient)
In this example we will use a simulated dataset, but it is possible to use real data as well. We also initialize some parameters that will be used in the following functions.
n <- 10000
x1 <- rnorm(n)
x2 <- rnorm(n)
y <- 1 + .5*x1 + .2*x2 + rnorm(n)
x <- cbind(x1, x2)
stepsize <- 1e-5
tolerance <- 1e-10
maxit <- 1000
b <- c(0.1, 0.1, 0.1)
data <- data.frame(y = y, x1 = x1, x2 = x2)
The first linear model will be fitted using steepest descent.
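For the least-squares objective $f(\beta) = \|y - X\beta\|^2$, steepest descent moves along the negative gradient with an exact line-search step. As a sketch of the standard update (the exact constants used internally by \code{lm_gradient} may differ):

$$
g_k = 2X^\top(X\beta_k - y), \qquad
\alpha_k = \frac{g_k^\top g_k}{g_k^\top H\, g_k}, \qquad
\beta_{k+1} = \beta_k - \alpha_k\, g_k,
$$

where $H$ denotes the Hessian of $f$.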
sd <- lm_gradient(b = b, formula = y ~ x1 + x2, data = data, maxit, tolerance, fun = "sd")
sd
In this particular case, the warning is useful to check that convergence is reached before the maximum number of iterations.
Gradient descent can be performed easily by changing the \code{fun} parameter.
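Gradient descent uses the same gradient but replaces the exact line search with a fixed step size $\gamma$, supplied through the \code{stepsize} argument. A sketch of the update is

$$
\beta_{k+1} = \beta_k - \gamma \, 2X^\top(X\beta_k - y).
$$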
gd <- lm_gradient(b = b, formula = y ~ x1 + x2, data = data, maxit, tolerance, stepsize, fun = "gd")
gd
With both algorithms we obtain the same results.
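As a quick sanity check (not part of the package), the estimates can be compared with those returned by \code{lm}, assuming \code{coef} gives back the vector of fitted coefficients:

cbind(sd = coef(sd), gd = coef(gd), lm = coef(lm(y ~ x1 + x2, data = data)))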
The gradient package provides several functions to inspect the fit of the model, similar to what is available for the base R function \code{lm}.
print(sd)
coef(sd)
summary(sd)
Moreover, it is possible to easily plot the convergence of the algorithm.
plot(sd)
plot(gd)
Both algorithms reach convergence, but steepest descent is faster (it takes only 30 iterations!). The matrix \code{A} inside the object returned by \code{lm_gradient} contains all the steps of the algorithm for further inspection.
head(sd$A)
head(gd$A)
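For example, the path of the three coefficients can be traced directly from \code{A} (a sketch: here we plot roughly the 30 iterations mentioned above, assuming the remaining rows of \code{A} are left at their initial value of zero):

matplot(sd$A[1:30, ], type = "l", lty = 1, xlab = "iteration", ylab = "coefficient value")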
Moreover, it is possible to predict new values using the \code{predict} method.
n1 <- 200
x11 <- rnorm(n1)
x21 <- rnorm(n1)
x31 <- rnorm(n1)
new <- data.frame(x1 = x11, x2 = x21, x3 = x31)
y1 <- 1 + .5*x11 + .2*x21 + rnorm(n1)
y_pred <- predict(sd, new)
head(y_pred)
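Since \code{y1} was generated from the same model, we can also compute the out-of-sample errors by hand (a quick check, not a package function):

sqrt(mean((y1 - y_pred)^2))  # RMSE on the new data
mean(abs(y1 - y_pred))       # MAE on the new data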
It is possible to compare the performance of similar functions written in C++ and in R.
# Basic argument checks shared by the optimisers
.controls <- function(tolerance, maxit, stepsize){
  if(!is.numeric(tolerance)){
    stop("Tolerance must be numeric")
  }
  if(!is.numeric(maxit)){
    stop("Max Iteration (maxit) must be numeric")
  }
  if(!is.numeric(stepsize)){
    stop("Step size must be numeric")
  }
}

linear_optim <- function(par,              # beta(0), starting values
                         formula,          # y ~ x
                         data = NULL,      # data
                         tolerance = 1e-5, # tolerance
                         stepsize = 1e-4,  # step size (gradient descent only)
                         maxit = 1e3,      # max iterations, not to run forever
                         fun = "sd",       # algorithm: "sd" or "gd"
                         verbose = TRUE    # should the function write messages during the run?
                         ){
  # controls:
  .controls(tolerance, maxit, stepsize)
  b <- par  # current estimate of the coefficients
  # Build the response vector y and the design matrix x
  if(is.null(data)){
    mod <- model.frame.default(formula)
    y <- model.response(mod, type = "numeric")
    x <- as.matrix(cbind(1, subset(mod, select = -y)))
  }else{
    response <- as.character(attr(terms(formula), "variables"))[-1][attr(terms(formula), "response")] # [1] is the list call
    y <- data[, match(response, names(data))]
    vars <- all.vars(formula)[which(all.vars(formula) != response)]
    mt <- match(vars, names(data))
    x <- model.matrix(~., data[mt])
  }
  i <- 0                                  # iteration counter
  A <- matrix(0, nrow = maxit, ncol = 3)  # history of the iterates
  if(fun == "gd"){
    # Gradient descent: fixed step size
    while(i < maxit){
      if(max(abs(b)) > 1e300) stop("Beta too large: decrease the step size!")
      grad <- 2*t(x)%*%(x%*%b - y)
      bp <- b
      b <- bp - grad*stepsize
      i <- i + 1
      A[i, ] <- b
      if(max(abs(b - bp)) < tolerance){
        if(verbose) message("Tolerance reached")
        break
      }
    }
  }
  if(fun == "sd"){
    # Steepest descent: exact line search along the gradient direction
    while(i < maxit){
      if(max(abs(b)) > 1e300) stop("Beta too large: decrease the step size!")
      hessian <- 4*t(x)%*%x
      grad <- 2*t(x)%*%(x%*%b - y)
      bp <- b
      step <- sum(grad^2)/(t(grad)%*%hessian%*%grad)
      b <- bp - grad%*%step
      i <- i + 1
      A[i, ] <- b
      if(max(abs(b - bp)) < tolerance){
        if(verbose) message("Tolerance reached")
        break
      }
    }
  }
  return(list(b, A, i))  # coefficients, iterate history, iteration count
}
bm <- bench::mark(
  gd_R = linear_optim(b, y ~ x1 + x2, data, fun = "gd"),
  sd_R = linear_optim(b, y ~ x1 + x2, data, fun = "sd"),
  gd_C = lm_gradient(b, y ~ x1 + x2, data, fun = "gd"),
  sd_C = lm_gradient(b, y ~ x1 + x2, data, fun = "sd"),
  check = FALSE
)
print(bm)
ggplot2::autoplot(bm)
As can be seen, the C++ functions are faster; for this reason, our package is based on them.
The gradient package has two built-in methods for cross-validation: k-fold and leave-one-out. As previously described, both methods are available for steepest descent and gradient descent.
gd_cv <- lm_gradient_cv(5, b=b, formula=y~x1+x2, data=data, maxit, tolerance, fun="gd", parallel = FALSE)
In this example we performed k-fold cross-validation with k = 5. It is possible to validate the results by comparing the RMSE, MAE and MedianAE errors.
print(gd_cv)
summary(gd_cv)
plot(gd_cv)
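For reference, these error metrics are commonly defined as follows; this is a sketch on a generic vector of prediction errors, not the package's internal code:

rmse      <- function(err) sqrt(mean(err^2))   # root mean squared error
mae       <- function(err) mean(abs(err))      # mean absolute error
median_ae <- function(err) median(abs(err))    # median absolute error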
Similarly, it is possible to perform leave-one-out cross-validation with the following function:
gd_looc <- lm_gradient_looc(b=b, formula=y~x1+x2, data=data, maxit, tolerance, fun="gd", parallel = FALSE)
and look at the results:
print(gd_looc)
# summary(gd_looc)
# plot(gd_looc)
Both cross-validation methods can be run in parallel, taking advantage of multiple cores for faster computation, by setting the parameter \code{parallel} to TRUE.
gd_cv_par <- lm_gradient_cv(5, b = b, formula = y ~ x1 + x2, data = data, maxit, tolerance, fun = "gd", parallel = TRUE)
bm1 <- bench::mark(
  parallel   = lm_gradient_cv(2500, b = b, formula = y ~ x1 + x2, data = data, maxit, tolerance, fun = "sd", parallel = TRUE),
  sequential = lm_gradient_cv(2500, b = b, formula = y ~ x1 + x2, data = data, maxit, tolerance, fun = "sd", parallel = FALSE),
  check = FALSE
)
print(bm1)
ggplot2::autoplot(bm1)
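The actual speed-up depends on the number of cores available on the machine, which can be checked with base R (not a gradient function):

parallel::detectCores()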