# Title : TODO
# Objective : TODO
# Created by: jonlachmann
# Created on: 2021-04-21
#' Gradient for GLM
#'
#' @param x A matrix containing the covariates (including an intercept if one wants to use one).
#' @param y The dependent variable.
#' @param theta The coefficient vector to use.
#' @param idx The observation indices to use.
#' @param family A glm family for the distribution to use, i.e. "binomial()".
#'
#' @export glm.grad
glm.grad <- function (x, y, theta, idx, family) {
# TODO: Does not work for the Gamma family yet
m <- length(idx)
eta <- family$linkinv(x[idx,,drop=F]%*%theta)
return(t(x[idx,,drop=F])%*%(eta-y[idx])/m)
}
#' (Batch) (Stochastic) Gradient Descent for GLM
#'
#' @param x A matrix containing the covariates (including an intercept if one wants to use one).
#' @param y The dependent variable.
#' @param family A glm family for the distribution to use, i.e. "binomial()".
#' @param sgd.ctrl control parameters to pass to the sgd function (see ?sgd for details).
#'
#' @export glm.sgd
glm.sgd <- function (x, y, family, sgd.ctrl=NULL) {
if (family$family == "binomial") grad.fun <- logisticG
else if (family$family == "gaussian") grad.fun <- gaussianG
else grad.fun <- function (theta, data, sub_idx) {
glm.grad(data[,-1,drop=F], data[,1], sub_idx, theta, family)
}
sgd_res <- sgd(grad.fun, data=cbind(y,x), sgd.ctrl)
sgd_res$coefficients <- sgd_res$x
return(sgd_res)
}
#' (Batch) (Stochastic) Gradient Descent
#'
#' @param gradient The gradient of the objective function.
#' @param data The dataset to use for the gradient, if applicable, if NULL, no data is used.
#' @param ctrl A list of control parameters.
#' @details The control parameters that can be set are
#' \itemize{
#' \item{"start" }{The starting point, NULL means random around 0.}
#' \item{"alpha" }{The initial step size, defaults to 0.0005.}
#' \item{"decay" }{The exponential decay of the step size per iteration, defaults to 0.999.}
#' \item{"subs" }{The subsample percentage to use at each iteration, defaults to 1 (100\%).}
#' \item{"maxit" }{The maximum number of iterations to run for, defaults to 1000.}
#' \item{"histfreq" }{The number of iterations between stored history frames, defaults to 50.}
#' }
#'
#' @export sgd
sgd <- function (gradient, data=NULL, ctrl=list(start, alpha, decay, subs, maxit, histfreq)) {
# Initialize coefficients from start or randomly
if (is.null(ctrl$start)) x <- rnorm(ncol(data)-1)
else x <- ctrl$start
nvars <- length(x)
# Set up default values if not set
if (is.null(ctrl$alpha)) ctrl$alpha <- 0.0005
if (is.null(ctrl$decay)) ctrl$decay <- 0.999
if (is.null(ctrl$subs)) ctrl$subs <- 1
if (is.null(ctrl$maxit)) ctrl$maxit <- 1000
if (is.null(ctrl$histfreq)) ctrl$histfreq <- 50
# Get observation count and set subsample size
if (!is.null(data)) {
nobs <- nrow(data)
sub_size <- nobs*ctrl$subs
}
# Set decay
if (is.null(ctrl$decay)) ctrl$decay <- (maxit-2)/maxit
# Set up matrix for history
xhist <- matrix(NA, ctrl$maxit/ctrl$histfreq+1, nvars)
xhist[1,] <- x
# Run (S)GD
for (i in 1:ctrl$maxit) {
if (ctrl$subs != 1) sub_idx <- sample(1:nobs, sub_size, replace=T)
else sub_idx <- 1:nobs
ctrl$alpha <- ctrl$alpha * ctrl$decay
if (!is.null(data)) x <- x - ctrl$alpha * gradient(x, data, sub_idx)
else x <- x - ctrl$alpha * gradient(x)
if (i %% ctrl$histfreq == 0) xhist[i/ctrl$histfreq+1,] <- x
}
return(list(x=x, xhist=xhist))
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.