# R/hierbasis.R

#' Univariate Nonparametric Regression Estimation
#' via a Hierarchical Penalty
#'
#' @details Solves the univariate minimization problem (see Haris et al. 2016)
#' \deqn{argmin_{\beta} (1/2n) || y - \Psi_K \beta ||^2_2 + \lambda\Omega(\beta; m),}
#' where \eqn{\beta} is a real-valued vector of length \eqn{K = } \code{nbasis},
#' \eqn{\Psi_K} is a \eqn{n\times K} real-valued matrix whose columns
#' correspond to the basis expansion of \eqn{x} (ordered in increasing
#' complexity). The penalty function \eqn{\Omega} is defined as
#' \deqn{\Omega(\beta; m) = \sum^K_{k = 1} w_{k, m} || \Psi_{k:K} \beta_{k:K} ||_2,}
#' for \eqn{\Psi_{k:K}} denotes the submatrix of \eqn{\Psi_K} corresponding to
#' the final columns \eqn{k, k + 1, ..., K}, and \eqn{\beta_{k:K}}
#' the subvector of \eqn{\beta} corresponding to the final elements
#' \eqn{k, k + 1, ..., K}. The weights \eqn{w_{k, m}} in \eqn{\Omega}
#' are given by the m - 1 degree polynomial
#' \deqn{w_{k, m} = k^m - (k - 1)^m},
#' controlling the level of decay on higher order estimates of \eqn{\beta}.
#'
#' @param x A univariate vector representing the predictor.
#' @param y A univariate vector representing the response.
#' @param nbasis The number of basis functions to be used in the
#'               basis expansion of \code{x}. Default:
#'               \code{nbasis = length(y)}.
#' @param max.lambda The largest value of \eqn{\lambda} penalizing
#'                   the hierarchical penalty. Default:
#'                   \code{max.lambda = NULL} (computed data-adaptively).
#' @param lam.min.ratio The ratio of the smallest value of \eqn{\lambda}
#'                      to the largest \eqn{\lambda}.
#'                      Default: \code{lam.min.ratio = 1e-04}.
#' @param nlam The number of \eqn{\lambda} values for which to compute the
#'             \code{hierbasis} estimators (computed on a base-10 \eqn{log}
#'             scale). Default: \code{nlam = 50}.
#' @param m.const Smoothing parameter controlling the degree of polynomial
#'                smoothing/decay performed by the weights in the penalty
#'                \eqn{\Omega}. Default: \code{m.const = 3}. Only relevant if
#'                \code{weights} is left unspecified.
#' @param type Specifies either Gaussian regression (\code{"gaussian"})
#'             with a continuous response vector or binomial regression
#'             (\code{"binomial"}) with binary response. Default type is
#'             assumed to be Gaussian.
#' @param weights (New parameter for \code{hierbasis2}) Permits user-specified
#'                weights \eqn{w_{k, m}}. If left unspecified
#'                (\code{weights = NULL}) then the default is set to
#'                \eqn{w_{k, m} = k^m - (k - 1)^m}.
#' @param basis.type (New parameter for \code{hierbasis2}) Desired basis
#'                   functions for the basis expansion of \code{x}. Specifies
#'                   either a polynomial expansion \code{basis.type = "poly"},
#'                   trigonometric expansion \code{basis.type = "trig"}, or
#'                   wavelet expansion \code{basis.type = "wave"}. Default set
#'                   to a polynomial expansion.
#'
#' @return Returns an object of class \code{hierbasis} with elements:
#'
#' \item{x, y}{The original \code{x} and \code{y} used as inputs.}
#' \item{m.const}{The parameter \code{m.const} used for smoothing.}
#' \item{nbasis}{The maximum number of basis functions used for computing
#' the basis expansion of \code{x}.}
#' \item{basis.expansion}{The \code{n * nbasis}-matrix containing the basis
#' expansion of \code{x}, with column indices corresponding to
#' increasing basis function complexity.}
#' \item{basis.expansion.means}{The \code{nbasis}-vector containing the
#' columnwise means of the basis expansion.}
#' \item{ybar}{Mean of the response vector.}
#' \item{lambdas}{Sequence of tuning parameters \eqn{\lambda} used for
#' penalizing the fits.}
#' \item{intercept}{The \code{nlam}-vector of estimated intercepts \eqn{\beta_0}.}
#' \item{beta}{The \code{nbasis * nlam}-matrix of estimated linear coefficients.}
#' \item{fitted.values}{The \code{n * nlam}-matrix of fitted responses.}
#' \item{weights}{The weights used for smoothing/penalizing the basis functions.}
#' \item{active}{The size of the active set (number of nonzero \eqn{\beta}).}
#' \item{type}{The specified family \code{'gaussian'} or \code{'binomial'}.}
#' \item{basis.type}{Specified basis expansion family, polynomial,
#' trigonometric, or wavelet.}
#'
#' @export
#'
#' @author Annik Gougeon,
#' David Fleischer (\email{david.fleischer@@mail.mcgill.ca}).
#' @references
#' Haris, A., Shojaie, A. and Simon, N. (2016). Nonparametric Regression with
#' Adaptive Smoothness via a Convex Hierarchical Penalty. Available on request
#' by authors.
#'
#' @seealso The original \code{HierBasis} function, as implemented by
#' Haris et al. (2016) can be found via
#' \url{https://github.com/asadharis/HierBasis/}.
#'
hierbasis <- function(x, y,
                      nbasis        = length(y),
                      max.lambda    = NULL,
                      lam.min.ratio = 1e-04,
                      nlam          = 50,
                      m.const       = 3,
                      type          = c("gaussian", "binomial"),
                      weights       = NULL,
                      basis.type    = c("poly", "trig", "wave"))
{
  # To do:
  #   * Implement binomial hierbasis estimator.
  #   * Consider multicategory responses (not in HierBasis).
  #   * Add max.iter, tol, max.iter.inner, tol.inner parameters (only
  #     used in the 'binomial' case).
  #   * Implementing nonpolynomial basis expansion (wavelet, trig, etc).
  #   * Implementing other weight structures.

  # extract number of observations
  n <- length(y)

  # expand x into an n x nbasis design matrix (columns ordered by
  # increasing basis-function complexity); basis.expand() may adjust
  # nbasis, so take its value back from the returned object
  basis.expand.out <- basis.expand(x, nbasis, basis.type)
  nbasis <- basis.expand.out$nbasis
  PSI    <- basis.expand.out$PSI    # raw basis expansion
  PSI.c  <- basis.expand.out$PSI.c  # column-centered expansion
  PSIbar <- basis.expand.out$PSIbar # columnwise means of PSI

  # center responses so the intercept can be recovered analytically below
  ybar <- mean(y)
  y.c <- y - ybar

  # compute penalty weights w_{k, m} (user-supplied or the default
  # k^m - (k - 1)^m polynomial decay)
  w <- check.weights(weights, nbasis, m.const)

  # define weight matrix data structure (which will include
  # the tuning parameter lambda)
  # rows correspond to a single weight w_k value
  # columns corresponds to a single tuning parameter lambda
  w.mat <- matrix(rep(w, nlam), ncol = nlam)

  # NA signals solvehierbasis() to choose max.lambda data-adaptively
  if (is.null(max.lambda)) max.lambda <- NA

  # create the empty hierbasis object to be returned
  out <- list(); class(out) <- "hierbasis"

  if (type[1] == "gaussian") {
    # linear regression analogue

    # solve the penalized least-squares problem over the lambda path
    hierbasis.fit <- solvehierbasis(PSI.c, y.c, w, w.mat, n,
                                    lam.min.ratio, nlam, max.lambda)
    betahat <- hierbasis.fit$beta  # nbasis x nlam coefficient matrix

    # recover the intercept for each fitted model from the centering:
    # beta0 = ybar - colMeans(PSI) %*% betahat
    intercept <- as.vector(ybar - PSIbar %*% betahat)

    # size of the active set per lambda (number of nonzero coefficients);
    # colSums coerces the logical matrix to 0/1 automatically
    active.set <- colSums(betahat != 0)

    # fitted responses: add each model's intercept to the corresponding
    # column of PSI %*% betahat (n x nlam)
    fitted <- sweep(PSI %*% betahat, 2, intercept, "+")

    out$x                     <- x
    out$y                     <- y
    out$m.const               <- m.const
    out$nbasis                <- nbasis
    out$basis.expansion       <- PSI
    out$basis.expansion.means <- PSIbar
    out$ybar                  <- ybar

    out$lambdas       <- as.vector(hierbasis.fit$lambda)
    out$intercept     <- intercept
    out$beta          <- betahat
    out$fitted.values <- fitted
    out$weights       <- w
    out$active        <- active.set
    out$type          <- type[1]
    out$basis.type    <- basis.type[1]
    out$call          <- match.call()

  } else if (type[1] == "binomial") {
    # logistic regression analogue
    stop("Error in hierbasis2::hierbasis(). Parameter 'type = \"binomial\"'
         not yet implemented.")
  } else {
    stop("Error in hierbasis2::hierbasis(). Parameter 'type' only available
         for 'gaussian' or 'binomial'")
  }

  return (out)
}
# dfleis/hierbasis2 documentation built on May 17, 2019, 7:03 p.m.