R/PLS.R


#' gpls: Generalized Projection to Latent Structures
#'
#' This implements partial least squares regression (projection to latent structures) with the addition of
#' binomial, poisson, negative binomial, Gamma, inverse gaussian, and multinomial likelihood functions.
#' Only univariate responses are supported.
#'
#' @title gpls: Generalized Projection to Latent Structures
#' @param x a model formula or a model matrix
#' @param ... other arguments passed to the methods
#' @examples
#' \dontrun{gpls(y ~ ., data = df)}
#'
#' @rdname gpls
#' @export gpls
gpls <- function(x, ...){
  UseMethod("gpls")
}


#' inverse link functions for gpls models
#'
#' @param x input
#' @param family family name
#' @param link link function
#'
#' @export .linkinv_gpls
#' @return the linear predictor transformed to the scale of the observations
#' @examples
#' .linkinv_gpls(0, family = "binomial", link = "logit") # 0.5
#' @keywords internal
.linkinv_gpls <- function(x, family="gaussian", link="identity") {

  if (family=="gaussian"  ){
    if (link=="identity") x
    else cat("link function not recognized for ", family, "only identity is available!\n")
  }
  else if (family=="binomial") {
    if (link=="logit") {
      ifelse(is.infinite(exp(x)), 1, plogis(x))
    }
    else if (link=="probit") {
      ifelse(is.infinite(exp(x)), 1, pnorm(x))
    }
    else if (link=="cloglog"){
      ifelse(is.infinite(exp(x)), 1, 1-exp(-exp(x)))
    }
    else if (link=="cauchit") {
      thresh <- -qcauchy(.Machine$double.eps)
      eta <- pmin(pmax(x, -thresh), thresh)
      pcauchy(eta)
    }
    else if (link=="robit") {
      thresh <- -qt(.Machine$double.eps, df = 3)
      eta <- pmin(pmax(x, -thresh), thresh)
      pt(eta, df = 3)
    }
    else cat("link function not recognized for ", family, " only logit, probit, cauchit, robit, and cloglog are available!\n")
  }
  else if (family=="poisson") {
    if (link=="log") pmax(.Machine$double.eps, exp(x))
    else cat("link function not recognized for ", family, " only log is available!\n" )
  }
  else if (family=="Gamma") {
    if (link=="inverse") pmax(.Machine$double.eps, 1/x)
    else cat("link function not recognized for ", family, " only inverse is available!\n" )
  }
  else if (family=="inverse.gaussian") {
    if (link=="1/mu^2") pmax(.Machine$double.eps, 1 / sqrt(x))
    else cat("link function not recognized for ", family, " only invsquare is available!\n" )
  }
  else if (family=="negative.binomial") {
    if (link=="log") pmax(.Machine$double.eps, exp(x))
    else cat("link function not recognized for ", family, " only log is available!\n" )
  }
}
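
## Quick illustration of the inverse link (kept as comments so nothing is
## executed when the file is sourced):
# .linkinv_gpls(0, family = "binomial", link = "logit")    # 0.5
# .linkinv_gpls(0, family = "binomial", link = "cloglog")  # 1 - exp(-1) ~ 0.632
# .linkinv_gpls(2, family = "poisson",  link = "log")      # exp(2) ~ 7.389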

#' utility function for the derivative of the mean with respect to the linear predictor in gpls models
#'
#' @param eta input
#' @param family family
#' @param link link
#'
#' @export
#' @return the derivative of the mean with respect to the linear predictor (dmu/deta)
#' @examples
#' .mueta_gpls(0, family = "binomial", link = "logit") # 0.25
#'
#' @keywords internal
.mueta_gpls <- function(eta, family="gaussian", link="identity") {

  if (family=="gaussian"  ){
    if (link=="identity") rep(1,length(eta))
    else cat("link function not recognized for ", family, " only identity is available!\n")}
  else if (family=="binomial") {
    if (link=="logit") pmax(exp(eta)/(1+exp(eta))^2, .Machine$double.eps)
    else if (link=="probit") pmax(dnorm(eta), .Machine$double.eps)
    else if (link=="cloglog") pmax(exp(-exp(eta))*exp(eta), .Machine$double.eps)
    else if (link=="cauchit") pmax(dcauchy(eta), .Machine$double.eps)
    else if (link=="robit") pmax(dt(eta, df = 3), .Machine$double.eps)
    else cat("link function not recognized for ", family, " only logit, probit, cauchit, robit, and cloglog are available!\n")
  }
  else if (family=="poisson") {
    if (link=="log") exp(eta)
    if (link=="sqrt") 2*eta
    else cat("link function not recognized for ", family, " only log is available!\n" )
  }
  else if (family=="Gamma") {
    if (link=="inverse") -1/(eta^2)
    else if (link=="identity") rep(1,length(eta))
    else if (link=="log") exp(eta)
    else cat("link function not recognized for ", family, " only inverse is available!\n" )
  }
  else if (family=="inverse.gaussian") {
    if (link=="1/mu^2") -1/(2*eta^1.5)
    else cat("link function not recognized for ", family, " only invsquare is available!\n" )
  }
  else if (family=="negative.binomial") {
    if (link=="log") exp(eta)
    else cat("link function not recognized for ", family, " only log is available!\n" )
  }
}
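
## Sanity check (comments only): .mueta_gpls should agree with a central
## finite difference of .linkinv_gpls at the same eta:
# eta <- 0.3; h <- 1e-6
# (.linkinv_gpls(eta + h, "binomial", "logit") -
#    .linkinv_gpls(eta - h, "binomial", "logit")) / (2 * h)  # ~ 0.2445
# .mueta_gpls(eta, "binomial", "logit")                      # ~ 0.2445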


#' link function utility for gpls models
#' @param x input
#' @param family family
#' @param link link
#'
#' @export
#' @return the linear predictor
#' @keywords internal
.linkfun_gpls <-function(x, family="gaussian", link="identity") {

  if (family=="gaussian"  ){
    if (link=="identity") x
    else cat("link function not recognized for ", family, " only identity is available!\n")
  }
  if (family=="binomial") {
    if (link=="logit") log(x/(1-x))
    else if (link=="probit") qnorm(x)
    else if (link=="cloglog") log(-log(1-x))
    else if (link=="cauchit") qcauchy(x)
    else if (link=="robit") qt(x, df = 3)
    else cat("link function not recognized for ", family, " only logit, probit, cauchit, robit, and cloglog are available!\n")
  }
  else if (family=="poisson") {
    if (link=="log") log(x)
    else cat("link function not recognized for ", family, " only log is available!\n" )
  }
  else if (family=="Gamma") {
    if (link=="inverse") 1/x
    else cat("link function not recognized for ", family, " only inverse is available!\n" )
  }
  else if (family=="inverse.gaussian"){
    if (link=="1/mu^2") 1/(x^2)
    else cat("link function not recognized for ", family, " only invsquare is available!\n" )
  }
  else if (family=="negative.binomial") {
    if (link=="log") log(x)
    else cat("link function not recognized for ", family, " only log is available!\n" )
  }
}
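
## The link and inverse link are mutual inverses; a commented round-trip check:
# .linkinv_gpls(.linkfun_gpls(0.7, "binomial", "logit"), "binomial", "logit")  # 0.7
# .linkfun_gpls(.linkinv_gpls(1.2, "poisson", "log"), "poisson", "log")        # 1.2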

#' variance function for glm families in gpls models
#'
#' @param x input
#' @param family family
#' @param link link
#'
#' @export .variance_gpls
#' @return the variance function evaluated at the input
#' @examples
#' .variance_gpls(0, family = "binomial", link = "logit") # 0.25
#' @keywords internal
.variance_gpls <- function(x, family="gaussian", link="identity") {
  # variance function
  if (family=="gaussian"){
    if (link=="identity") rep(1,length(x))
    else cat("link function not recognized for ", family, " only identity is available!\n")
  }
  else if (family=="binomial") {
    if (link=="logit") exp(x)/(1+exp(x))^2
    else if (link=="probit") pnorm(x)*(1-pnorm(x))
    else if (link=="cloglog") exp(-exp(x))*(1-exp(-exp(x)))
    else if (link=="cauchit") pcauchy(x)*(1-pcauchy(x))
    else if (link=="robit") pt(x, df = 3)*(1-pt(x, df = 3))
    else cat("link function not recognized for ", family, " only logit, probit, cauchit, robit, and cloglog are available!\n")
  }
  else if (family=="poisson") {
    if (link=="log") exp(x)
    else cat("link function not recognized for ", family, " only log is available!\n" )
  }
  else if (family=="Gamma") {
    if (link=="inverse") x^2
    else cat("link function not recognized for ", family, " only inverse is available!\n" )
  }
  else if (family=="inverse.gaussian"){
    if (link=="1/mu^2") x^3
    else cat("link function not recognized for ", family, " only invsquare is available!\n" )
  }
  else if (family=="negative.binomial") {
    if (link=="log") {
      #theta <- uniroot(function(mu,t,y)(mu + (mu^2 / t) - y ), lower = -1e10, upper = 1e10, y = var(exp(x)), mu = mean(exp(x)))$root
      #theta <- MASS::theta.mm(x, mu = rep(mean(x), length(x)), dfr = length(x) - 1)


      ## ML estimate of the negative binomial dispersion theta via profile likelihood
      theta.est <- function(x){

        x <- round(x, 0)

        dnegative.binomial <- function (x, mu = 1, theta = 1, log = FALSE) {
          if (any(mu <= 0))
            stop(paste("mu must be greater than 0 ", "\n", ""))
          if (any(theta <= 0))
            stop(paste("theta must be greater than 0 ", "\n", ""))
          if (length(theta) > 1)
            fy <- ifelse(theta > 1e-04, dnbinom(x, size = mu/theta,
                                                mu = mu, log = log), dpois(x = x, lambda = mu, log = log))
          else fy <- if (theta < 1e-04)
            dpois(x = x, lambda = mu, log = log)
          else dnbinom(x, size = mu/theta, mu = mu, log = log)
          fy
        }

        f <- function(theta) sum(dnegative.binomial(x, mean(x), theta, log = TRUE))

        optimize(f, lower = 1, upper = 1000, maximum = TRUE)$maximum
      }
      theta <- theta.est(x)
      exp(x) + exp(x)^2/theta
    }
    else cat("link function not recognized for ", family, " only log is available!\n" )
  }
}
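
## For the canonical logit link the variance function evaluated at eta equals
## dmu/deta, since both reduce to exp(eta)/(1 + exp(eta))^2:
# .variance_gpls(0.3, "binomial", "logit")  # ~ 0.2445
# .mueta_gpls(0.3, "binomial", "logit")     # ~ 0.2445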

#' initial value utility function for gpls models
#'
#' @param x input
#' @param family glm family
#' @return initial values
#' @examples
#' .yinitfunc(c(0, 1), family = "binomial") # 0.25 0.75
#' @keywords internal
.yinitfunc <- function(x, family="gaussian") {

  # initial value for dependent variable
  if (family=="binomial") return((x+0.50) * 0.50)
  else if (family=="poisson") return(log(x + 0.01))
  else if (family=="negative.binomial") return(log(x + 0.01))
  else if (family=="Gamma") return(1/x)
  else if (family=="inverse.gaussian") return(1/(x^2))
  else if (family=="gaussian") return(x)
  else return(x)

}

#' utility function for calculating probabilities in binomial gpls models
#'
#' @param y input
#' @param eta linear predictor
#' @return probabilities
#' @keywords internal
#' @examples
#' .probcalc(c(1, 0), c(0.3, -0.2))
#'
.probcalc <- function(y, eta) {
  ## P(Y = y) under a logistic model; plogis is used instead of
  ## exp(eta)/(1 + exp(eta)) so that large |eta| does not overflow
  ifelse(y == 1, plogis(eta), 1 - plogis(eta))
}

#' gpls function for internal use
#'
#' fits a gpls object
#'
#' @param X model matrix
#' @param y response
#' @param ncomp number of components
#' @param family glm family
#' @param link glm link
#' @param eps convergence tolerance
#' @param maxit max iterations of iteratively reweighted least squares algorithm
#' @param denom.eps tolerance for small numbers
#' @param firth should Firth's bias correction be applied? defaults to FALSE.
#' @keywords internal
gpls.default <- function(X, y, ncomp = NULL, eps=1e-3, maxit=1000, denom.eps=1e-20, family=NULL, link=NULL, firth=FALSE){

  testInteger <- function(x){
    isTRUE(all.equal(x, as.integer(x), check.attributes = FALSE))
  }

  if (is.null(family)){

    if (all(y >= 0)){
      if (testInteger(y)){
        if (length(unique(y)) == 2){
          family <- "binomial"
        }
        else {
          family <- "poisson"
        }
      }
      else {
        family <- "Gamma"
      }
    }
    else {
      family <- "gaussian"
    }
  }

  if (is.null(link)) {
    if (family=="gaussian") link <- "identity"
    else if (family=="binomial") link <- "logit"
    else if (family=="poisson") link <- "log"
    else if (family=="negative.binomial") link <- "log"
    else if (family=="Gamma") link <- "inverse"
    else if (family=="inverse.gaussian") link <- "1/mu^2"
    else stop("unknown family", family)
  }

  X <- as.matrix(X)
  dx <- dim(X)

  if (family == "binomial"){

    if (is.factor(y)) {
      levs = levels(y)
      if( length(levs) > 2 )
        levs = c(levs[1], "Other")
      y <- y != levels(y)[1]
    }
    else levs = unique(y)

    if(length(levs) > 2)
      warning("y has more than two levels")

    if (is.factor(y)){
      y <- as.numeric(y) - 1
    }

    if (is.numeric(y)){
      maxy <- max(y)
      miny <- min(y)
      y[which(y > miny)] <- 1
      y[which(y == miny)] <- 0
    }

    if (is.character(y)){
      y <- as.numeric(as.factor(y)) - 1
    }

    if (any(y < 0 | y > 1))
      stop("y values must be 0 <= y <= 1")
  }

  ### number of PLS components

  if (is.null(ncomp)) K <- min(dx[1]-2,dx[2])
  else if (ncomp > min(dx[1]-1,dx[2])){
    cat("Specified number of components exceeds rank of X. \n", "Setting the number of components to ", min(dx[1]-1,dx[2]),"!\n");
    K <- min(dx[1]-1,dx[2])
  }
  else{
    K <- ncomp
  }

  ### initializing weights for PLS regression

  ## weight matrix for PLS regression
  W<- matrix(0,dx[2],K)
  ## score matrix
  Ta <- matrix(0,dx[1],K)
  ## loading matrix for X
  P <- matrix(0,dx[2],K)
  ## loading matrix for y
  Q <- numeric(K)
  ## initial predictor matrix
  E <- X
  ## initial linear predictor
  ystar <- cvreg:::.yinitfunc(y, family)
  ystar0 <- ystar
  ## Weight matrix for glm
  V <- diag(c(.mueta_gpls(ystar, family,link)^2/.variance_gpls(ystar, family, link)))
  ## standardized weights for predictor matrix
  E <- t(E)-apply(E,2,weighted.mean,diag(V))
  E <- t(E)
  ## linear predictor
  eta <- rep(0,length(y))
  eta.old <- eta+10
  ## setup initial conditions
  Q <- rep(0,K)
  Q.old <- rep(100,K)
  K.old <- K
  min <- 1000
  beta <- rep(1,ncol(X))
  beta.old <- beta/1000
  l <- 0; converged <- F

  while ((max(abs(beta-beta.old)/(abs(beta.old)+denom.eps)) > eps) & (l < maxit))     {

    if ((max(abs(beta-beta.old)/(abs(beta.old)+denom.eps)) < min))
    {
      if(l==1)
      {
        W.min <- W
        P.min <- P
        Q.min <- Q
      }
      else
      {
        ## record minimum values
        min <- max(abs(beta-beta.old)/(abs(beta.old)+denom.eps))
        W.min <- W
        P.min <- P
        Q.min <- Q
        eta.min <- eta
        l.min <- l
      }
    }

    K.old <- K
    l <- l + 1
    W <- matrix(0,dx[2],K)
    Ta <- matrix(0,dx[1],K)
    P <- matrix(0,dx[2],K)
    Q <- numeric(K)

    ### PLS regression

    for ( i in 1:K) {
      w <- crossprod(E,V)%*%ystar
      w <- w/sqrt(crossprod(w)[1])
      W[,i] <- w
      ta <- E%*%w
      Ta[,i] <- ta
      taa <- (crossprod(ta,V)%*%ta)[1]
      p <- crossprod(E,V)%*%ta/taa
      P[,i] <- p
      q <- (crossprod(ystar,V)%*%ta/taa)[1]
      Q[i] <- q
      ystar <- ystar - q*ta
      E <- E - tcrossprod(ta,p)
    }

    ## update beta
    temp <- pseudoinverse(crossprod(P,W))
    if(any(is.na(temp))){
      break;
    }
    else{
      beta.old <- beta
      beta <- W%*%temp%*%Q
    }

    ## Hat matrix
    H <- hat(sweep(cbind(rep(1,dx[1]),X),1,sqrt(diag(V)),"*"),intercept=FALSE)
    ## update eta (linear predictor)
    eta <- weighted.mean(ystar0,diag(V)) + Ta%*%Q
    ## update and rescaling weight matrix
    V <- diag(c(.mueta_gpls(eta, family,link)^2/.variance_gpls(eta, family,link)))
    V <- V*(H*firth+1)
    ## diagnosis for divergence
    if(sum(diag(V)=="NaN") > 0){break;}
    if (sum(round(cvreg:::.probcalc(y,eta),4)>=0.9999)==length(y)){break;}
    ## update estimate of linear predictor
    ystar <- eta + diag(c(1/.mueta_gpls(eta, family,link)))%*%(y+H*firth/2 - (H*firth+1)*.linkinv_gpls(eta, family,link))/(H*firth +1)
    ystar0 <- ystar
    ## update predictor matrix
    E <- t(X)-apply(X,2,weighted.mean,diag(V))
    E <- t(E)
    ## update hat matrix
    if (firth){Hat <- H*firth}else{Hat <- H}
  }

  if (max(abs(beta-beta.old)/(abs(beta.old)+denom.eps)) > eps){
    W <- W.min; P <- P.min; Q <- Q.min; eta <- eta.min
  }
  else{
    converged <- T
  }

  ## final estimates
  beta <- W%*%pseudoinverse(crossprod(P,W))%*%Q
  beta0 <- mean(drop(eta)-drop(X%*%beta))
  ## assign names to the coefficients
  coef = c(beta0,beta)
  dnx = dimnames(X)[[2]]
  if(length(dnx) != length(beta))
    dnx = paste("X", 1:length(beta), sep=":")
  names(coef) = c("(Intercept)", dnx)

  colnames(P) <- c(paste0("Factor.", 1:ncol(P)))
  rownames(P) <- names(coef)[-1]
  colnames(Ta) <-  c(paste0("Factor.", 1:ncol(Ta)))

  if (family == "binomial"){

    ans = list(coefficients=coef,
               convergence = converged,
               niter = l,
               family = family,
               link = link,
               ncomp = K,
               levs = levs,
               firth = firth,
               loadings = P,
               scores = Ta,
               linear.predictor = as.vector(eta),
               fitted = as.vector(.linkinv_gpls(eta, family, link)),
               y_obs = y,
               hat = Hat,
               irwls.wts = V,
               orthdist = cvreg:::.orthdist(X, P),
               scoredist = cvreg:::.scoredist(Ta, apply(Ta, 2, var))
               )

  }
  else if (family == "negative.binomial") {

    ## ML estimate of the negative binomial dispersion theta via profile likelihood
    theta.est <- function(x){

      x <- round(x, 0)

      dnegative.binomial <- function (x, mu = 1, theta = 1, log = FALSE) {
        if (any(mu <= 0))
          stop(paste("mu must be greater than 0 ", "\n", ""))
        if (any(theta <= 0))
          stop(paste("theta must be greater than 0 ", "\n", ""))
        if (length(theta) > 1)
          fy <- ifelse(theta > 1e-04, dnbinom(x, size = mu/theta,
                                              mu = mu, log = log), dpois(x = x, lambda = mu, log = log))
        else fy <- if (theta < 1e-04)
          dpois(x = x, lambda = mu, log = log)
        else dnbinom(x, size = mu/theta, mu = mu, log = log)
        fy
      }

      f <- function(theta) sum(dnegative.binomial(x, mean(x), theta, log = TRUE))

      optimize(f, lower = 1, upper = 1000, maximum = TRUE)$maximum
    }

    theta <- theta.est(as.vector(.linkinv_gpls(eta, family, link)))

    ans = list(coefficients=coef,
               convergence = converged,
               niter = l,
               ncomp = K,
               family = family,
               link = link,
               firth = firth,
               loadings = P,
               scores = Ta,
               theta = theta,
               linear.predictor = as.vector(eta),
               fitted = as.vector(.linkinv_gpls(eta, family, link)),
               y_obs = y,
               hat = Hat,
               irwls.wts = V,
               orthdist = cvreg:::.orthdist(X, P),
               scoredist = cvreg:::.scoredist(Ta, apply(Ta, 2, var))
            )
  }
  else {
    ans = list(coefficients=coef,
               convergence = converged,
               niter = l,
               ncomp = K,
               family = family,
               link = link,
               firth = firth,
               loadings = P,
               scores = Ta,
               linear.predictor = as.vector(eta),
               fitted = as.vector(.linkinv_gpls(eta, family, link)),
               y_obs = y,
               hat = Hat,
               irwls.wts = V,
               orthdist = cvreg:::.orthdist(X, P),
               scoredist = cvreg:::.scoredist(Ta, apply(Ta, 2, var))
            )
  }

  class(ans) = "gpls"
  ans
}
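
## Minimal sketch of calling the workhorse directly on simulated data
## (gpls.formula is the intended user-facing entry point; gpls.default is
## internal, hence the ::: below):
# set.seed(1)
# X <- matrix(rnorm(100 * 4), 100, 4)
# y <- rbinom(100, 1, plogis(X %*% c(1, -1, 0.5, 0)))
# fit <- cvreg:::gpls.default(X, y, ncomp = 2, family = "binomial", link = "logit")
# fit$coefficients; fit$convergence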

#' Projection to Latent Structures for Generalized Linear Models
#'
#' @param formula model formula
#' @param data a data frame
#' @param ncomp number of components to retain
#' @param eps convergence tolerance
#' @param maxit maximum number of iterations
#' @param denom.eps tolerance value for the denominator below which a number is treated as zero
#' @param family "gaussian", "poisson", "negative.binomial", "binomial", "multinom", "Gamma", "inverse.gaussian"
#' @param link the link function. see details for available options.
#' @param firth should Firth's bias correction be applied? defaults to FALSE.
#' @param contrasts model contrasts
#' @param ... other
#'
#'
#' @details This function implements what is often called partial least squares for generalized
#' linear models. However, the Swedish statisticians Herman Wold and Svante Wold, who invented
#' the method, maintain that the proper name is projection to latent structures. That name is
#' used here because it would be improper to describe generalized linear models with the name
#' least squares. As the name implies, PLS works by projecting the predictor matrix onto a lower
#' dimensional subspace composed of latent factors (in the sense of factor analysis, not categorical variables).
#' Essentially, this can save an analytic step. Instead of asking "what factors underlie my variables"
#' and then using the factor scores as predictors, PLS directly answers the question "what latent factors explain
#' my outcome variable?" The returned regression coefficients correspond to the original set of explanatory
#' variables, facilitating inference about the original variables when the model is used for purposes
#' other than factor analysis. \cr
#' \cr
#' PLS regression is useful for a variety of circumstances. These include the following:
#'
#' \itemize{
#'  \item multicollinear variables.
#'  \item variables believed to be measures of an underlying latent factor (which typically entails multicollinearity)
#'  \item regression problems where there are more predictor variables than observations (deficient rank)
#'  \item minimizing prediction error in a manner similar to ridge regression
#'  \item recovering a set of factors that explain an outcome, a sort of "supervised factor analysis".
#' }
#' \cr
#' \cr
#' Several likelihood functions are implemented here. These include the gaussian, binomial, multinomial,
#' poisson, negative binomial, gamma, and inverse gaussian distributions. The gaussian distribution is naturally
#' ideal for continuous data. The binomial distribution is utilized for binary outcomes, while the
#' multinomial distribution can model multiple outcomes. The poisson and negative binomial distributions
#' are appropriate for integer count data, with the negative binomial being well suited for overdispersed
#' counts. The gamma distribution and inverse gaussian distributions are appropriate for continuous data
#' with positive support, with the gamma assuming a constant coefficient of variation and the inverse
#' gaussian being suitable for heteroskedastic and/or highly skewed data. \cr
#' \cr
#' The following link functions are available for each distribution: \cr
#' \cr
#' Gaussian: "identity" \cr
#' Binomial & Multinomial: "logit", "probit", "cauchit", "robit" (Student T with 3 df), and "cloglog" \cr
#' Poisson & Negative Binomial: "log" \cr
#' Gamma: "inverse" (1 / x) \cr
#' Inverse Gaussian: "1/mu^2" (1/x^2)
#'
#'
#' @examples
#' \dontrun{gpls(y ~ ., data = df)}
#' @export
#' @return a gpls object containing the model fit, factor loadings, linear predictors, fitted values, and an assortment of other things.
#'
gpls.formula = function(formula, data, ncomp =NULL, eps=1e-3, maxit=100, denom.eps=1e-20, family = NULL, link = NULL, firth=FALSE, contrasts=NULL, ...) {

  mf = match.call()
  m = match(c("formula", "data"), names(mf), 0)
  mf = mf[c(1,m)]
  mf[[1]] = as.name("model.frame")
  mf = eval(mf, parent.frame())
  mt = attr(mf, "terms")
  y = model.response(mf)
  x = if( !is.empty.model(mt))
    model.matrix(mt, mf, contrasts)
  else matrix(1, NROW(y), 0)
  xint = match("(Intercept)", colnames(x), nomatch=0 )
  if(xint > 0 )
    x <- x[, -xint, drop=FALSE]

  testInteger <- function(x){
    isTRUE(all.equal(x, as.integer(x), check.attributes = FALSE))
  }

  if (is.null(family)){

    if (is.factor(y)){
      if (length(unique(levels(y))) == 2){
        family <- "binomial"
      }
      else if (length(unique(levels(y))) >=3){
        family <- "multinom"
      }
    } else if (all(y >= 0)){
      if (testInteger(y)){
        if (length(unique(y)) == 2){
          family <- "binomial"
        }
        else {
          family <- "poisson"
        }
      }
      else {
        family <- "Gamma"
      }
    }
    else {
      family <- "gaussian"
    }
  }

  if (is.null(link)) {
    if (family=="gaussian") link <- "identity"
    else if (family=="binomial") link <- "logit"
    else if (family=="poisson") link <- "log"
    else if (family=="negative.binomial") link <- "log"
    else if (family=="Gamma") link <- "inverse"
    else if (family=="inverse.gaussian") link <- "1/mu^2"
    else if (family=="multinom") link <- "robit"
    else stop("unknown family", family)
  }


  if (family == "multinom"){
    ans = cvreg:::gplsSoftmax(x, y, ncomp , eps, maxit, denom.eps, link, firth)

  } else {
    ans = cvreg:::gpls.default(x, y, ncomp , eps, maxit, denom.eps, family, link, firth)
  }

  if (family != "multinom"){

    newlink <- switch(link,
                      "logit" = "logit",
                      "probit" = "probit",
                      ## robit is not an available glm link; cauchit is the closest built-in
                      "robit" = "cauchit",
                      "cauchit" = "cauchit",
                      "cloglog" = "cloglog")

    glmfamily <- switch(ans$family,
                        "gaussian" = gaussian(link = "identity"),
                        "binomial" = quasibinomial(link = newlink),
                        "poisson" = poisson(link = "log"),
                        "Gamma" = Gamma(link = "inverse"),
                        "negative.binomial" = quasipoisson(link = "log"),
                        "inverse.gaussian" = inverse.gaussian(link = "1/mu^2")
    )

    glmfit <- suppressMessages(suppressWarnings(glm(y ~ ., cbind.data.frame(y = ans$y_obs, ans$scores), family = glmfamily)))
    ans$pearson.residuals <- residuals(glmfit, type = "pearson")
    ans$deviance.residuals <- cvreg:::dev.res(ans$y_obs, ans$fitted, wt = NULL, family = family, ncomp = ncomp)
  }
  ans$terms = mt
  ans$call = match.call()
  ans$model.frame <- model.frame(formula, data)
  ans$model.matrix <- model.matrix(formula, data)
  ans$formula <- formula
  ans$link <- link
  return(ans)
}
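
## Hedged usage sketch for the formula interface (mtcars ships with base R):
# fit <- gpls(am ~ mpg + wt + hp, data = mtcars, ncomp = 2, family = "binomial")
# print(fit)          # coefficients are reported on the original predictors
# head(fit$fitted)    # fitted probabilities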

#' Predict method for gpls models
#'
#' @param object the fitted model
#' @param newdata the new data
#' @param type either "link" (untransformed linear predictor), "response" (transformed prediction),
#' or "class" (for binomial models only)
#' @param se.fit should the standard errors of prediction also be returned
#'
#' @return predictions on the requested scale (with standard errors if se.fit = TRUE)
#' @method predict gpls
#' @export
#' @keywords internal
predict.gpls <- function(object, newdata = NULL, type = c("response", "link", "class"), se.fit = FALSE){

  if (!inherits(object, "gpls"))
    stop("object not of class gpls")

  tt <- terms(object)
  type <- match.arg(type)
  family <- object$family
  link <- object$link
  formula <- object$formula
  coefs <- coef(object)

  if (is.null(newdata)){
    Newdata <- NULL
  } else{
    if (!is.data.frame(newdata)){
      newdata <- as.data.frame(newdata)
    }
    ox <- object$model.matrix[,-1]
    Newdata <- model.matrix(formula, newdata)[,-1]
    Newdata <- base::scale(Newdata, colMeans(ox), rep(1, ncol(ox))) %*% object$loadings
    Newdata <- as.data.frame(Newdata)
    colnames(Newdata) <- paste0("FACTOR", 1:ncol(object$scores))
  }

  if (family == "binomial"){

    newlink <- switch(link,
                      "logit" = "logit",
                      "probit" = "probit",
                      "robit" = "cauchit",
                      "cauchit" = "cauchit",
                      "cloglog" = "cloglog")
  }

  newfamily <- switch(family,
                      "gaussian" = gaussian(link = "identity"),
                      "binomial" = binomial(link = newlink),
                      "poisson" = poisson(link = "log"),
                      "Gamma" = Gamma(link = "inverse"),
                      "negative.binomial" = quasipoisson(link = "log"),
                      "inverse.gaussian" = inverse.gaussian(link = "1/mu^2")
  )

  dat <- cbind.data.frame(y = object$y_obs, object$scores)
  colnames(dat) <- c("y", paste0("FACTOR", 1:ncol(object$scores)))
  glmfit <- glm(y ~ ., dat, family = newfamily)

  if (type == "class"){
    if (family != "binomial"){
      stop("type 'predict' is only valid for predicting class labels for binomial models.")
    }
    else {
      preds <- predict(glmfit, newdata = Newdata, type = "response", se.fit = FALSE)
      levs <- object$levs
      classification <- preds > 0.50
      labels <- ifelse(classification, levs[1], levs[2])
      return(labels)
    }
  }
  else if (type == "link"){
    preds <- predict(glmfit, newdata = Newdata, type = "link", se.fit = se.fit)
  }
  else if (type == "response"){
    preds <- predict(glmfit, newdata = Newdata, type = "response", se.fit = se.fit)
  }

  return(preds)
}
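
## Continuing the sketch from gpls.formula: predictions on each scale
## (fit as in the example above):
# predict(fit, newdata = mtcars, type = "response")  # probabilities
# predict(fit, newdata = mtcars, type = "link")      # linear predictor
# predict(fit, type = "class")                       # labels; binomial fits only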

#' Summary method for gpls models
#'
#' @param model a fitted model object
#' @param se.type One of the robust standard error methods described in \link[cvreg]{vcovHC.gpls}. This defaults
#' to the HC4m method, which I have found to give the most accurate results. Note that typical standard errors are
#' not available for partial least squares. An alternative to HC standard errors is to bootstrap
#' standard errors with the boot package.
#' @method summary gpls
#' @return a summary
#' @export
#' @keywords internal
summary.gpls <- function(model, se.type = "HC4m"){
  purple = crayon::make_style(rgb(0.565, 0, 0.769))
  cat(purple("Call:\n"), deparse(model$call), "\n", sep = "")
  cat(crayon::blue("\nLatent Variables: "), deparse(model$ncomp), "\n", sep = "")
  cat(crayon::green("\nFamily: "), noquote(model$family), "(link = ", noquote(model$link), ")", "\n", sep = "")
  std.err = sqrt(diag(vcovHC(model, type = se.type)))
  results = cbind.data.frame(coefs = round(model$coefficients, 3), round(std.err, 4), round(model$coefficients/std.err, 3), round(2 * pnorm(abs(model$coefficients/std.err), lower.tail = F), 5))
  colnames(results) <- c("Estimate", "Std. Error", "t value", "Pr(>|t|)")
  orange = crayon::make_style(rgb(1, 0.314, 0))
  cat(orange("\nCoefficients: \n\n"))
  return(results)
}

#' Print method for gpls models
#'
#' @param x the model
#' @param digits digits to round
#' @method print gpls
#' @return none
#' @export
#' @keywords internal
print.gpls = function(x, digits = 3) {
  cat("Call:\n", deparse(x$call), "\n", sep = "")
  cat(crayon::blue("\nLatent Variables: "), deparse(x$ncomp), "\n", sep = "")
  cat(crayon::green("\nFamily: "), noquote(x$family), "(link = ", noquote(x$link), ")", "\n", sep = "")
  if (length(coef(x))) {
    cat(crayon::magenta("\nCoefficients:\n"))
    coef <- round(zapsmall(x$coefficients, 18), digits)
    print.default(coef, print.gap = 1, quote = FALSE, digits = digits)
  }
  if (x$family == "negative.binomial"){
    purpletext <- crayon::make_style(rgb(.51, .098, .635, 0.95), bg = FALSE)
    cat(purpletext("\n\nNegative Binomial Excess Dispersion",  " : ", round(x$theta, digits), "\n"))
  }
  invisible(x)
}

#' gpls function for internal use
#'
#' fits a gpls softmax object
#'
#' @param x model matrix
#' @param y response
#' @param ncomp number of components
#' @param family glm family
#' @param link glm link
#' @param eps convergence tolerance
#' @param maxit max iterations of iteratively reweighted least squares algorithm
#' @param denom.eps tolerance for small numbers
#' @param firth should Firth's bias correction be applied? defaults to TRUE.
#' @keywords internal
gplsSoftmax <- function(x, y, ncomp =NULL, eps=1e-3, maxit=100, denom.eps=1e-20, family = "binomial", link, firth=T){

  family <- "binomial"
  names <- colnames(x)

  if (is.numeric(y)){
    y.levs <- seq(min(y), max(y), len = length(unique(y)))
    all.levs <- y.levs
    all.levs <- as.character(all.levs)
    y.levs <- y.levs[-1]
    y.levs <- as.character(y.levs)
  }

  if (is.factor(y)){
    all.levs <- levels(y)
    y.levs <- levels(y)[-1]
    y <- as.numeric(y)
  }

  if (is.character(y)){
    y <- as.factor(y)
    all.levs <- levels(y)
    y.levs <- levels(y)[-1]
    y <- as.numeric(y)
  }

  if(any(x[,1] !=1))
    x <- cbind(rep(1,nrow(x)),x)

  x <- as.matrix(x)
  yv <- as.numeric(as.vector(y))
  r <- ncol(x)
  C <- max(yv)-1
  n <- nrow(x)
  y <- matrix(0,n,C+1)
  y[cbind(seq(n),yv)] <- 1
  y0 <- y[,1]
  y <- y[,-1]
  jn <- rep(1,n)
  jC <- rep(1,C)
  y2 <- as.vector(t(y))
  x2 <- t(kronecker(rep(1,C),x))
  dim(x2) <- c(r,n,C)
  x2 <- aperm(x2,c(1,3,2))
  dim(x2) <- c(r,n*C)
  x2 <- t(x2)

  X <- matrix(0,nrow=C*n,ncol=C*r)
  for ( i in 1:n)
  {
    for (j in 1:C)
    {
      X[((i-1)*C+j),(((j-1)*r+1):((j-1)*r+r))] <- x[i,]
    }
  }

  dx <- dim(X)

  if (is.null(ncomp)) K <- min(dx[1]-1,dx[2])
  else if (ncomp > min(dx[1]-1,dx[2]))
  {
    ## break is not legal outside a loop; fall back to the maximum rank instead
    cat("Specified number of components exceeds rank of X. \n", "Setting the number of components to ", min(dx[1]-1,dx[2]),"!\n")
    K <- min(dx[1]-1,dx[2])
  }
  else
  {
    K <- ncomp
  }

  ### initializing matrices for PLS regression
  ## weight matrix for PLS regression
  W<- matrix(0,dx[2],K)
  ## score matrix
  Ta <- matrix(0,dx[1],K)
  ## loading matrix for X
  P <- matrix(0,dx[2],K)
  ## loading matrix for y
  Q <- numeric(K)
  ## initial predictor matrix
  E <- X
  ## Weight matrix for GLM
  V <- diag(abs(rst(C*n, nu = 12)), C*n)
  ## initial linear predictor
  ystar <- cvreg:::.yinitfunc(y2, family="binomial")
  ystar0 <- ystar
  eta.old <- rep(1,length(y2))
  eta <- eta.old+100
  ## set starting point for iterations
  Q <- rep(0,K)
  Q.old <- rep(100,K)
  K.old <- K
  min <- 1000
  beta <- rep(1,C*r)
  beta.old <- beta/1000
  converged <- F
  l <- 0
  l.min <- 1

  while ((max(abs(beta-beta.old)/(abs(beta.old)+denom.eps)) > eps) & (l < maxit)){
    if ((max(abs(beta-beta.old)/(abs(beta.old)+denom.eps)) < min)){
      if(l==1){
        W.min <- W; P.min <- P; Q.min <- Q
      }
      else{
        ## record minimum values
        min <- max(abs(beta-beta.old)/(abs(beta.old)+denom.eps))
        W.min <- W
        P.min <- P
        Q.min <- Q
        eta.min <- eta
        l.min <- l
      }
    }

    K.old <- K
    l <- l + 1
    W <- matrix(0,dx[2],K)
    Ta <- matrix(0,dx[1],K)
    P <- matrix(0,dx[2],K)
    Q <- numeric(K)

    ### PLS regression
    for ( i in 1:K) {
      w <- t(E)%*%V%*%ystar
      w <- w/sqrt(crossprod(w)[1])
      W[,i] <- w
      ta <- E%*%w
      Ta[,i] <- ta
      taa <- (t(ta)%*%V%*%ta)[1]
      p <- t(E)%*%V%*%ta/taa
      P[,i] <- p
      q <- (t(ystar)%*%V%*%ta/taa)[1]
      Q[i] <- q
      ystar <- ystar - q*ta
      E <- E - ta%*%t(p)

    }

    ## update beta

    temp <- pseudoinverse(t(P)%*%W)
    if(any(is.na(temp))){
      break;
    }
    else{
      beta.old <- beta
      beta <- W%*%temp%*%Q
    }

    ## Hat matrix
    H <- V%*%X%*%pseudoinverse(t(X)%*%V%*%X)%*%t(X)

    ## update eta (linear predictor)
    eta <- as.vector(t(x%*%matrix(beta,ncol=C,byrow=F)))
    p <- exp(x%*%matrix(beta,ncol=C ,byrow=F))

    den.p <- 1+as.vector(p%*%jC)
    if(any(is.infinite(den.p))){break;}

    p <- p2 <- p/den.p
    dim(p2) <- c(n,C)
    p2 <- as.vector(t(p2))

    ## update and rescale weight matrix
    V <- matrix(0, C*n, C*n)
    for(j in seq(n)) {
      V[((j - 1) * C + 1):(j * C), ((j - 1) * C + 1):(j * C)] <- diag(p[j,],length(p[j,]))-matrix(p[j,],nrow=C,ncol=1)%*%matrix(p[j,],nrow=1,ncol=C)
    }

    detadmu <- V
    for ( j in seq(n)){
      for (i in ((j - 1) * C + 1):(j * C))
        for (k in (i:(j*C))){
          sum <- sum(p2[((j-1)*C+1):(j*C)])
            if(i==k){
              detadmu[i,k] <- (1-sum+p2[k])/(p2[k]*(1-sum))
            }
            else{
              detadmu[i,k] <- 1/(1-sum)
              detadmu[k,i] <- detadmu[i,k]
            }
        }
    }

    Hw <- NULL
    for ( j in seq(n)){
      sum <- sum(diag(H)[((j - 1) * C + 1):(j * C)])
      for (i in ((j - 1) * C + 1):(j * C)){
        Hw[i] <- sum + diag(H)[i]
      }
    }

    ## update estimate of linear predictor
    if(any(is.na(detadmu))){break;}
    ystar <- eta + detadmu%*%(y2+diag(H)*firth/2-(Hw*firth/2+1)*p2)/(Hw*firth/2+1)
    ystar0 <- ystar
    V <- V*(Hw*firth/2+1)
    ## update predictor matrix
    E <- X
  }

  if (max(abs(beta-beta.old)/(abs(beta.old)+denom.eps)) > eps){
    W <- W.min
    P <- P.min
    Q <- Q.min
  }
  else{
    converged <- T
  }
  ## final estimates
  beta <- W%*%pseudoinverse(t(P)%*%W)%*%Q
  beta <- matrix(beta,ncol=C ,byrow=F)
  rownames(beta) <- c("(Intercept)", names)
  colnames(beta) <- y.levs
  colnames(P) <- c(paste0("Factor.", 1:ncol(P)))
  colnames(Ta) <-  c(paste0("Factor.", 1:ncol(Ta)))

  out <- list(coefficients=beta,
              convergence = converged,
              niter = l,
              firth = firth,
              X = x,
              loadings = P,
              scores = Ta,
              family = "multinom",
              link = link,
              ncomp = K,
              irwls.wts = W,
              outcome.levs = all.levs)
  return(structure(out, class = "gplsSoftmax"))

}
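
## Multinomial sketch: the formula interface dispatches here when
## family = "multinom" (iris ships with base R):
# mfit <- gpls(Species ~ ., data = iris, ncomp = 2, family = "multinom")
# head(predict(mfit, type = "response"))             # per-class probabilities
# table(predict(mfit, type = "class"), iris$Species)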


#' Predict method for softmax (multinomial) gpls models
#'
#' @param model the model
#' @param newdata the new data
#' @param type one of "response" to return probabilities, or "class" to return class labels
#'
#' @return a matrix of class probabilities or a vector of class labels
#' @method predict gplsSoftmax
#' @export
#' @keywords internal
predict.gplsSoftmax <- function(model, newdata = NULL, type = c("response", "class")) {

  type <- match.arg(type)

  if (is.null(newdata)){
    X <- model$X
  } else {
    ## newdata is assumed to be a numeric matrix or data frame whose columns
    ## match the training predictors
    X <- as.matrix(newdata)
  }

  beta <- model$coefficients

  if(all(X[,1] == rep(1,nrow(X))))
  {
    eta <- X%*%beta
  } else
  {
    eta <- cbind(rep(1,nrow(X)),X)%*% beta
  }

  p <- exp(eta)
  p.denom <- apply(p, 1, function(x) 1+sum(x))
  probabilities <- p/p.denom
  probabilities <- cbind(1 - rowSums(probabilities), probabilities)
  colnames(probabilities) <- model$outcome.levs
  if (type == "response")
    return(probabilities)
  else
    return(model$outcome.levs[sapply(1:nrow(probabilities), function(x) which.max(probabilities[x, ]))])
}

#' Print method for gplsSoftmax  models
#'
#' @param x the model
#' @param digits digits to round
#'
#' @return none
#' @method print gplsSoftmax
#' @export
#' @examples \dontrun{print(fit)}
#' @keywords internal
print.gplsSoftmax = function(x, digits = max(3, getOption("digits") - 3)){
  cat("Call:\n", deparse(x$call), "\n", sep = "")
  cat("\nLatent Variables: ", deparse(x$ncomp), "\n", sep = "")
  cat("\nFamily: ", "multinom(link = ", noquote(x$link), ")", "\n\n", sep = "")
  if (length(coef(x))) {
    cat("Coefficients:\n")
    print.default(x$coefficients, print.gap = 2, quote = FALSE)
  }
  invisible(x)
}


#' Summary method for gplsSoftmax models
#'
#' @param x the model
#' @param digits digits to round
#'
#' @return none
#' @method summary gplsSoftmax
#' @export
#' @examples \dontrun{summary(fit)}
#' @keywords internal
summary.gplsSoftmax = function(x, digits = 3){
  cat("Call:\n", deparse(x$call), "\n", sep = "")
  cat("\nLatent Variables: ", deparse(x$ncomp), "\n", sep = "")
  cat("\nFamily: ", "multinom(link = ", noquote(x$link), ")", "\n\n", sep = "")
  if (length(coef(x))) {
    cat("Coefficients:\n")
    print.default(x$coefficients, print.gap = 2, quote = FALSE)
  }
  invisible(x)
}



#' Sparse Generalized Projection to Latent Structures
#'
#' @description Sparse projection to latent structures (partial least squares)
#' is an extension of PLS that takes advantage of L1 regularization via a
#' LARS-like algorithm to select predictors. Predictors which do not load highly
#' on any of the latent variables get dropped from the model, and the corresponding
#' regression estimates are shrunk to zero. Here sparse PLS is extended to include
#' generalized linear models. Only univariate outcomes are supported here.
#'
#' @param formula model formula
#' @param data a data frame
#' @param ncomp number of components to retain
#' @param lambda a regularization parameter.
#' @param family "gaussian", "poisson", "negative.binomial", "binomial", "Gamma", or "inverse.gaussian"
#' @param link the link function. see details for available options.
#' @param scale should the predictors (and, for gaussian models, the response) be standardized? Defaults to TRUE.
#' @return an sgpls object
#' @export
#' @references
#' Chun, H., & Keles, S. (2010). Sparse partial least squares regression for simultaneous dimension reduction and variable selection. Journal of the Royal Statistical Society: Series B (Statistical Methodology), 72(1), 3–25. doi:10.1111/j.1467-9868.2009.00723.x
#'
sgpls <- function(formula, data, ncomp = 2, lambda = 0.01, family = "gaussian", link = "identity", scale = T){

  this.call <- match.call()
  if (sum(family %in% c("gaussian", "poisson", "negative.binomial", "binomial", "Gamma", "inverse.gaussian"))==0){
    stop("Please supply as a string to the argument 'family' one of gaussian, poisson, negative.binomial, binomial, Gamma, or inverse.gaussian")
  }
  x <- model.matrix(formula, data)
  if (all(x[,1]==1)){
      xnames <- colnames(x)[-1]
      x <- x[,-1]
  }
  else{
    x <- x; xnames <- colnames(x)
  }
  y <- model.frame(formula, data);
  yname <- colnames(y)[1]
  y <- model.response(y)

  if (family == "binomial"){

    if (is.factor(y)) {
      levs = levels(y)
      if( length(levs) > 2 )
        levs = c(levs[1], "Other")
      y <- y != levels(y)[1]
    }
    else levs = unique(y)

    if(length(levs) > 2)
     stop("y has more than two levels. sparse multinomial PLS not currently supported by this function")

    if (is.factor(y)){
      y <- as.numeric(y) - 1
    }

    if (is.numeric(y)){
      maxy <- max(y)
      miny <- min(y)
      y[which(y > miny)] <- 1
      y[which(y == miny)] <- 0
    }

    if (is.character(y)){
      y <- as.numeric(as.factor(y)) - 1
    }

    if (any(y < 0 | y > 1))
      stop("y values must be 0 <= y <= 1")
  }

  K <- ncomp
  n <- nrow(x)
  p <- ncol(x)
  ip <- c(1:p)
  y <- as.matrix(y)
  q <- ncol(y)
  one <- matrix(1,1,n)
  # center & scale y & x
  if (family=="gaussian"){
    if (scale) y <- scale(y)
  }
  if (scale) {
    x <- scale(x)
  }
  else{
    x <- scale(x,T,F)
  }
  # initialize objects
  betahat <- matrix( 0, p, q )
  x1 <- x
  y1 <- y
  new2keeps <- list()

  for (k in 1:K){
    # define Z
    Z <- t(x1) %*% y1
    # fit direction vector
    thr <- .threshpls(Z, lambda)
    keep <- unique( ip[ thr!=0 | betahat[,1]!=0 ] )
    new2keep <- ip[ thr!=0 & betahat[,1]==0 ]
    xkeep <- x[,keep,drop=FALSE]
    plsfit <- gpls(y ~ ., cbind.data.frame(y=y,xkeep),ncomp=min(k,length(keep)),family=family,link=link)
    # update
    betahat <- matrix( 0, p, q )
    betahat[keep,] <- matrix(coef(plsfit)[-1], length(keep), q)
    pj <- plsfit$loadings
    pw <- pj %*% tcrossprod(solve(crossprod(pj)), pj)
    x1 <- x
    x1[,keep] <- x[,keep,drop=FALSE] - x[,keep,drop=FALSE] %*% pw
    new2keeps[[k]] <- new2keep
  }

  coef <- as.vector(betahat); names(coef) <- xnames;
  retained.loadings <- pj; retained.x <- x[, which(xnames %in% rownames(pj) == TRUE)]
  if (nrow(pj) != p){
    keep.names <- rownames(pj)
    zero.names <- setdiff(names(coef), keep.names)
    pj <- rbind(matrix(0, nrow = length(zero.names) , ncol = ncol(pj)), pj)
    rownames(pj) <- c(zero.names, keep.names);
    pj <- pj[xnames, ]; wch <- which(rownames(pj) %in% zero.names == TRUE)
    retained.loadings <- pj[-wch, ]
  }
  coef <- c("(Intercept)" = unname(coef(plsfit)[1]), coef)
  scores <- retained.x %*% retained.loadings
  form <- as.formula(paste(yname, paste0(rownames(retained.loadings), collapse=" + "), sep = " ~ "))
  out <- structure(
              list(coefficients=coef,
                 lambda=lambda,
                 ncomp=K,
                 loadings=pj,
                 scores = plsfit$scores,
                 retained.loadings = retained.loadings,
                 linear.predictor = plsfit$linear.predictor,
                 fitted = plsfit$fitted,
                 pearson.residuals = plsfit$pearson.residuals,
                 deviance.residuals = plsfit$deviance.residuals,
                 y_obs = y,
                 retained.x = retained.x,
                 family = plsfit$family,
                 link = plsfit$link,
                 levs = plsfit$levs,
                 hat = plsfit$hat,
                 call = this.call,
                 formula = form,
                 model.matrix = model.matrix(form, data),
                 irwls.weights = plsfit$irwls.wts
                 )
              , class = c("sgpls", "gpls"))

  return(out)
}
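
## Sparse-PLS sketch (the tuning values here are illustrative, not defaults):
# sfit <- sgpls(mpg ~ ., data = mtcars, ncomp = 2, lambda = 0.2, family = "gaussian")
# coef(sfit)  # zeros mark predictors removed by the L1 threshold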

.threshpls <- function(Z, lambda){
  ## soft-thresholding of the direction vector (Chun & Keles, 2010): entries
  ## with |Z| below lambda * max|Z| are zeroed, the rest are shrunk toward zero
  p <- nrow(Z)
  Znorm1 <- median( abs(Z) )
  Z <- Z / Znorm1
  b.threshpls <- matrix( 0, length(Z), 1 )
  if ( lambda < 1 ){
    valb <- abs(Z) - lambda * max( abs(Z) )
    b.threshpls[ valb>=0 ] <- valb[ valb>=0 ] * (sign(Z))[ valb>=0 ]
  }
  return(b.threshpls)
}
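
## Worked example of the soft threshold: with Z = (-2, 0.1, 3) and lambda = 0.5,
## Z is first scaled by median|Z| = 2 to (-1, 0.05, 1.5); entries with
## |Z| < 0.5 * max|Z| = 0.75 are zeroed, the rest shrunk by 0.75:
# cvreg:::.threshpls(matrix(c(-2, 0.1, 3)), lambda = 0.5)
# #       [,1]
# # [1,] -0.25
# # [2,]  0.00
# # [3,]  0.75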


#' Calculate deviance residuals for gpls models
#'
#' @param y the observations
#' @param mu the predictions
#' @param wt for internal use only
#' @param family the glm family
#' @param ncomp number of latent variables used in fitting the model
#'
#' @return deviance residuals
#' @keywords internal
#'
dev.res <- function(y, mu, wt = NULL, family, ncomp = NULL){

  if (is.null(wt)){
    wt <- rep(1, length(y))
  }

  if (family == "gaussian"  ){
    d.res <- wt * ((y - mu)^2)
    ## deviance residual = sign(y - mu) * sqrt(deviance contribution)
    d.res <- as.vector(sign(y - mu) * sqrt(d.res))
    return(d.res)
  }

  if (family == "poisson"){
    r <- mu * wt
    p <- which(y > 0)
    r[p] <- (wt * (y * log(y/mu) - (y - mu)))[p]
    d.res <- 2 * r
    ## deviance residual = sign(y - mu) * sqrt(deviance contribution)
    d.res <- as.vector(sign(y - mu) * sqrt(pmax(d.res, 0)))
    return(d.res)
  }

  if (family == "negative.binomial"){

    ## ML estimate of the negative binomial dispersion theta via profile likelihood
    theta.est <- function(x){

      x <- round(x, 0)

      dnegative.binomial <- function (x, mu = 1, theta = 1, log = FALSE) {
        if (any(mu <= 0))
          stop(paste("mu must be greater than 0 ", "\n", ""))
        if (any(theta <= 0))
          stop(paste("theta must be greater than 0 ", "\n", ""))
        if (length(theta) > 1)
          fy <- ifelse(theta > 1e-04, dnbinom(x, size = mu/theta,
                                              mu = mu, log = log), dpois(x = x, lambda = mu, log = log))
        else fy <- if (theta < 1e-04)
          dpois(x = x, lambda = mu, log = log)
        else dnbinom(x, size = mu/theta, mu = mu, log = log)
        fy
      }

      f <- function(theta) sum(dnegative.binomial(x, mean(x), theta, log = TRUE))

      optimize(f, lower = 1, upper = 1000, maximum = TRUE)$maximum
    }



    theta <- theta.est(mu)
    d.res <- 2 * wt * (y * log(pmax(1, y)/mu) - (y + theta) * log((y + theta)/(mu + theta)))
    d.res <- as.vector(sign(y - mu) * sqrt(pmax(d.res, 0)))
    return(d.res)
  }

  if (family == "Gamma"){
    d.res <- -2 * wt * (log(ifelse(y == 0, 1, y/mu)) - (y - mu)/mu)
    d.res <- as.vector(sign(y - mu) * sqrt(pmax(d.res, 0)))
    return(d.res)
  }

  if (family == "inverse.gaussian"){
    d.res <- wt * ((y - mu)^2)/(y * mu^2)
    d.res <- as.vector(sign(y - mu) * sqrt(pmax(d.res, 0)))
    return(d.res)
  }

  if (family == "binomial"){
    d.res <- sqrt(binomial()$dev.res(y = y, mu = mu, wt = wt))
    d.res <- ifelse(y > mu, d.res, -d.res)
    d.res <- as.vector(d.res)
    return(d.res)
  }
}
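
## Commented check of the signed-square-root form used above (poisson case):
# y <- c(1, 3, 0, 5); mu <- c(1.5, 2.5, 0.7, 4.2)
# cvreg:::dev.res(y, mu, family = "poisson")
# sign(y - mu) * sqrt(2 * (ifelse(y > 0, y * log(y / mu), 0) + (mu - y)))  # identical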

#' Calculate residuals for gpls models
#'
#' @param object the model
#' @param type either deviance or pearson
#' @param wts should typically be left as NULL.
#'
#' @return deviance, pearson, or working residuals
#' @method residuals gpls
#' @export
#' @examples
#' \dontrun{residuals(fit, type = "pearson")}
#' @keywords internal
residuals.gpls <- function(object, type = c("pearson", "deviance", "working"), wts = NULL){

  type <- match.arg(type)
  mu <- object$fitted
  y <- object$y_obs
  scores <- object$scores

  if (is.null(wts)){
    wts <- rep(1, length(y))
  }

  newlink <- switch(object$link,
                    "logit" = "logit",
                    "probit" = "probit",
                    "robit" = "cauchit",
                    "cauchit" = "cauchit",
                    "cloglog" = "cloglog")

  glmfamily <- switch(object$family,
                      "gaussian" = gaussian(link = "identity"),
                      "binomial" = quasibinomial(link = newlink),
                      "poisson" = poisson(link = "log"),
                      "Gamma" = Gamma(link = "inverse"),
                      "negative.binomial" = quasipoisson(link = "log"),
                      "inverse.gaussian" = inverse.gaussian(link = "1/mu^2")
  )

  if (type == "pearson"){
    fit <- suppressMessages(suppressWarnings(glm(y ~ ., cbind.data.frame(y = y, scores), family = glmfamily)))
    resids <- residuals(fit, type = "pearson")
    return(resids)
  }
  if (type == "working"){
    fit <- suppressMessages(suppressWarnings(glm(y ~ ., cbind.data.frame(y = y, scores), family = glmfamily)))
    resids <- residuals(fit, type = "working")
    return(resids)
  }
  else if (type == "deviance"){
    resids <- cvreg:::dev.res(y, mu, wt = wts, family = object$family, ncomp = object$ncomp)
    resids <- as.vector(resids)
    return(resids)
  }
}

#' Confidence Intervals for Model Parameters
#'
#' @param object a gpls fitted object
#' @param conf.level the confidence level required. the default is 95 pct.
#' @param se.type which variant of the Huber-White robust standard errors should be used. Defaults to "HC4m".
#' @param ... other
#' @method confint gpls
#' @return A matrix with columns giving lower and upper confidence limits for each parameter.
#' @export
#' @keywords internal
confint.gpls <- function (object, conf.level  = 0.95, se.type = "HC4m", ...) {

  tol <- 1e-10
  se <- sqrt(diag(vcovHC(object, type = se.type)))
  coef <- object$coefficients
  p <- length(coef)

  qnorm_factory <- function(mu, sigma){
    function(p) qnorm(p, mean = mu, sd = sigma)
  }

  ci <- matrix(0, nrow = p, ncol = 2)

  calculate_ci <- function(ICDF, conf.level, tol){

    ## find the narrowest interval with the requested coverage by optimizing
    ## over the lower tail probability
    intervalWidth <- function(lowTailPr, ICDF, conf.level, ...) {
      ICDF(conf.level + lowTailPr, ...) - ICDF(lowTailPr, ...)
    }
    optInfo <- optimize(intervalWidth, c(0, 1 - conf.level), ICDF = ICDF,
                        conf.level = conf.level, tol = tol, ...)
    HDIlowTailPr <- optInfo$minimum
    result <- c(lower = ICDF(HDIlowTailPr, ...),
                upper = ICDF(conf.level + HDIlowTailPr, ...))
    attr(result, "conf.level") <- conf.level
    return(result)
  }

  for (i in 1:p){
    q <- qnorm_factory(mu = coef[i], sigma = se[i])
    ci[i, ] <- calculate_ci(q, conf.level, tol)
  }

  colnames(ci) <- c(
    stats:::format.perc((1 - (conf.level)) / 2, 3),
    stats:::format.perc(conf.level + (1 - (conf.level)) / 2, 3)
  )

  rownames(ci) <- names(coef)
  return(ci)
}
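
## The interval above is found by minimizing width over the lower tail
## probability; for a symmetric (normal) ICDF this recovers the usual
## equal-tailed interval. A quick check of the idea with plain qnorm,
## no fitted object needed:
# q <- function(p) qnorm(p, mean = 1, sd = 2)
# optimize(function(a) q(0.95 + a) - q(a), c(0, 0.05))$minimum  # ~ 0.025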


#' Choose optimal number of latent variables for gpls models
#'
#' @description This evaluates the number of latent variables that leads to the best fit as judged by the AICc, AIC, BIC, or BICc metric.
#' This is an alternative to cross-validation that can be used to pick a good fit using less computational resources and time.
#'
#' @param formula model formula
#' @param data a data frame
#' @param eps tolerance
#' @param maxit max iter
#' @param denom.eps tolerance value for denominator to consider a number as zero
#' @param family "gaussian", "poisson", "negative.binomial", "binomial", "multinom", "Gamma", "inverse.gaussian"
#' @param link the link function. see details for available options.
#' @param firth should Firth's bias correction be applied? Defaults to FALSE.
#' @param contrasts model contrasts
#' @param criterion one of AICc, AIC, BIC, or BICc.
#' @param plot Should the results be plotted? Defaults to TRUE.
#'
#' @return a data frame of fit scores for each number of latent variables, with the optimal fit starred
#' @export
#'
rankest.gpls <- function(formula, data, eps=1e-3, maxit=1000, denom.eps=1e-20, family = NULL, link = NULL, firth=FALSE, contrasts=NULL, criterion = c("AICc", "AIC", "BIC", "BICc"), plot = TRUE){

  criterion <- match.arg(criterion)
  x = model.matrix(formula, data)[,-1]

  score <- rep(0, ncol(x))

  for (i in 1:ncol(x)){
    if (criterion == "AIC"){
      score[i] <-  AIC(gpls(formula, data, ncomp = i, eps, maxit, denom.eps, family, link, firth, contrasts), correction = F)
    }
    else if (criterion == "AICc"){
      score[i] <-  AIC(gpls(formula, data, ncomp = i, eps, maxit, denom.eps, family, link, firth, contrasts), correction = T)
    }
    else if (criterion == "BIC"){
      score[i] <-  BIC(gpls(formula, data, ncomp = i, eps, maxit, denom.eps, family, link, firth, contrasts), correction = F)
    }
    else if (criterion == "BICc"){
      score[i] <-  BIC(gpls(formula, data, ncomp = i, eps, maxit, denom.eps, family, link, firth, contrasts), correction = T)
    }
  }

  dat <- data.frame("k variables" = 1:ncol(x), "information.criterion" = score)
  colnames(dat) <- c("k variables", criterion)

  if (plot){
    plot(ggplot(dat, aes_string(x = "`k variables`", y = paste0(criterion, "\n"))) +
           geom_point(color = "#268fff", alpha = 0.94, shape = 1) +
           geom_line(color = "#268fff", alpha = 0.85, size = 0.65))
  }
  star <- rep(" ", length(score))
  wch <- which.min(dat[,2])
  star[wch] <- "*"
  dat <- cbind.data.frame(dat, optimal.fit = star)
  return(dat)
}
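
## Sketch (illustrative choices): score 1..p latent variables by AICc and
## read off the starred optimum:
# tab <- rankest.gpls(mpg ~ wt + hp + disp + drat, data = mtcars,
#                     family = "gaussian", criterion = "AICc", plot = FALSE)
# tab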


#' plot method for gpls objects
#'
#' @param fit model fit
#' @param type one of "facet", "splom", or "fitted". facet plots each sufficient predictor's
#' relationship with the response. splom plots the same as facet but also the inter-relationships
#' of the sufficient predictors as a scatterplot matrix. fitted plots the relationship between
#' the fitted values of y and the observed values.
#'
#' @return a plot
#' @method plot gpls
#' @export
#' @keywords internal
plot.gpls <- function(fit, type = c("facet", "splom", "fitted")){

  type <- match.arg(type)

  if (type == "facet"){
    dat <- cbind.data.frame(y = fit$y_obs, fit$scores)
    dat <- as.data.frame(pivot_longer(dat, -y))
    ggplot(dat, aes(x = value, y = y, color = name)) +
      facet_wrap(~ name, ncol = ifelse(length(unique(dat$name)) < 4, length(unique(dat$name)), 2)) +
      geom_point(shape = 1, alpha = 0.90) +
      theme(legend.position = "none")
  }
  else if (type == "splom"){
    dat <- cbind.data.frame(y = fit$y_obs, fit$scores)
    scatMat(dat, smooth = F, smoother = F, points.col = "#3293f3BF", pch = 1)
  }
  else if (type == "fitted"){
    dat <- cbind.data.frame(y = fit$y_obs, fit$scores)
    ggplot(dat, aes(x = yhat, y = yobs)) +
      geom_point(shape = 1, alpha = 0.90, color = "#e70e4e")
  }
}