R/robustGlms.R

Defines functions .misclassw .misclassg .misclassG .misclassf .misclassF .maked1variance .maked3afun .maked2afun .maked1afun .maked2mu.deta print.robglm .wgtFlex robglm.boot robglm

Documented in print.robglm robglm robglm.boot

#' Robust Generalized Linear Models
#'
#' @param formula A model formula
#' @param data A data frame
#' @param family One of "binomial", "poisson", "Gamma", "gaussian".
#' @param method The method to be used to calculate weights for the design matrix. One of the options below. The default is "mallows". \cr \cr
#' - "none" uses equal weights for all rows of the design matrix \cr
#' - "mallows" uses the diagonal of the hat matrix to calculate leverage weights as sqrt(1-h) \cr
#' - "carroll" uses the sqrt(mah.dist/ p) as a statistic to feed into the psi function \eqn{\psi = u * ((1-(u/cw)^2)^3 * I(u) <= cw)} to generate weights as \eqn{\psi(u)/u}. \cr
#' - "schweppe" multiplies the mallows weights by psi-weights derived from robustly standardized deviance residuals
#' from an initial fit, i.e., sqrt(1-h)*psi((r-median(r))/mad(r)). Schweppe's weights prevent high-leverage observations
#' that are not associated with large residuals (good leverage points) from being downweighted. This results in increased
#' efficiency. \cr
#' - "welsch" is similar to Mallow's weights, but Welsch's weights are calculated as (1-h)/sqrt(h).
#' @param k Tuning constant for Huber's psi. defaults to 1.345.
#' @param cw Tuning constant for caroll weights. defaults to 8.
#'
#' @return a robglm object
#' @export
#'
robglm <- function(formula, data, family = "binomial", method = c("mallows","none","schweppe","welsch","carroll"),  k = 1.345, cw = 8){
  if (class(family)=="family"){
    family <- family
  }
  else{
    family <- switch(family,
                     "binomial" = binomial(),
                     "poisson" = poisson(),
                     "quasibinomial" = quasibinomial(),
                     "quasipoisson" = quasipoisson(),
                     "gamma" = Gamma(),
                     "Gamma" = Gamma(),
                     "gaussian" = gaussian()
                     )
  }
  if (is.null(weights)) weights <- rep(1,nrow(data))
  method <- match.arg(method);
  x <- model.matrix(formula, data)
  y <- model.response(model.frame(formula, data))
  if (family$family == "binomial" || family$family == "quasibinomial"){
    if (!is.numeric(y)){y <- as.numeric(as.factor(y)) - 1}
    if (min(y)!=0){y <- y -1}
  }
  out <- .Mqle(x, y, weights=rep(1,length(y)), family=family, k=k, cw=cw, xwts=method)
  return(out)
}


#' Bootstrapping Robust Generalized Linear Models
#'
#' @param formula A model formula
#' @param data A data frame
#' @param family One of "binomial", "poisson", "Gamma", "gaussian".
#' @param method The method to be used to calculate weights for the design matrix. One of the options below. The default is "mallows". \cr \cr
#' - "none" uses equal weights for all rows of the design matrix \cr
#' - "mallows" uses the diagonal of the hat matrix to calculate leverage weights as sqrt(1-h) \cr
#' - "carroll" uses the sqrt(mah.dist/ p) as a statistic to feed into the psi function \eqn{\psi = u * ((1-(u/cw)^2)^3 * I(u) <= cw)} to generate weights as \eqn{\psi(u)/u}. \cr
#' - "schweppe" multiplies the mallows weights by psi-weights derived from robustly standardized deviance residuals
#' from an initial fit, i.e., sqrt(1-h)*psi((r-median(r))/mad(r)). Schweppe's weights prevent high-leverage observations
#' that are not associated with large residuals (good leverage points) from being downweighted. This results in increased
#' efficiency. \cr
#' - "welsch" is similar to Mallow's weights, but Welsch's weights are calculated as (1-h)/sqrt(h).
#' @param k Tuning constant for Huber's psi. defaults to 1.345.
#' @param cw Tuning constant for caroll weights. defaults to 8.
#' @param parallel Whether or not to use parallel processing. Defaults to TRUE.
#' @param ncores Number of cores to use. If not supplied, a cluster on the local machine consisting of the number of system cores minus one is created for the duration of the boot call.
#' @param iter The number of bootstrap replicates. Defaults to 1000.
#' @param conf.level the confidence level for computing the bootstrap confidence intervals. defaults to 0.95.
#' @return a roblm object containing the contents of either a "boot" or a "kernelboot" object.
#' @export
#'
#' @return a robglm object
#' @export
#'
robglm.boot <- function(formula, data, family = "binomial", method = c("mallows", "none", "schweppe", "welsch", "carroll"), k = 1.345, cw = 8, parallel = TRUE, ncores = 4, iter = 1000, conf.level = 0.95){

  this.call <- match.call()
  if (class(family)=="family"){
    family <- family
  }
  else{
    family <- switch(family,"binomial" = binomial(),"poisson" = poisson(),"quasibinomial" = quasibinomial(),
                     "quasipoisson" = quasipoisson(),"gamma" = Gamma(),"Gamma" = Gamma(),"gaussian" = gaussian())
  }
  method <- match.arg(method)
  X = model.matrix(formula, data)
  y <- model.response(model.frame(formula, data))
  if (family$family == "binomial" || family$family == "quasibinomial"){
    if (!is.numeric(y)){y <- as.numeric(as.factor(y)) - 1}
    if (min(y)!=0){y <- y -1}
    tmp <- colnames(model.frame(formula, data))[1]
    wch <- which(colnames(data)==tmp)
    data[,wch] <- y
  }
   make.bootfun <- function(Formula, Family, Khub, CW, Method, Start, Mustart, Etastart){
    function(dat, idx){
      coef  <- .MqleFastBoot(formula=Formula,data=dat[idx,],family=Family,k=Khub,cw=CW,xwts=Method,start=Start,mustart=Mustart[idx],etastart=Etastart[idx])
      return(coef)
    }
  }


  suppressPackageStartupMessages(require(parallel))
  suppressPackageStartupMessages(require(doParallel))
  if (parallel){
    if (is.null(ncores)) {ncores <- parallel::detectCores()}
    cl <- parallel::makeCluster(ncores)
    doParallel::registerDoParallel(cl)
    parallel <- "multicore"
  }
  else{
    cl <- NA
    parallel <- "no"
  }

  initial <- robglm(formula, data, family, method,k, cw)
  varnames <- names(initial$coefficients)
  bootfun <- make.bootfun(formula, family, k, cw, method, Start = initial$coefficients, Mustart = initial$fitted, Etastart = initial$linear.predictor)
  out <- try(boot::boot(data, bootfun, R = iter, parallel = parallel, cl = cl), silent = TRUE)
  if (class(out) == "try-error"){
    suppressWarnings(suppressMessages(parallel::stopCluster(cl)))
    return()
  }
  else{
    suppressWarnings(suppressMessages(parallel::stopCluster(cl)))
  }
  out$coefficients <- as.vector(out$t0) ; names(out$coefficients)  <- varnames
  confidence.intervals <- sapply(1:ncol(out$t), function(i) bci(out$t[,i], conf.level))
  out$p.values <- sapply(1:ncol(out$t),function(i) cvreg:::exact_pvalue(out$t[,i]))
  out$std.err <- sapply(1:ncol(out$t), function(x) sqrt(mean((out$t[,x]-mean(out$t[,x]))^2)))
  out$vcov <- cor2cov(cor(out$t), out$std.err^2)
  confidence.intervals <- t(confidence.intervals)
  colnames(confidence.intervals) <- c(paste0("lower ", 100*conf.level, "% CI" ), paste0("upper ", 100*conf.level, "% CI"))
  names(out$coefficients) <- names(out$std.err) <- names(out$p.values) <- rownames(out$vcov) <- colnames(out$vcov) <-varnames
  out$fitted <- initial$fitted
  out$linear.predictor <- initial$linear.predictor
  out$dispersion <- initial$dispersion
  out$confidence.intervals <- confidence.intervals
  out$model.matrix = model.matrix(formula, data)
  out$model.frame = model.frame(formula, data)
  out$terms = terms(model.frame(formula, data))
  out$formula = formula; out$family <- family; out$call = this.call
  class(out) <- "robglm"
  return(out)
}


.biasreducedglm <- function (x, y, weights = rep(1, nobs), start = NULL, etastart = NULL,
                             mustart = NULL, offset = rep(0, nobs), family = gaussian(),
                             control = list(), intercept = TRUE, singular.ok = TRUE){


  has_fixed_totals <- fixed_totals <- FALSE
  ok_links <- c("logit", "probit", "cauchit", "cloglog", "identity", "log", "1/mu^2", "sqrt", "inverse")
  if (isTRUE(family$family %in% c("quasi", "inverse.gaussian", "negative.binomial"))) {
    stop("'quasi', 'inverse.gaussian', 'negative.binomial' not supported")
  }

  variance <- family$variance
  d1variance <- .maked1variance(family$family)
  linkinv <- family$linkinv
  linkfun <- family$linkfun
  linkname <- family$link
  if (!is.function(variance) || !is.function(linkinv))
    stop("'family' argument seems not to be a valid family object",
         call. = FALSE)
  dev.resids <- family$dev.resids
  aic <- family$aic
  mu.eta <- family$mu.eta
  d2mu.deta <- .maked2mu.deta(family$family, family$link)
  d1afun <- .maked1afun(family$family)
  d2afun <- .maked2afun(family$family)
  d3afun <- .maked3afun(family$family)
  simulate <- family$simulate
  d1_transformed_dispersion <- D(expression(dispersion),"dispersion")
  d2_transformed_dispersion <- D(expression(dispersion),"dispersion")



  ## key_quantities, grad, info and bias are ALWAYS in beta, dispersion parameterization
  key_quantities <- function(pars, y, level = 0, scale_totals = FALSE, qr = TRUE) {
    betas <- pars[seq.int(nvars)]
    dispersion <- pars[nvars + 1]
    prec <- 1/dispersion
    etas <- drop(x %*% betas + offset)
    mus <- linkinv(etas)
    if (scale_totals) {
      ## Rescale mus
      mus_totals <-  as.vector(tapply(mus, fixed_totals, sum))[fixed_totals]
      mus <- mus * row_totals / mus_totals
      etas <- linkfun(mus)
    }
    out <- list(precision = prec,
                betas = betas,
                dispersion = dispersion,
                etas = etas,
                mus = mus,
                scale_totals = scale_totals)
    mean_quantities <- function(out) {
      d1mus <- mu.eta(etas)
      d2mus <- d2mu.deta(etas)
      varmus <- variance(mus)
      working_weights <- weights * d1mus^2 / varmus
      out$wx <- wx <- sqrt(working_weights) * x
      out$d1mus <- d1mus
      out$d2mus <- d2mus
      out$varmus <- varmus
      out$d1varmus <- d1variance(mus)
      out$working_weights <- working_weights
      if (qr) out$qr_decomposition <- qr(wx)
      out
    }
    dispersion_quantities <- function(out) {
      zetas <- -weights * prec
      out$zetas <- zetas
      d1afuns <- d2afuns <- d3afuns <- rep(NA_real_, nobs)
      d1afuns[keep] <- d1afun(zetas[keep])
      if (family$family == "Gamma") d1afuns <- d1afuns - 2
      d2afuns[keep] <- d2afun(zetas[keep])
      d3afuns[keep] <- d3afun(zetas[keep])
      out$d2afuns <- d2afuns
      out$d3afuns <- d3afuns
      out$deviance_residuals <- dev.resids(y, mus, weights)
      out$Edeviance_residuals <- weights * d1afuns
      out
    }
    if (level == 0) {
      out <- mean_quantities(out)
    }
    if (level == 1) {
      out <- dispersion_quantities(out)
    }
    if (level > 1) {
      out <- mean_quantities(out)
      out <- dispersion_quantities(out)
    }
    out
  }

  gradient <- function(pars, level = 0, fit = NULL) {
    if (is.null(fit)) {
      fit <- key_quantities(pars, y = y, level = level, qr = FALSE)
    }
    with(fit, {
      if (level == 0) {
        score_components <- weights * d1mus  * (y - mus) / varmus * x
        return(precision * .colSums(score_components, nobs, nvars, TRUE))
      }
      if (level == 1) {
        return(1/2 * precision^2 * sum(deviance_residuals - Edeviance_residuals, na.rm = TRUE))
      }
    })
  }

  information <- function(pars, level = 0, fit = NULL, inverse = FALSE) {
    if (is.null(fit)) {
      fit <- key_quantities(pars, y = y, level = level, qr = TRUE)
    }
    with(fit, {
      if (level == 0) {
        if (inverse) {
          return(dispersion * cvreg:::.ridgeinv(crossprod(wx)))
        }
        else {
          return(precision * crossprod(wx))
        }
      }
      if (level == 1) {
        info <- 0.5 * sum(weights^2 * d2afuns, na.rm = TRUE)/dispersion^4
        if (inverse) {
          return(1/info)
        }
        else {
          return(info)
        }
      }
    })
  }

  hat_values <- function(pars, fit = NULL) {
    if (is.null(fit)) {
      fit <- key_quantities(pars, y = y, level = 0, qr = TRUE)
    }
    with(fit, {
      Qmat <- qr.Q(qr_decomposition)
      .rowSums(Qmat * Qmat, nobs, nvars, TRUE)
    })
  }

  estimate_dispersion <- function(betas, y) {
    if (no_dispersion) {
      disp <- 1
      dispML <- 1
    }
    else {
      if (df_residual > 0) {
        dispFit <- try(cvreg::saferoot(f = function(phi) {
          theta <- c(betas, phi)
          cfit <- key_quantities(theta, y = y, level = 1, qr = FALSE)
          gradient(theta, level = 1, fit = cfit)
        }, lower = .Machine$double.eps, upper = 10000, tol = 1e-05), silent = TRUE)
        if (inherits(dispFit, "try-error")) {
          dispML <- var(y)/variance(sum(weights * y)/sum(weights))
          disp <- var(y)/variance(sum(weights * y)/sum(weights))
        }
        else {
          disp <- dispML <- dispFit$root
        }
      }
      else { ## if the model is saturated dispML is NA_real_
        disp <- 1 ## A convenient value
        dispML <- NA_real_
      }
    }
    list(dispersion = disp)
  }


  adjustment_function <- function(pars, level = 0, fit = NULL) {
    if (is.null(fit)) {
      fit <- key_quantities(pars, y = y, level = level, qr = TRUE)
    }
    with(fit, {
      if (level == 0) {
        hatvalues <- hat_values(pars, fit = fit)
        ## Use only observations with keep = TRUE to ensure that no division with zero takes place
        return(2 * 0.5 * .colSums(0.5 * hatvalues * (2 * d2mus/d1mus - d1varmus * d1mus / varmus) * x, nobs, nvars, TRUE))
      }
      if (level == 1) {
        s1 <- sum(weights^3 * d3afuns, na.rm = TRUE)
        s2 <- sum(weights^2 * d2afuns, na.rm = TRUE)
        return(2 * 0.5 * (-(nvars + 4)/(2 * dispersion) + s1/(2 * dispersion^2 * s2)))
      }
    })
  }

  compute_step_components <- function(pars, level = 0, fit = NULL) {
    if (is.null(fit)) {
      fit <- key_quantities(pars, y = y, level = level, qr = TRUE)
    }
    if (level == 0) {
      grad <-  gradient(pars, fit = if (has_fixed_totals) NULL else fit, level = 0)
      inverse_info <- try(information(pars, inverse = TRUE, fit = fit, level = 0))
      failed_inversion <- inherits(inverse_info, "try-error")
      adjustment <- adjustment_function(pars, fit = fit, level = 0)
      failed_adjustment <- any(is.na(adjustment))
    }
    if (level == 1) {
      if (no_dispersion | df_residual < 1) {
        grad <- adjustment <- inverse_info <- NA_real_
        failed_adjustment <- failed_inversion <- FALSE
      }
      else {
        d1zeta <- eval(d1_transformed_dispersion)
        d2zeta <- eval(d2_transformed_dispersion)
        grad <-  gradient(theta, fit = fit, level = 1)/d1zeta
        inverse_info <- 1/information(theta, inverse = FALSE, fit = fit, level = 1) * d1zeta^2
        failed_inversion <- !is.finite(inverse_info)
        adjustment <- adjustment_function(theta, fit = fit, level = 1)/d1zeta - 0.5 * d2zeta / d1zeta^2
        failed_adjustment <- is.na(adjustment)
      }
    }
    out <- list(grad = grad,
                inverse_info = inverse_info,
                adjustment = adjustment,
                failed_adjustment = failed_adjustment,
                failed_inversion = failed_inversion)
    out
  }

  is_correction <- TRUE
  no_dispersion <- family$family %in% c("poisson", "binomial","quasipoisson", "quasibinomial")
  no_final_dispersion <- family$family %in% c("poisson", "binomial")
  x <- as.matrix(x)
  betas_names <- dimnames(x)[[2L]]
  ynames <- if (is.matrix(y)) rownames(y) else names(y)
  converged <- FALSE
  nobs <- NROW(y)
  nvars <- ncol(x)
  EMPTY <- nvars == 0
  if (is.null(weights)) {
    weights <- rep.int(1, nobs)
  }
  if (missing_offset <- is.null(offset)) {
    offset <- rep.int(0, nobs)
  }

  unless_null <- function(x, if_null) {
    if (is.null(x)) {
      if_null
    }
    else {
      x
    }
  }

  ## Check for invalid etas and mus
  valid_eta <- unless_null(family$valideta, function(eta) TRUE)
  valid_mu <- unless_null(family$validmu, function(mu) TRUE)

  ## Initialize as prescribed in family
  eval(family$initialize)

  ## If there are no covariates in the model then evaluate only the offset
  if (EMPTY) {
    etas <- rep.int(0, nobs) + offset
    if (!valid_eta(etas))
      stop("invalid linear predictor values in empty model", call. = FALSE)
    mus <- linkinv(etas)
    if (!valid_mu(mus))
      stop("invalid fitted means in empty model", call. = FALSE)
    ## deviance <- sum(dev.resids(y, mus, weights))
    working_weights <- ((weights * mu.eta(etas)^2)/variance(mus))^0.5
    residuals <- (y - mus)/mu.eta(etas)
    keep <- rep(TRUE, length(residuals))
    boundary <- converged <- TRUE
    betas_all <- numeric()
    rank <- 0
    iter <- 0L
  }
  else {
    boundary <- converged <- FALSE
    ## Detect aliasing
    qrx <- qr(x)
    rank <- qrx$rank
    is_full_rank <- rank == nvars

    if (!singular.ok && !is_full_rank) {
      stop("singular fit encountered")
    }
    if (!isTRUE(is_full_rank)) {
      aliased <- qrx$pivot[seq.int(qrx$rank + 1, nvars)]
      X_all <- x
      x <- x[, -aliased]
      nvars_all <- nvars
      nvars <- ncol(x)
      betas_names_all <- betas_names
      betas_names <- betas_names[-aliased]
    }
    else {
      nvars_all <- nvars
      betas_names_all <- betas_names
    }
    betas_all <- structure(rep(NA_real_, nvars_all), .Names = betas_names_all)
    keep <- weights > 0
    ## Check for zero weights
    ## if (any(!keep)) {
    ##     warning("Observations with non-positive weights have been omited from the computations")
    ## }
    nkeep <- sum(keep)
    df_residual <- nkeep - rank
    ## Handle starting values
    ## If start is NULL then start at the ML estimator else use start
    if (is.null(start)) {
      ## Adjust counts if binomial or Poisson in order to avoid infinite estimates
      adj <- nvars/nobs
      if (family$family == "binomial" || family$family=="quasibinomial") {
        weights.adj <- weights + (!(is_correction)) * adj
        y.adj <- (weights * y + (!(is_correction)) * 0.5 * adj)/weights.adj
      }
      else {
        weights.adj <- weights
        y.adj <- y + if (family$family == "poisson"|| family$family=="quasipoisson" || family$family=="Gamma") (!(is_correction)) * 0.5 * adj else 0
      }
      ## ML fit to get starting values
      warn <- getOption("warn")
      ## Get startng values and kill warnings whilst doing that
      options(warn = -1)
      if (family$family == "binomial"|| family$family=="quasipoisson"){Link<-make.link(linkname);family2 <- quasibinomial(link=Link)}
      else if (family$family == "poisson"|| family$family=="quasipoisson"){Link<-make.link(linkname);family2 <- quasipoisson(link=Link)}
      else {family2 <- family}
      tempFit <- glm.fit(x = x, y = y.adj, weights = weights.adj,
                         etastart = etastart, mustart = mustart,
                         offset = offset, family = family2, start = start,
                         control = list(epsilon = 1e-04,
                                        maxit = 400, trace = FALSE),
                         intercept = intercept)

      ## Set warn to its original value
      #options(warn = warn)
      betas <- coef(tempFit)
      names(betas) <- betas_names
      dispList <- estimate_dispersion(betas, y = y)
      dispersion <- dispList$dispersion
      if (is.na(dispersion)) dispersion <- var(y)/variance(sum(weights * y)/sum(weights))
      transformed_dispersion <- D(expression(dispersion),"dispersion")
    }
    else {
      if ((length(start) == nvars_all) & is.numeric(start)) {
        betas_all <- start
        names(betas_all) <- betas_names_all
        if (!isTRUE(is_full_rank)) {
          betas_all[aliased] <- NA_real_
          betas <- betas_all[-aliased]
        }
        else {
          betas <- betas_all
        }
        ## Estimate dispersion based on current value for betas
        dispList <- estimate_dispersion(betas, y = y)
        dispersion <- dispList$dispersion
        if (is.na(dispersion)) dispersion <- var(y)/variance(sum(weights * y)/sum(weights))
        transformed_dispersion <- D(expression(dispersion),"dispersion")
      }
      if ((length(start) == nvars_all + 1) & is.numeric(start)) {
        betas_all <- start[seq.int(nvars_all)]
        names(betas_all) <- betas_names_all
        if (!isTRUE(is_full_rank)) {
          betas_all[aliased] <- NA_real_
          betas <- betas_all[-aliased]
        }
        else {
          betas <- betas_all
        }
        transformed_dispersion <- start[nvars_all + 1]
        dispersion <- 1
      }
      if (length(start) > nvars_all + 1 | length(start) < nvars_all) {
        stop(paste(paste(gettextf("length of 'start' should be equal to %d and correspond to initial betas for %s", nvars_all, paste(deparse(betas_names_all), collapse = ", "), "or", gettextf("to %d and also include a starting value for the transformed dispersion", nvars_all + 1)))), domain = NA_real_)
      }
    }

    adjusted_grad_all <- rep(NA_real_, nvars_all + 1)
    names(adjusted_grad_all) <- c(betas_names_all, "Transformed dispersion")
    theta <- c(betas, dispersion)
    transformed_dispersion <- D(expression(dispersion),"dispersion")
    quantities <- key_quantities(theta, y = y, level = 2 * !no_dispersion, scale_totals = FALSE, qr = TRUE)
    step_components_beta <- compute_step_components(theta, level = 0, fit = quantities)
    step_components_zeta <- compute_step_components(theta, level = 1, fit = quantities)
    if (step_components_beta$failed_inversion) {
      warning("failed to invert the information matrix")
    }
    if (step_components_beta$failed_adjustment) {
      warning("failed to calculate score adjustment")
    }
    adjusted_grad_beta <- with(step_components_beta, {
      grad + adjustment
    })
    step_beta <- drop(step_components_beta$inverse_info %*% adjusted_grad_beta)
    ## Dispersion quantities
    if (no_dispersion) {
      adjusted_grad_zeta <- step_zeta <- NA_real_
    }
    else {
      if (step_components_zeta$failed_inversion) {
        warning("failed to invert the information matrix")
      }
      if (step_components_zeta$failed_adjustment) {
        warning("failed to calculate score adjustment")
      }
      adjusted_grad_zeta <- with(step_components_zeta, {
        grad + adjustment
      })
      step_zeta <- as.vector(adjusted_grad_zeta * step_components_zeta$inverse_info)
    }

    ## Main iterations
    slowit <- 1
    for (iter in seq.int(1000)) {
      step_factor <- 0
      testhalf <- TRUE
      ## Inner iteration
      while (testhalf & step_factor < 15) {
        step_beta_previous <- step_beta
        step_zeta_previous <- step_zeta
        ## Update betas
        betas <- betas + 1 * 2^(-step_factor) * step_beta
        ## Update zetas
        if (!no_dispersion & df_residual > 0) {
          transformed_dispersion <- D(expression(dispersion),"dispersion")
          transformed_dispersion <- transformed_dispersion + 2^(-step_factor) * step_zeta
          dispersion <- D(expression(dispersion),"dispersion")
        }
        ## Compute key quantities
        theta <- c(betas, dispersion)
        transformed_dispersion <- D(expression(dispersion),"dispersion")
        ## Mean quantities
        quantities <- key_quantities(theta, y = y, level = 2 * !no_dispersion, scale_totals = has_fixed_totals, qr = TRUE)
        step_components_beta <- compute_step_components(theta, level = 0, fit = quantities)
        step_components_zeta <- compute_step_components(theta, level = 1, fit = quantities)
        if (failed_inversion_beta <- step_components_beta$failed_inversion) {
          warning("failed to invert the information matrix")
          break
        }
        if (failed_adjustment_beta <- step_components_beta$failed_adjustment) {
          warning("failed to calculate score adjustment")
          break
        }
        adjusted_grad_beta <- with(step_components_beta, {
          grad + adjustment
        })
        step_beta <- drop(step_components_beta$inverse_info %*% adjusted_grad_beta)
        ## Dispersion quantities
        if (no_dispersion) {
          adjusted_grad_zeta <- step_zeta <- NA_real_
          failed_inversion_zeta <- failed_adjustment_zeta <- FALSE
        }
        else {
          if (failed_inversion_zeta <- step_components_zeta$failed_inversion) {
            warning("failed to invert the information matrix")
            break
          }
          if (failed_adjustment_zeta <- step_components_zeta$failed_adjustment) {
            warning("failed to calculate score adjustment")
            break
          }
          adjusted_grad_zeta <- with(step_components_zeta, {
            grad + adjustment
          })
          step_zeta <- as.vector(adjusted_grad_zeta * step_components_zeta$inverse_info)
        }
        ## Continue inner loop
        if (step_factor == 0 & iter == 1)  {
          testhalf <- TRUE
        }
        else {
          s2 <- c(abs(step_beta), abs(step_zeta))
          s1 <- c(abs(step_beta_previous), abs(step_zeta_previous))
          testhalf <- sum(s2, na.rm = TRUE) > sum(s1, na.rm = TRUE)
        }
        step_factor <- step_factor + 1
      }
      failed <- failed_adjustment_beta | failed_inversion_beta | failed_adjustment_zeta | failed_inversion_zeta
      if (failed | sum(abs(c(step_beta, step_zeta)), na.rm = TRUE) < 1e-05) {
        break
      }
    }
  }

  adjusted_grad_all[betas_names] <- adjusted_grad_beta
  adjusted_grad_all["Transformed dispersion"] <- adjusted_grad_zeta
  betas_all[betas_names] <- betas

  ## Convergence analysis
  if ((failed | iter >= 1000) & !(is_correction)) {
    warning("algorithm did not converge", call. = FALSE)
    converged <- FALSE
  }
  else {
    converged <- TRUE
  }


  ## QR decomposition and fitted values are at the final value
  ## for the coefficients
  ## QR decomposition for cov.unscaled
  if (!isTRUE(is_full_rank)) {
    x <- X_all
    betas <- betas_all
    betas[is.na(betas)] <- 0
    nvars <- nvars_all
  }

  ## If has_fixed_totals = TRUE, then scale fitted values before
  ## calculating QR decompositions, fitted values, etas,
  ## residuals and working_weights

  quantities <- key_quantities(c(betas, dispersion), y = y, level = 2 * !no_dispersion, scale_totals = has_fixed_totals, qr = TRUE)
  if (!no_dispersion) {info_transformed_dispersion <- 1/step_components_zeta$inverse_info}

  eps <- 10 * .Machine$double.eps



  mus <- quantities$mus
  etas <- quantities$etas

  if (family$family == "binomial") {
    if (any(mus > 1 - eps) || any(mus < eps)) {
      warning("fitted probabilities numerically 0 or 1 occurred", call. = FALSE)
      boundary <- TRUE
    }
  }
  if (family$family == "poisson") {
    if (any(mus < eps)) {
      warning("fitted rates numerically 0 occurred", call. = FALSE)
      boundary <- TRUE
    }
  }
  if (df_residual == 0 & !no_dispersion) {
    dispersion <- NA_real_
  }

  results <- list()
  results$coefficients <- betas
  results$fitted.values <- mus
  results$linear.predictor <- etas
  results$residuals <- with(quantities, (y - mus)/d1mus)
  results$deviance.residuals <- family$dev.res(y, mus, rep(1, length(y)))
  results$deviance <- sum(results$deviance.residuals^2)
  results$weights <- quantities$working_weights
  results$prior.weights <- weights
  if (!no_final_dispersion){
    results$dispersion <- sum((quantities$working_weights * results$residuals^2)[results$weights > 0])/df_residual
  }
  else{
    results$dispersion <- 1
  }
  wt <- rep.int(0, nobs)
  wt[keep] <- quantities$working_weights[keep]
  W.X <- x * wt
  results$vcov.unscaled <- pseudoinverse(crossprod(W.X))
  results$vcov <- results$vcov.unscaled  * results$dispersion
  results$std.err <- sqrt(diag(results$vcov))
  results$qr <- try(qr(W.X), silent = TRUE)
  results$df.residuals <- df_residual
  results$family <- family
  results$rank <- rank
  return(results)

}

.Mqle <- function (X, y, weights = NULL, start = NULL, mustart = NULL, etastart = NULL, offset = NULL, family, k = 1.345, cw=8, xwts=c("mallows","none","schweppe","welsch","carroll"), intercept = TRUE, trace = FALSE){
  family <- family
  if (!is.matrix(X))
    X <- as.matrix(X)
  names <- colnames(X)
  nobs <- NROW(y)
  stopifnot(nobs == nrow(X))
  if (is.null(weights)) {weights <- rep.int(1, nobs)}
  else if (any(weights < 0)){stop("All weights must be positive")}
  if (is.null(offset))
    offset <- rep.int(0, nobs)
  else if (!all(offset == 0))
    warning("'offset' not fully implemented")
  variance <- family$variance
  linkinv <- family$linkinv
  if (!is.function(variance) || !is.function(linkinv))
    stop("illegal 'family' argument")
  mu.eta <- family$mu.eta
  if (is.null(valideta <- family$valideta))
    valideta <- function(eta) TRUE
  if (is.null(validmu <- family$validmu))
    validmu <- function(mu) TRUE
  devres <- family$dev.res
  ncoef <- ncol(X)
  if (xwts == "welsch"){
    h <- hat(X, intercept = intercept)
    w.x <- (1 - h) / sqrt(h)
  }
  else if (xwts == "mallows"){
    h <- hat(X, intercept = intercept)
    w.x <- sqrt(1 - h)
  }
  else if (xwts == "schweppe"){
    h <- hat(X, intercept = intercept)
    w.x <- 1 - h
    r <- cvreg:::.biasreducedglm(x = X, y = y, weights = weights, offset = offset, family = family, intercept = intercept)$deviance.residuals
    r <- (r-hdmedian(r)) / (1.4826*hdmedian(abs(r-hdmedian(r))))
    w.x <- sqrt(w.x * MASS::psi.huber(r))
  }
  else if (xwts == "carroll"){
    rdx <- cvreg::cov.bacon(X)$dist
    rdx <- sqrt(rdx/ncoef)
    w.x <- (1 - (rdx/cw)^2)^3 * (abs(rdx) <= cw)
  }
  else if (xwts == "none"){
    w.x <- rep(1, nrow(X))
  }
  stopifnot(1000 >= 1, (k <- k) >= 0)
  eval(family$initialize)
  ni <- as.vector(weights)
  if (is.null(start)){
    if (family$family=="binomial"){
      if(all(weights) != 1){
        family2 <- quasibinomial()
      }
      else{
        family2 <- binomial()
      }
    }
    if (family$family=="poisson"){
      if(all(weights) != 1){
        family2 <- quasipoisson()
      }
      else{
        family2 <- poisson()
      }
    }
    else{
      family2 <- family
    }
    start <- cvreg:::.biasreducedglm(x = X, y = y, weights = weights, offset = offset, family = family2, intercept = intercept)$coefficients
    if (any(ina <- is.na(start))) {
      cat("initial start 'theta' has NA's; eliminating columns X[, j];",
          "j = ", pasteK(which(ina)), "\n")
      theta.na <- start
      X <- X[, !ina, drop = FALSE]
      start <- cvreg:::.biasreducedglm(x = X, y = y, weights = weights, offset = offset, family = family2, intercept = intercept)$coefficients
      if (any(is.na(start)))
        stop("start 'theta' has still NA's .. badly singular x\n")
      ncoef <- length(start)
    }
  }
  k <- k
  thetaOld <- theta <- as.vector(start)
  eta <- as.vector(X %*% theta)
  mu <- linkinv(eta)
  if (!(validmu(mu) && valideta(eta)))
    stop("Cannot find valid starting values: You need help")
  switch(family$family,
  binomial = {
    Epsi.init <- expression({
      pK <- pbinom(K, ni, mu);pH <- pbinom(H, ni, mu);pKm1 <- pbinom(K - 1, pmax.int(0, ni - 1), mu)
      pHm1 <- pbinom(H - 1, pmax.int(0, ni - 1), mu);pKm2 <- pbinom(K - 2, pmax.int(0, ni - 2), mu)
      pHm2 <- pbinom(H - 2, pmax.int(0, ni - 2), mu); QlV <- mu/Vmu * (mu * ni * (pK - pH) + (1 - 2 * mu * ni) * ifelse(ni == 1, (H <= 0) * (K >= 1), pKm1 - pHm1) + (ni - 1) * mu * ifelse(ni == 2, (H <= 1) * (K >= 2), pKm2 - pHm2))
    })
    Epsi <-  expression({k*(1-pK-pH)+ifelse(ni==1,(-(H<0)+(K>=1))*sV, (pKm1-pHm1-pK+pH)*mu*sni/sV)})
    EpsiS <- expression({mu/Vmu * (k * (pH - ifelse(ni == 1, H >= 1, pHm1)) + k *(pK - ifelse(ni == 1, K > 0, pKm1))) + ifelse(ni == 0, 0, QlV/(sni * sV))})
    Epsi2 <- expression({k^2 * (pH + 1 - pK) + QlV})
    phiEst <- phiEst.cl <- expression({1})
  },
  poisson = {
    Epsi.init <- expression({dpH <- dpois(H, mu);dpH1 <- dpois(H - 1, mu);dpK <- dpois(K, mu);dpK1 <- dpois(K - 1, mu)
    pHm1 <- ppois(H - 1, mu);pH <- pHm1 + dpH; pKm1 <- ppois(K - 1, mu);pK <- pKm1 + dpK
    E2f <- mu * (dpH1 - dpH - dpK1 + dpK) + pKm1 - pHm1
    })
    Epsi <- expression({k * (1 - pK - pH) + mu * (dpH - dpK)/sV})
    EpsiS <- expression({k * (dpH + dpK) + E2f/sV})
    Epsi2 <- expression({k^2 * (pH + 1 - pK) + E2f})
    phiEst <- phiEst.cl <- expression({1})
  },
  quasibinomial = {
    Epsi.init <- expression({
      pK <- pbinom(K, ni, mu);pH <- pbinom(H, ni, mu);pKm1 <- pbinom(K - 1, pmax.int(0, ni - 1), mu)
      pHm1 <- pbinom(H - 1, pmax.int(0, ni - 1), mu);pKm2 <- pbinom(K - 2, pmax.int(0, ni - 2), mu)
      pHm2 <- pbinom(H - 2, pmax.int(0, ni - 2), mu); QlV <- mu/Vmu * (mu * ni * (pK - pH) + (1 - 2 * mu * ni) * ifelse(ni == 1, (H <= 0) * (K >= 1), pKm1 - pHm1) + (ni - 1) * mu * ifelse(ni == 2, (H <= 1) * (K >= 2), pKm2 - pHm2))
    })
    Epsi <-  expression({k*(1-pK-pH)+ifelse(ni==1,(-(H<0)+(K>=1))*sV, (pKm1-pHm1-pK+pH)*mu*sni/sV)})
    EpsiS <- expression({mu/Vmu * (k * (pH - ifelse(ni == 1, H >= 1, pHm1)) + k *(pK - ifelse(ni == 1, K > 0, pKm1))) + ifelse(ni == 0, 0, QlV/(sni * sV))})
    Epsi2 <- expression({k^2 * (pH + 1 - pK) + QlV})
    phiEst <- expression({1})
    phiEst.cl <- expression({cvreg:::.biasreducedglm(x = X, y = y, weights = weights, offset = offset, family = family, intercept = intercept)$dispersion})
  },
  quasipoisson = {
    Epsi.init <- expression({dpH <- dpois(H, mu);dpH1 <- dpois(H - 1, mu);dpK <- dpois(K, mu);dpK1 <- dpois(K - 1, mu)
    pHm1 <- ppois(H - 1, mu);pH <- pHm1 + dpH; pKm1 <- ppois(K - 1, mu);pK <- pKm1 + dpK
    E2f <- mu * (dpH1 - dpH - dpK1 + dpK) + pKm1 - pHm1
    })
    Epsi <- expression({k * (1 - pK - pH) + mu * (dpH - dpK)/sV})
    EpsiS <- expression({k * (dpH + dpK) + E2f/sV})
    Epsi2 <- expression({k^2 * (pH + 1 - pK) + E2f})
    phiEst <- expression({1})
    phiEst.cl <- expression({cvreg:::.biasreducedglm(x = X, y = y, weights = weights, offset = offset, family = family, intercept = intercept)$dispersion})
  },
  gaussian = {
    Epsi.init <- expression({dc <- dnorm(k); pc <- pnorm(k)})
    Epsi <- expression(0)
    EpsiS <- expression(2 * pc - 1)
    Epsi2 <- expression(2 * k^2 * (1 - pc) - 2 * k * dc + 2 * pc - 1)
    phiEst.cl <- expression({sum(((y - mu)/mu)^2)/(nobs - ncoef)})
    phiEst <- expression({sphi <- mad(residP, center = 0)^2})
  },
  Gamma = {
    Epsi.init <- expression({
      nu <- 1/phi;snu <- 1/sqrt(phi);pPtc <- pgamma(snu + c(-k, k), shape = nu, rate = snu)
      pMtc <- pPtc[1];pPtc <- pPtc[2];aux2 <- k * snu;GLk <- .Gmn(-k, nu);GUk <- .Gmn(k, nu)
    })
    Epsi <- expression(k * (1 - pPtc - pMtc) + GLk - GUk)
    EpsiS <- expression(((GLk - GUk) + snu * (pPtc - pMtc))/mu)
    Epsi2 <- expression({(k^2 * (pMtc + 1 - pPtc) + (pPtc - pMtc) + (GLk * (1 - aux2) - GUk * (1 + aux2))/snu)})
    phiEst.cl <- expression({sum(((y - mu)/mu)^2)/(nobs - ncoef)})
    phiEst <- expression({sum(((y - mu)/mu)^2)/(nobs - ncoef)})
  }, stop(gettextf("family '%s' not yet implemented", family$family),
          domain = NA))
  sV <- NULL
  comp.V.resid <- expression({
    Vmu <- variance(mu)
    if (any(is.na(Vmu))) stop("NAs in V(mu)")
    if (any(Vmu == 0)) stop("0s in V(mu)")
    sVF <- sqrt(Vmu)
    residP <- (y - mu) * sni/sVF
  })
  comp.scaling <- expression({
    sV <- sVF * sqrt(phi)
    residPS <- residP/sqrt(phi)
  })
  comp.Epsi.init <- expression({
    dmu.deta <- mu.eta(eta)
    if (any(is.na(dmu.deta))) stop("NAs in d(mu)/d(eta)")
    H <- floor(mu * ni - k * sni * sV)
    K <- floor(mu * ni + k * sni * sV)
    eval(Epsi.init)
  })

  sni <- sqrt(ni)
  eval(comp.V.resid)
  phi <- eval(phiEst.cl)
  Rphi <- max(c(1e-4, 3 * hdmedian(abs(residP)))^2)
  conv <- FALSE
  if (ncoef)
    for (nit in 1:250) {
      eval(comp.scaling)
      eval(comp.Epsi.init)
      cpsi <- pmax.int(-k, pmin.int(residPS, k)) - eval(Epsi)
      EEq <- colMeans(cpsi * w.x * sni/sV * dmu.deta * X)
      DiagB <- eval(EpsiS)/(sni * sV) * w.x * (ni * dmu.deta)^2
      if (any(n0 <- ni == 0))
        DiagB[n0] <- 0
      Dtheta <- cvreg:::.ridgeinv(crossprod(X, DiagB * X)/nobs) %*% EEq
      if ((family$family=="binomial" || family$family =="quasibinomial") && (any(abs(Dtheta) > 8)) && all(is.finite(Dtheta))){
        wch <- which(abs(Dtheta) > 8); Dtheta[wch] <- sign(Dtheta[wch]) * 8
      }
      if (any(!is.finite(Dtheta))) {
        warning("Non-finite coefficients at iteration ", nit)
        break
      }
      theta <- thetaOld + Dtheta
      eta <- as.vector(X %*% theta) + offset
      mu <- linkinv(eta)
      eval(comp.V.resid)
      phi <- eval(phiEst)
      relE <- sqrt(sum(Dtheta^2)/max(1e-20, sum(thetaOld^2)))
      conv <- relE <= 1e-5
      if (conv)
        break
      thetaOld <- theta
    }
  else {
    conv <- TRUE
    nit <- 0
  }
  if (!conv)
    warning("Algorithm did not converge")
  eps <- 10 * .Machine$double.eps
  switch(family$family, binomial = {
    if (any(mu/weights > 1 - eps) || any(mu/weights < eps)) warning("fitted probabilities numerically 0 or 1 occurred")
  }, poisson = {
    if (any(mu < eps)) warning("fitted rates numerically 0 occurred")
  })
  eval(comp.V.resid)
  eval(comp.scaling)

  if (ncoef) {
    eval(comp.Epsi.init)
    alpha <- colMeans(eval(Epsi) * w.x * sni/sV * dmu.deta * X)
    DiagA <- eval(Epsi2)/(ni * sV^2) * w.x^2 * (ni * dmu.deta)^2
    matQ <- crossprod(X, DiagA * X)/nobs - tcrossprod(alpha, alpha)
    DiagB <- eval(EpsiS)/(sni * sV) * w.x * (ni * dmu.deta)^2
    if (any(n0 <- ni == 0))
      DiagB[n0] <- 0
    matM <- crossprod(X, DiagB * X)/nobs
    matMinv <- pseudoinverse(matM)
    asCov <- matMinv %*% matQ %*% matMinv/nobs
  }
  else {
    matM <- matQ <- asCov <- matrix(NA_real_, 0, 0)
  }
  if (any(ina)) {
    ok <- !ina
    theta.na[ok] <- theta
    theta <- theta.na
  }
  w.r <- pmin(1, k/abs(residPS))
  deviance.residuals <- devres(y, mu, w.r)
  deviance <- sum(deviance.residuals^2)
  if (family$family == "quasibinomial" || family$family == "quasipoisson"){
    phi <- sum((w.r * residPS^2)[w.r > 0])/(nobs-ncoef)
    asCov <- asCov * phi
   }
  names(mu) <- names(eta) <- names(residPS)
  std.err <- sqrt(diag(asCov))
  names(theta) <- names(std.err) <- rownames(asCov) <- colnames(asCov) <- names
  structure(list(coefficients = theta, std.err = std.err,
                 residuals = residP, fitted = mu, prior.weights = weights,
                 weights = w.r, x.weights = w.x, ni = ni, dispersion = phi, vcov = asCov,
                 matM = matM, matQ = matQ, k = k, family = family,
                 linear.predictor = eta, deviance = deviance, iter = nit, df.residual=nrow(X)-ncol(X),
                 y = y, converged = conv), class=c("robglm"))
}


#' Misclassification Robust Loss Function for Binomial Glmnet Models
#'
#' @description Sometimes even the best data manager or data management software goofs,
#' and response labels get flipped. When using a binomial elastic net model to classify
#' outcomes this can result in a poor choice of \eqn{\lambda} and lead to over-shrinkage,
#' insufficient shrinkage, or selecting the wrong subset of variables. This function applies
#' the misclassification loss function to each step in the coefficient path of a glmnet fit,
#' and calculates revised coefficients. Deviances and deviance residuals are calculated for
#' each set of revised coefficients.
#'
#' @param x a data matrix
#' @param y a binary outcome
#' @param alpha the mixing parameter for combining the L1 and L2 penalties
#' @param lambda supply a vector of specific penalty values if desired. this needs to be longer
#' than a single value, however! defaults to NULL.
#' @param nlambda how many lambda values glmnet should generate on its own if lambda is NULL.
#' @param gamma prior expectation of mislabel probability. defaults to 0.05.
#' @param tol tolerance for convergence. defaults to 1e-4.
#'
#' @return a list containing coefficients, deviance residuals, deviances, weights, the
#' vector of lambda values, and the optimal lambda (the value which minimizes the deviance).
#' @export
#'
#' @references
#' Copas, J. B. (1988). Binary Regression Models for Contaminated Data. Journal of the Royal Statistical Society: Series B (Methodological), 50(2), 225–253. doi:10.1111/j.2517-6161.1988.tb01723.x \cr \cr
#' Hung, H., Jou, Z.-Y., & Huang, S.-Y. (2017). Robust mislabel logistic regression without modeling mislabel probabilities. Biometrics, 74(1), 145–154. doi:10.1111/biom.12726  \cr \cr
#'
glmnet.cme <- function (x, y, alpha = 0.5, lambda = NULL, nlambda = 15, gamma = 0.05, tol = 1e-4){

  if (!is.null(lambda)) {lambda <- sort(lambda, decreasing = TRUE)}
  if (!is.null(lambda) && length(lambda) == 1) {return(cat(crayon::red("Must supply a vector of multiple lambda values!")))}
  x <- as.matrix(x)
  if (all(x[,1]==1)) x<-x[,-1]
  the.call <- match.call()
  family <- binomial()
  family.name <- "binomial"
  if (is.factor(y)) y <- as.numeric(y != levels(y)[1]) else y <- as.vector(y)
  lim <- rep(4, ncol(x))
  if (is.null(lambda)){
    initial <- glmnet:::glmnet(x=x,y=y,family="binomial",alpha=alpha,nlambda=nlambda,intercept = T,
                               type.logistic="modified.Newton",lower.limit=-1*lim,upper.limits=lim)
  }
  else{
    initial <- glmnet:::glmnet(x=x,y=y,family="binomial",alpha=alpha,lambda=lambda,intercept = T,
                               type.logistic="modified.Newton",lower.limit=-1*lim,upper.limits=lim)
  }

  lambda.grid <- initial$lambda
  beta.grid <- as.matrix(rbind("(Intercept)" = initial$a0, initial$beta))
  beta.grid[which(abs(beta.grid) < 1e-4)] <- 0
  n <- dim(x)[1]
  p <- dim(x)[2] + 1

  misfunc <- function(intercept, coef, x, y, gamma, maxit, tol, lambda, alpha){
    itcpt <- intercept
    initbetas <- coef
    keeps <- which(abs(initbetas) > 0)
    mc.beta <- .misclassFit(x = cbind(1, x[,keeps]), y = y, gamma = gamma, maxit = maxit, tol = tol, beta1 = c(itcpt, initbetas[keeps]), lambda = lambda, alpha = alpha)
    beta <- rep(0, length(initbetas))
    beta[keeps] <- mc.beta$coefficients[-1]
    beta <- c(mc.beta$coefficients[1], beta)
    eta <- drop(cbind(1,x) %*% beta)
    mu <- binomial()$linkinv(eta)
    #mw <- 1-hat(x[,keeps], intercept = F); if (all(mw==0)) mw <- rep(1, length(mw)) else mw <- sqrt(mw)
    w <- .misclassw(eta, gamma)
    dev.res <- binomial()$dev.res(y, mu, w)
    dev <- sum(dev.res^2)
    return(list(beta = beta, w = w, dev = dev, dev.res = dev.res))
  }

  grid <- sapply(1:ncol(beta.grid), function(i) misfunc(beta.grid[1,i], beta.grid[-1,i], x, y, gamma, 50, tol, lambda = lambda.grid[i], alpha))
  coefficients <- do.call(cbind, grid[1, ])
  rownames(coefficients) <- rownames(beta.grid)
  dev <- unlist(grid[3, ])
  w <- do.call(cbind, grid[2, ])
  deviance.residuals <- do.call(cbind, grid[4, ])
  lambda.opt <- lambda.grid[which.min(dev)]
  lambda <- lambda.grid
  return(list(coefficients = coefficients, deviance = dev, deviance.residuals = deviance.residuals,
              lambda.opt = lambda.opt, lambda = lambda, weights = w))
}


logistic.cme <- function (x, y, gamma = 0.0175, maxit = 500, tol = 1e-5){
  x <- as.matrix(x)
  if (all(x[,1]==1)) x<-x[,-1]
  the.call <- match.call()
  family <- binomial()
  family.name <- "binomial"
  if (is.factor(y)) y <- as.numeric(y != levels(y)[1]) else y <- as.vector(y)
  lim <- rep(4, ncol(x))
  initial <- cvreg:::.biasreducedglm(x = cbind(1,x), y = y,family = quasibinomial())$coefficients

  n <- dim(x)[1]
  p <- dim(x)[2] + 1
  itcpt <- initial[1]
  initbetas <- initial[-1]
  keeps <- which(abs(initbetas) >= 1e-4)
  mc.beta <- .misclassFit(x = cbind(1, x[,keeps]), y = y, gamma = gamma, maxit = maxit, tol = tol, beta1 = c(itcpt, initbetas[keeps]), lambda = 0, alpha = 0)

  beta <- rep(0, length(initbetas))
  beta[keeps] <- mc.beta$coefficients[-1]
  beta <- c(mc.beta$coefficients[1], beta)
  eta <- drop(cbind(1,x) %*% beta)
  mu <- binomial()$linkinv(eta)
  w <- .misclassw(eta, gamma)
  w.glmnet.fit <- cvreg:::.biasreducedglm(x=cbind(1,x),y=y,family=quasibinomial(),weights=w)

  w.glmnet.fit$call <- the.call
  w.glmnet.fit$prior.weights <- NULL
  tmp1 <- tmp2 <- matrix(0, p, p)
  tmp3 <- cbind(1,x) %*% w.glmnet.fit$coef
  for (i in 1:n) {
    tmp <- cbind(1,x)[i, ] %*% t(cbind(1,x)[i, ])
    tmp <- tmp * .misclassf(tmp3[i])
    tmp1 <- tmp1 + tmp * w[i]
    tmp2 <- tmp2 + tmp * w[i] * w[i]
  }
  tmp1 <- tmp1/n
  tmp2 <- tmp2/n
  cov <- cvreg:::.ridgeinv(tmp1) %*% tmp2 %*% cvreg:::.ridgeinv(tmp1)
  cov <- cov/n
  xn <- dimnames(x)[[2]]
  w.glmnet.fit$cov <- cov; rownames(cov) <- colnames(cov) <- colnames(c("(Intercept)",x))
  std.err <- sqrt(diag(w.glmnet.fit$cov))
  fitted <- binomial()$linkinv(drop(cbind(1,x)%*%beta))
  linear.predictor <- drop(cbind(1,x)%*%beta)

  dev.res <- binomial()$dev.res(y, fitted, w)
  w.glmnet.fit$coef <- names(std.err) <- names(beta) <- names(initial) <- c("(Intercept)", colnames(x))

   structure(
    list(coefficients = beta, std.err=std.err,fitted = fitted, linear.predictor = linear.predictor, deviance.residuals = dev.res, weights = w, converged = mc.beta$converged, iter = mc.beta$iter, vcov = w.glmnet.fit$cov)
    ,class="robglm"
  )
}


###### General Utilities #####

.Gmn <- function (t, nu) {
  snu <- sqrt(nu);snut <- snu + t;r <- numeric(length(snut))
  ok <- snut > 0;r[ok] <- {nu <- nu[ok];snu <- snu[ok];snut <- snut[ok]; exp((nu - 1)/2 * log(nu) - lgamma(nu) - snu * snut + nu * log(snut))
  }
  r
}

.wgtFlex <- function(d, c, h = 2){
  wtfun <- function(d, c, h){
    h2 <- h/2;ch2p <- c+h2;ch2n <- c-h2;u = abs(d)
    if (u >= ch2p){w <- 0;return(w)};if (u <= ch2n){w <- 1;return(w)}
    w = (u - ch2n) / h;w = 1 - (w^2);w = w^2;return(w)
  }
  if (length(d) > 1){sapply(d, function(i) wtfun(i, c, h))}else{wtfun(d, c, h)}
}


#' print method for robglm objects
#'
#' @param fit the model fit
#' @param conf.level the confidence level for the interval estimation. defaults to 0.95.
#' @param digits the number of significant digits to display. defaults to 3.
#' @method print robglm
#' @export
#' @keywords internal
#'
print.robglm <- function(fit, conf.level = 0.95, digits = 3){
  if (!is.null(fit$confidence.intervals)){
    varnames <- names(fit$coefficients)
    m <- cbind.data.frame(coefficients = format(round(fit$coefficients,digits),nsmall=digits),
                          std.err = format(round(fit$std.err,digits),nsmall=digits),
                          format(round(fit$confidence.intervals,digits),nsmall=digits),
                          p.value = fit$p.values)
    rownames(m) <- varnames
    print(noquote(m), right = T)
  }
  else{
    m <- as.matrix(fit$coefficients)
    m <- cbind(m, fit$std.err,
               lower.ci = m[,1] - (abs(qnorm((1 - conf.level)/2))*fit$std.err),
               upper.ci = m[,1] + (abs(qnorm((1 - conf.level)/2))*fit$std.err))
    sig <- sapply(1:nrow(m), function(i) ifelse(sign(m[i,3]) == sign(m[i,4]), "*" , " "))
    rownames(m) <- names(fit$coefficients)
    m <- format(round(m, digits), nsmall = digits);m <- as.data.frame(m); m<-cbind.data.frame(m, sig)
    colnames(m) <- c("estimate", "std.err", paste0("lower ", 100*conf.level, "% CI" ), paste0("upper ", 100*conf.level, "% CI"), "sig.")
    print(noquote(m), right=T )
  }
}

.maked2mu.deta <- function(family="gaussian", link="identity") {

  if (family=="gaussian"  ){
    if (link=="identity") function (eta) rep(1,length(eta))
    else cat("link function not recognized for ", family, " only identity is available!\n")}
  else if (family=="binomial" || family=="quasibinomial") {
    if (link=="logit") function (eta) pmax(exp(eta)/(1+exp(eta))^2, .Machine$double.eps) * (1 - 2 * ifelse(is.infinite(exp(eta)), 1, plogis(eta)))
    else if (link=="probit") function (eta) pmax(dnorm(eta), .Machine$double.eps) * -eta
    else if (link=="cloglog") function (eta) (1 - exp(eta)) * pmax(exp(eta) * exp(-exp(eta)), .Machine$double.eps)
    else if (link=="cauchit") function (eta) -2 * pi * eta * pmax(dcauchy(eta)^2, .Machine$double.eps)
    else cat("link function not recognized for ", family, " only logit, probit, cauchit, robit, and cloglog are available!\n")
  }
  else if (family=="poisson" || family == "quasipoisson") {
    if (link=="log") function (eta) pmax(exp(eta), .Machine$double.eps)
    else if (link=="sqrt") function (eta) rep(2, length(eta))
    else cat("link function not recognized for ", family, " only log is available!\n" )
  }
  else if (family=="Gamma") {
    if (link=="inverse") function (eta) 2/(eta^3)
    else if (link=="identity") function (eta) rep(1,length(eta))
    else if (link=="log") function (eta) pmax(exp(eta), .Machine$double.eps)
    else cat("link function not recognized for ", family, " only inverse is available!\n" )
  }
}



.maked1afun <- function(family="gaussian") {
  if (family=="gaussian"){
    function (zeta)  -1/zeta
  }
  else if (family=="binomial" || family=="quasibinomial")  {
    function (zeta) 0
  }
  else if (family=="poisson" || family == "quasipoisson") {
    function (zeta) 0
  }
  else if (family=="Gamma") {
    function (zeta) -2 * psigamma(-zeta, 0) + 2 * log(-zeta) + 2
  }
}

.maked2afun <- function(family="gaussian") {
  if (family=="gaussian"){
    function (zeta)  1/zeta^2
  }
  else if (family=="binomial" || family=="quasibinomial") {
    function (zeta) 0
  }
  else if (family=="poisson" || family == "quasipoisson") {
    function (zeta) 0
  }
  else if (family=="Gamma") {
    function (zeta)  2 * psigamma(-zeta, 1) + 2/zeta
  }
}


.maked3afun <- function(family="gaussian") {
  if (family=="gaussian"){
    function (zeta)  -2/zeta^3
  }
  else if (family=="binomial" || family=="quasibinomial") {
    function (zeta) 0
  }
  else if (family=="poisson" || family == "quasipoisson") {
    function (zeta) 0
  }
  else if (family=="Gamma") {
    function (zeta)  -2 * psigamma(-zeta, 2) - 2/zeta^2
  }
}

.maked1variance <- function(family="gaussian") {
  if (family=="gaussian"){
    function (mu)  rep(0, length(mu))
  }
  else if (family=="binomial" || family=="quasibinomial") {
    function (mu) 1 - 2 * mu
  }
  else if (family=="poisson" || family == "quasipoisson") {
    function (mu) rep(1, length(mu))
  }
  else if (family=="Gamma") {
    function (mu)  2 * mu
  }
}


.MqleFastBoot <- function (formula, data, family, k, cw, xwts, weights = NULL, start = NULL, mustart = NULL, etastart = NULL){

  X <- model.matrix(formula, data)
  y <- model.response(model.frame(formula, data))
  intercept = TRUE
  if (!is.matrix(X))
    X <- as.matrix(X)
  names <- colnames(X)
  nobs <- NROW(y)
  stopifnot(nobs == nrow(X))
  if (is.null(weights)) {
    weights <- rep.int(1, nobs)
  }
  else if (any(weights < 0)){
    stop("All weights must be positive")
  }
  offset <- NULL
  if (is.null(offset))
    offset <- rep.int(0, nobs)
  else if (!all(offset == 0))
    warning("'offset' not fully implemented")
  variance <- family$variance
  linkinv <- family$linkinv
  if (!is.function(variance) || !is.function(linkinv))
    stop("illegal 'family' argument")
  mu.eta <- family$mu.eta
  if (is.null(valideta <- family$valideta))
    valideta <- function(eta) TRUE
  if (is.null(validmu <- family$validmu))
    validmu <- function(mu) TRUE
  ncoef <- ncol(X)
  if (xwts == "welsch"){
    h <- hat(X, intercept = intercept)
    w.x <- (1 - h) / sqrt(h)
  }
  else if (xwts == "mallows"){
    h <- hat(X, intercept = intercept)
    w.x <- sqrt(1 - h)
  }
  else if (xwts == "schweppe"){
    h <- hat(X, intercept = intercept)
    w.x <- 1 - h
    r <- cvreg:::.biasreducedglm(x = X, y = y, weights = weights, offset = offset, family = family, intercept = intercept)$deviance.residuals
    r <- (r-hdmedian(r)) / (1.4826*hdmedian(abs(r-hdmedian(r))))
    w.x <- sqrt(w.x * MASS::psi.huber(r))
  }
  else if (xwts == "carroll"){
    rdx <- cvreg::cov.bacon(X)$dist
    rdx <- sqrt(rdx/ncoef)
    w.x <- (1 - (rdx/cw)^2)^3 * (abs(rdx) <= cw)
  }
  else if (xwts == "none"){
    w.x <- rep(1, nrow(X))
  }
  stopifnot(1000 >= 1, (k <- k) >= 0)
  #eval(family$initialize)
  ni <- as.vector(weights)
  ncoef <- length(start)
  k <- k
  thetaOld <- theta <- as.vector(start)
  eta <- as.vector(X %*% theta)
  mu <- linkinv(eta)
  if (!(validmu(mu) && valideta(eta)))
    stop("Cannot find valid starting values: You need help")
  switch(family$family, binomial = {
    Epsi.init <- expression({
      pK <- pbinom(K, ni, mu);pH <- pbinom(H, ni, mu);pKm1 <- pbinom(K - 1, pmax.int(0, ni - 1), mu)
      pHm1 <- pbinom(H - 1, pmax.int(0, ni - 1), mu);pKm2 <- pbinom(K - 2, pmax.int(0, ni - 2), mu)
      pHm2 <- pbinom(H - 2, pmax.int(0, ni - 2), mu); QlV <- mu/Vmu * (mu * ni * (pK - pH) + (1 - 2 * mu * ni) * ifelse(ni == 1, (H <= 0) * (K >= 1), pKm1 - pHm1) + (ni - 1) * mu * ifelse(ni == 2, (H <= 1) * (K >= 2), pKm2 - pHm2))
    })
    Epsi <-  expression({k*(1-pK-pH)+ifelse(ni==1,(-(H<0)+(K>=1))*sV, (pKm1-pHm1-pK+pH)*mu*sni/sV)})
    EpsiS <- expression({mu/Vmu * (k * (pH - ifelse(ni == 1, H >= 1, pHm1)) + k *(pK - ifelse(ni == 1, K > 0, pKm1))) + ifelse(ni == 0, 0, QlV/(sni * sV))})
    Epsi2 <- expression({k^2 * (pH + 1 - pK) + QlV})
    phiEst <- phiEst.cl <- expression({1})
  }, poisson = {
    Epsi.init <- expression({dpH <- dpois(H, mu);dpH1 <- dpois(H - 1, mu);dpK <- dpois(K, mu);dpK1 <- dpois(K - 1, mu)
    pHm1 <- ppois(H - 1, mu);pH <- pHm1 + dpH; pKm1 <- ppois(K - 1, mu);pK <- pKm1 + dpK
    E2f <- mu * (dpH1 - dpH - dpK1 + dpK) + pKm1 - pHm1
    })
    Epsi <- expression({k * (1 - pK - pH) + mu * (dpH - dpK)/sV})
    EpsiS <- expression({k * (dpH + dpK) + E2f/sV})
    Epsi2 <- expression({k^2 * (pH + 1 - pK) + E2f})
    phiEst <- phiEst.cl <- expression({1})
  },
  quasibinomial = {
    Epsi.init <- expression({
      pK <- pbinom(K, ni, mu);pH <- pbinom(H, ni, mu);pKm1 <- pbinom(K - 1, pmax.int(0, ni - 1), mu)
      pHm1 <- pbinom(H - 1, pmax.int(0, ni - 1), mu);pKm2 <- pbinom(K - 2, pmax.int(0, ni - 2), mu)
      pHm2 <- pbinom(H - 2, pmax.int(0, ni - 2), mu); QlV <- mu/Vmu * (mu * ni * (pK - pH) + (1 - 2 * mu * ni) * ifelse(ni == 1, (H <= 0) * (K >= 1), pKm1 - pHm1) + (ni - 1) * mu * ifelse(ni == 2, (H <= 1) * (K >= 2), pKm2 - pHm2))
    })
    Epsi <-  expression({k*(1-pK-pH)+ifelse(ni==1,(-(H<0)+(K>=1))*sV, (pKm1-pHm1-pK+pH)*mu*sni/sV)})
    EpsiS <- expression({mu/Vmu * (k * (pH - ifelse(ni == 1, H >= 1, pHm1)) + k *(pK - ifelse(ni == 1, K > 0, pKm1))) + ifelse(ni == 0, 0, QlV/(sni * sV))})
    Epsi2 <- expression({k^2 * (pH + 1 - pK) + QlV})
    phiEst <- expression({1})
    phiEst.cl <- expression({cvreg:::.biasreducedglm(x = X, y = y, weights = weights, offset = offset, family = family, intercept = intercept)$dispersion})
  },
  quasipoisson = {
    Epsi.init <- expression({dpH <- dpois(H, mu);dpH1 <- dpois(H - 1, mu);dpK <- dpois(K, mu);dpK1 <- dpois(K - 1, mu)
    pHm1 <- ppois(H - 1, mu);pH <- pHm1 + dpH; pKm1 <- ppois(K - 1, mu);pK <- pKm1 + dpK
    E2f <- mu * (dpH1 - dpH - dpK1 + dpK) + pKm1 - pHm1
    })
    Epsi <- expression({k * (1 - pK - pH) + mu * (dpH - dpK)/sV})
    EpsiS <- expression({k * (dpH + dpK) + E2f/sV})
    Epsi2 <- expression({k^2 * (pH + 1 - pK) + E2f})
    phiEst <- expression({1})
    phiEst.cl <- expression({cvreg:::.biasreducedglm(x = X, y = y, weights = weights, offset = offset, family = family, intercept = intercept)$dispersion})
  }, gaussian = {
    Epsi.init <- expression({dc <- dnorm(k); pc <- pnorm(k)})
    Epsi <- expression(0)
    EpsiS <- expression(2 * pc - 1)
    Epsi2 <- expression(2 * k^2 * (1 - pc) - 2 * k * dc + 2 * pc - 1)
    phiEst.cl <- expression({sum(((y - mu)/mu)^2)/(nobs - ncoef)})
    phiEst <- expression(sum(((y - mu)/mu)^2)/(nobs - ncoef))
  }, Gamma = {
    Epsi.init <- expression({
      nu <- 1/phi;snu <- 1/sqrt(phi);pPtc <- pgamma(snu + c(-k, k), shape = nu, rate = snu)
      pMtc <- pPtc[1];pPtc <- pPtc[2];aux2 <- k * snu;GLk <- .Gmn(-k, nu);GUk <- .Gmn(k, nu)
    })
    Epsi <- expression(k * (1 - pPtc - pMtc) + GLk - GUk)
    EpsiS <- expression(((GLk - GUk) + snu * (pPtc - pMtc))/mu)
    Epsi2 <- expression({(k^2 * (pMtc + 1 - pPtc) + (pPtc - pMtc) + (GLk * (1 - aux2) - GUk * (1 + aux2))/snu)})
    phiEst.cl <- expression({sum(((y - mu)/mu)^2)/(nobs - ncoef)})
    phiEst <- expression({sum(((y - mu)/mu)^2)/(nobs - ncoef)})
  }, stop(gettextf("family '%s' not yet implemented", family$family),
          domain = NA))
  sV <- NULL
  comp.V.resid <- expression({
    Vmu <- variance(mu)
    if (any(is.na(Vmu))) stop("NAs in V(mu)")
    if (any(Vmu == 0)) stop("0s in V(mu)")
    sVF <- sqrt(Vmu)
    residP <- (y - mu) * sni/sVF
  })
  comp.scaling <- expression({
    sV <- sVF * sqrt(phi)
    residPS <- residP/sqrt(phi)
  })
  comp.Epsi.init <- expression({
    dmu.deta <- mu.eta(eta)
    if (any(is.na(dmu.deta))) stop("NAs in d(mu)/d(eta)")
    H <- floor(mu * ni - k * sni * sV)
    K <- floor(mu * ni + k * sni * sV)
    eval(Epsi.init)
  })

  sni <- sqrt(ni)
  eval(comp.V.resid)
  phi <- eval(phiEst.cl)
  Rphi <- c(1e-12, 3 * hdmedian(abs(residP)))^2
  conv <- FALSE
  if (ncoef)
    for (nit in 1:150) {
      eval(comp.scaling)
      eval(comp.Epsi.init)
      cpsi <- pmax.int(-k, pmin.int(residPS, k)) - eval(Epsi)
      EEq <- colMeans(cpsi * w.x * sni/sV * dmu.deta * X)
      DiagB <- eval(EpsiS)/(sni * sV) * w.x * (ni * dmu.deta)^2
      if (any(n0 <- ni == 0))
        DiagB[n0] <- 0
      Dtheta <- cvreg:::.ridgeinv(crossprod(X, DiagB * X)/nobs) %*% EEq
      if ((family$family=="binomial" || family$family =="quasibinomial") && (any(abs(Dtheta) > 8)) && all(is.finite(Dtheta))){
        wch <- which(abs(Dtheta) > 8); Dtheta[wch] <- sign(Dtheta[wch]) * 8
      }
      if (any(!is.finite(Dtheta))) {
        warning("Non-finite coefficients at iteration ", nit)
        break
      }
      theta <- thetaOld + Dtheta
      eta <- as.vector(X %*% theta) + offset
      mu <- linkinv(eta)
      eval(comp.V.resid)
      phi <- eval(phiEst)
      relE <- sqrt(sum(Dtheta^2)/max(1e-10, sum(thetaOld^2)))
      conv <- relE <= 1e-4
      if (conv)
        break
      thetaOld <- theta
    }
  else {
    conv <- TRUE
    nit <- 0
  }
  switch(family$family, binomial = {
    if (any(mu/weights > 1 - 0) || any(mu/weights < 0)) warning("fitted probabilities numerically 0 or 1 occurred")
  }, poisson = {
    if (any(mu < 0)) warning("fitted rates less than 0 occurred")
  })
  theta <- as.vector(theta)
  names(theta) <- names
  return(theta)
}


.misclassF <- function(u) {1 / (1 + exp(-u))}
.misclassf <- function(u) {exp(-u) / ((1 +exp(-u))^2)}
.misclassG <- function(u, theta) {.misclassF(u) + theta * (1.0 - 2.0 * .misclassF(u))}
.misclassg <- function(u, theta) {.misclassf(u) - 2.0 * theta * .misclassf(u)}
.misclassw <- function(u, theta) {((1.0 - 2.0 * theta) * .misclassF(u) * (1.0 - .misclassF(u))) / (.misclassG(u, theta) * (1.0 - .misclassG(u, theta)))}

.misclassFit <- function (x, y, gamma, maxit, tol, beta1, lambda, alpha) {
  n <- dim(x)[1]
  p <- dim(x)[2]
  v <- 10 * tol
  j <- 0
  L2 <- lambda*(1-alpha)
  pen <- 1 / (1+L2)
  while ((sum(abs(v)) > tol) && (j < maxit)) {
    j <- j + 1
    beta0 <- beta1
    a <- matrix(0, p, p)
    v <- rep(0, p)
    for (i in 1:n) {
      tmp <- drop(x[i, ] %*% beta0)
      tmp2 <- .misclassw(tmp, gamma) * .misclassg(tmp, gamma)
      tmp3 <- (tmp2 * x[i, ]) %*% t(x[i, ])
      a <- a + tmp3
      tmp4 <- .misclassw(tmp, gamma) * x[i, ] * (y[i] - .misclassG(tmp, gamma))
      v <- v + tmp4
    }
    a <- -a
    beta1 <- as.vector(beta0 - solve(a) %*% v) * pen
  }
  fit <- list(coefficients = as.vector(beta1), iter = j, converged = sum(abs(v)) < tol)
  fit
}
abnormally-distributed/cvreg documentation built on May 3, 2020, 3:45 p.m.