
#' Shrinkage Estimation of the Covariance Matrix
#'
#' @description This implements the covariance estimator of Schaefer & Strimmer (2005) which in turn builds
#' upon Ledoit & Wolf's (2004) paper. Shrinkage targets A, B, and D of Schaefer & Strimmer's (2005)
#' paper are offered here, details of which are given below: \cr
#' \cr
#' Target A: Identity (diagonal, unit variance). Off-diagonal entries are shrunk towards zero, while
#' diagonal entries are shrunk towards 1. \cr \cr
#'
#' Target B: Pooled (diagonal, common variance). Off-diagonal entries are shrunk as in Target A, but diagonal
#' entries are shrunk towards a common value. In this package the geometric mean is used as that common value.
#' Schaefer & Strimmer use the median, but the geometric mean is a sensible choice here: it is preferable to the
#' arithmetic mean for strictly positive quantities (which variances are), and it is less sensitive to outlying
#' variances while still taking them into account. \cr \cr
#'
#' Target D: Unequal (diagonal, empirical variances). This works just as Target B, but the mixing parameter
#' for the diagonal is set to zero, so the empirical variances are retained along the diagonal. If you do not
#' wish to shrink the variances towards a common value, this is the appropriate choice. This is the default
#' shrinkage target. \cr \cr
#'
#' Targets C, E, and F are not offered because they do not guarantee a positive-definite
#' covariance matrix.
#'
#' Also available here is an adaptive non-linear shrinkage procedure. The estimator was adapted from the nlshrink
#' package, with a few minor adjustments to simplify usage and speed up estimation. Note that
#' its penalization method is entirely different, being based on a non-linear optimization problem. For details,
#' see Ledoit & Wolf (2015).
#'
#' @param x a data frame or matrix of numeric covariates
#' @param w an optional vector of observation weights. if left as NULL, equal weights are used.
#' @param alpha a custom value for the off-diagonal mixing parameter. if left as NULL the optimal value will automatically be selected.
#' @param alpha.var a custom value for the diagonal mixing parameter. if left as NULL the optimal value will automatically be selected.
#' @param target one of "unequal" (the default), "identity", "pooled", or "adaptive".
#' @param ... other arguments
#'
#' @return a covariance matrix
#' @export
#'
#' @examples
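#' # A minimal, illustrative call (assumes the package is loaded; mtcars ships with base R).
#' # Shrink towards the pooled target and inspect the estimated mixing parameters.
#' S <- covShrink(mtcars[, c("mpg", "disp", "hp", "wt")], target = "pooled")
#' attr(S, "alpha"); attr(S, "alpha.var")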
#'
#' @references
#' Schaefer, J. ; K. Strimmer (2005) A shrinkage approach to large-scale covariance estimation and implications for functional genomics. Statist. Appl. Genet. Mol. Biol. 4:32. \cr
#' \cr
#' Ledoit, O. ; M. Wolf (2004). Honey, I shrunk the sample covariance matrix. J. Portfolio Management, 30(4), 110-119, doi: 10.3905/jpm.2004.110 \cr
#' \cr
#' Ledoit, O. ; Wolf, M. (2015). Spectrum estimation: a unified framework for covariance matrix estimation and PCA in large dimensions. Journal of Multivariate Analysis, 139(2)
#' \cr
#'
#'
covShrink = function(x, w = NULL, alpha = NULL, alpha.var = NULL, target = c("unequal", "identity", "pooled", "adaptive"), ...){

  target <- match.arg(target)
  x <- as.matrix(x)
  gmean = function(x, na.rm=TRUE){
    exp(sum(log(x[x > 0]), na.rm=na.rm) / length(x))
  }
  if (target != "adaptive"){
    if (is.null(w)){w = rep(1/nrow(x), nrow(x))}else{w = w/sum(w)}
    if (is.null(alpha)) alpha = .estimateAlpha(x, w)
    if (is.null(alpha.var)) alpha.var = .estimateAlphaVar(x, w, target)
    rho <- cov2cor(t(x)%*%(diag(w)%*%x))
  }
  if (target == "identity"){
    rho.shrink <- ((1-alpha)*rho) + (alpha*diag(rep(1,ncol(x))))
    vars = sapply(colVars(x), function(i)  ((1-alpha.var)*i)+(alpha.var))
    Sigma = cor2cov(rho.shrink, vars)
    attr(Sigma, "alpha")<-alpha;attr(Sigma,"alpha.var")<-alpha.var
  }
  else if (target == "unequal"){
    rho.shrink = ((1-alpha)*rho) + (alpha*diag(rep(1,ncol(x))))
    vars = colVars(x)
    Sigma = cor2cov(rho.shrink, vars)
    attr(Sigma, "alpha")<-alpha;attr(Sigma,"alpha.var")<-alpha.var
  }
  else if (target == "pooled"){
    rho.shrink <- ((1-alpha)*rho) + (alpha*diag(rep(1,ncol(x))))
    gv = gmean(colVars(x))
    vars = sapply(colVars(x), function(i) ((1-alpha.var)*i)+(alpha.var*gv))
    Sigma = cor2cov(rho.shrink, vars)
    attr(Sigma, "alpha")<-alpha;attr(Sigma,"alpha.var")<-alpha.var
  }
  else if (target == "adaptive"){
    if (!is.null(w)){
      x <- diag((w * nrow(x))^0.5) %*% x
      Sigma = cov.nlshrink(x, ...)
    }
    else{
      Sigma = cov.nlshrink(x, ...)
    }
    attr(Sigma, "alpha") <- NA;attr(Sigma, "alpha.var") <- NA
  }
  return(Sigma)
}


cov.nlshrink <- function(X, k=0) {
  n <- nrow(X); p <- ncol(X)
  if (k == 0){X <- X - tcrossprod(rep(1,n), colMeans(X));k = 1}
  if (n > k){effn <- n - k}else{k <- n - 1;effn <- 1}
  Sigma <- crossprod(X)/effn;eig<-eigen(Sigma);Gamma<-eig$vectors;lambda<-as.vector(eig$values)
  lambdasort<-sort(lambda);lambdaord<-order(lambda);tau_est<-.estTau(X = X, k = k)
  nlshrink_tau <- .nlshrink(.EstQ(tau_est, effn))
  mleshrink <- Gamma %*% tcrossprod(diag(nlshrink_tau[lambdaord]), Gamma)
  return(mleshrink)
}


.estimateAlpha <- function (x, w = NULL){
  n = nrow(x)
  p = ncol(x)
  if (p == 1)
    return(1)
  if (is.null(w)){w = rep(1/nrow(x), nrow(x))}else{w = w/sum(w)}
  xs = Scale(x, w, center = TRUE, scale = TRUE)
  w2 = sum(w * w)
  h1w2 = w2/(1 - w2)
  sw = sqrt(w)
  xsw = sweep(xs, MARGIN = 1, STATS = sw, FUN = "*")
  xswsvd = svd(xsw)
  sE2R = sum(xsw * (tcrossprod(sweep(xswsvd$u, 2, xswsvd$d^3, "*"), xswsvd$v))) - sum(colSums(xsw^2)^2)
  remove(xsw, xswsvd)
  xs2w = sweep(xs^2, MARGIN = 1, STATS = sw, FUN = "*")
  sER2 = 2 * sum(xs2w[, (p - 1):1] * t(apply(xs2w[, p:2, drop = FALSE], 1, cumsum)))
  remove(xs2w)
  denominator = sE2R
  numerator = sER2 - sE2R
  if (denominator == 0)
    alpha = 1
  else alpha = max(0, min(1, numerator/denominator * h1w2))
  return(alpha)
}

.estimateAlphaVar = function (x, w = NULL, target = c("unequal", "identity", "pooled")) {

  target <- match.arg(target)
  n = nrow(x)
  p = ncol(x)
  if (is.null(w)){w = rep(1/nrow(x), nrow(x))}else{w = w/sum(w)}
  w2 = sum(w^2)
  h1 = 1/(1 - w2)
  h1w2 = w2/(1 - w2)
  xc = scale(x, center = TRUE, scale = FALSE)
  v = h1 * (colSums(w * xc^2))
  gmean = function(x, na.rm=TRUE){
    exp(sum(log(x[x > 0]), na.rm=na.rm) / length(x))
  }
  if (target == "pooled"){
    target = gmean(v)
  }
  else if (target == "identity"){
    target = 1
  }
  else if (target == "unequal"){
    return(0)
  }

  Z = xc^2
  q1 = colSums(sweep(Z, MARGIN = 1, STATS = w, FUN = "*"))
  q2 = colSums(sweep(Z^2, MARGIN = 1, STATS = w, FUN = "*")) - q1^2
  numerator = sum(q2)
  denominator = sum((q1 - target/h1)^2)
  if (denominator == 0) alpha.var = 1
  else alpha.var = max(0, min(1, numerator/denominator * h1w2))
  return(alpha.var)
}





.EstQ <- function(tau, n){
  # first check inputs
  if (as.integer(n) <= 0) stop("n must be positive integer")

  # these parameters can be changed if needed
  left_tol <- 1e-4
  tol <- 1e-4

  if (is.unsorted(tau)) tau_ <- sort(tau) else tau_ <- tau
  tau_[tau_ < 0 & abs(tau_) < left_tol] = 0
  tau_[tau_ > 0 & abs(tau_) < tol] = 0
  if (any (tau_ < 0)) stop("all eigenvalues must be non-negative")
  if (all(tau_ == 0)) stop("at least one eigenvalue must be non-zero")

  # set up environment
  Q <- new.env()
  Q$n <- as.integer(n)
  if (Q$n != n) print ("Note: n coerced to integer")
  Q$tau <- tau_
  Q$p <- length(Q$tau)
  Q$c <- Q$p / Q$n

  tau_nonzero <- Q$tau[Q$tau > 0]
  Q$t <- unique(tau_nonzero)
  Q$K <- length(Q$t)
  Q$pw <- vapply(Q$t, function(x) length(which(tau_nonzero == x)), integer(1))
  Q$pzw <- Q$p - sum(Q$pw)
  Q$pwt2 <- Q$pw * Q$t^2

  sup_fn <- function(Q){
    x_fn <- function(k){(Q$t[k]*Q$t[k+1])^(2/3) * ( (Q$pw[k]*Q$t[k+1])^(1/3) + (Q$pw[k+1]*Q$t[k])^(1/3) ) / ( Q$pwt2[k]^(1/3) + Q$pwt2[k+1]^(1/3) )}
    p_theta_fn <- function(u, k) Q$pwt2[k]/(Q$t[k] - u)^2 + Q$pwt2[k+1]/(Q$t[k+1] - u)^2
    p_phi_L_fn <- function(u, k) if (k == 1) 0 else {sum(Q$pwt2[1:(k-1)]/(Q$t[1:(k-1)] - u)^2)}
    p_phi_R_fn <- function(u, k) if (k == (Q$K-1)) 0 else {sum(Q$pwt2[(k+2):Q$K]/(Q$t[(k+2):Q$K] - u)^2 )}

    spec_sep <- function() {
      if (Q$K == 1) return (NULL)
      vapply(1:(Q$K-1), function(k)
        p_theta_fn(x_fn(k),k) + p_phi_L_fn(Q$t[k+1], k) + p_phi_R_fn(Q$t[k], k)  < Q$n, logical(1) )
    }

    p_phi_diff_fn <- function(u) 2*sum(Q$pwt2/(Q$t - u)^3)
    p_phi_fn <- function(u) sum(Q$pwt2/(Q$t - u)^2)

    spec_sep2 <- function(){
      if (Q$K == 1) return (NULL)
      spec_sep_out <- spec_sep();out <- c()
      for (k in which(spec_sep_out)){
        x_k <- x_fn(k);p_phi_diff_x_k <- p_phi_diff_fn(x_k)
        if (p_phi_diff_x_k == 0) out <- c(out, setNames(x_k,k) )
        else if (p_phi_diff_x_k > 0) {
          x_root <- optimize(f = function(x) (p_phi_diff_fn(x))^2, interval=c(Q$t[k], x_k), tol=1e-4)$minimum
          if (p_phi_fn(x_root) < Q$n) out <- c(out, setNames(x_root,k))
        }
        else {
          x_root <- optimize(f = function(x) (p_phi_diff_fn(x))^2, interval=c(x_k, Q$t[k+1]), tol=1e-4)$minimum
          if(p_phi_fn(x_root) < Q$n) out <- c(out, setNames(x_root,k))
        }
      }
      return ( out )
    }

    spec_boundint <- function(){
      spec_sep_out <- spec_sep2()
      if (is.null(spec_sep_out)) return (NULL)
      K_subset <- as.integer(names(spec_sep_out))
      out <- do.call(rbind, lapply(K_subset, function(k){
        x_mid <- spec_sep_out[as.character(k)]
        spec_k_lb <- optimize(f = function(x) (p_phi_fn(x) - Q$n)^2, interval=c(Q$t[k],x_mid), tol=1e-4)$minimum
        spec_k_ub <- optimize(f = function(x) (p_phi_fn(x) - Q$n)^2, interval=c(x_mid,Q$t[k+1]), tol=1e-4)$minimum
        pw1 <- Q$pzw + sum(Q$pw[1:k]);pw2 <- Q$p - pw1
        return(c(spec_k_lb, spec_k_ub, pw1, pw2))
      }))
      rownames(out) <- K_subset
      return (out)
    }

    boundint <- spec_boundint()
    spec_supp <- function(){
      t1 <- min(Q$t)
      x_lb1 <- t1 - sqrt(1/Q$n * sum(Q$pwt2) ) - 1
      x_t1 <- optimize(f = function(x) (p_phi_fn(x) - Q$n)^2, interval=c(x_lb1, t1), tol=1e-4)$minimum
      tK <- max(Q$t)
      x_ubK <- tK + sqrt(1/Q$n * sum(Q$pwt2) ) + 1
      x_tK <- optimize(f = function(x) (p_phi_fn(x) - Q$n)^2, interval=c(tK, x_ubK), tol=1e-4)$minimum

      if (is.null(boundint)) {
        supp_intervals <- matrix(c(x_t1,x_tK), ncol=2)
        omega <- Q$p
      }
      else {
        supp_intervals <- matrix(c(x_t1, t(boundint[,1:2]), x_tK), byrow=TRUE, ncol=2)
        omega <- vapply(1:nrow(supp_intervals), function(i)
          sum(Q$pw[Q$t > supp_intervals[i,1] & Q$t < supp_intervals[i,2]] ), integer(1) )
      }
      rownames(supp_intervals) <- omega
      return(supp_intervals)
    }
    Q$u_Fbar <- spec_supp()
    Q$numint <- nrow(Q$u_Fbar)
    m_LF_u_Fbar_fn <- function(u) 1/Q$p * sum(Q$pw*Q$t/(Q$t - u) )
    Q$m_LF_u_Fbar <- apply(Q$u_Fbar, c(1,2), m_LF_u_Fbar_fn)
    endpoint_fn <- function(u, m_LF_u) max(0, u - Q$c*u*m_LF_u)
    Q$endpoints <- matrix(mapply(endpoint_fn, Q$u_Fbar, Q$m_LF_u_Fbar), nrow=Q$numint)
    Q$endpoints[1,1] <- ifelse(Q$p==Q$n, 0, Q$endpoints[1,1] )
    numeig_fn <- function(){
      if (Q$numint == 1) out <- Q$p - max(Q$pzw, Q$p - Q$n)
      else {
        out <- rep(0, Q$numint)
        out[1] <- boundint[1,3] - max(Q$pzw, Q$p - Q$n)
        if (Q$numint > 2) out[2:(Q$numint-1)] <- diff(boundint[1:(Q$numint - 1),3])
        out[Q$numint] <- boundint[(Q$numint-1),4]
      }
      return (as.integer(out) )
    }

    Q$numeig <- numeig_fn()
  }

  grid_fn <- function(Q){
    minnxi = 100
    nxi0 = max(minnxi,min(sum(Q$pw),Q$n))
    Q$mult = ceiling(nxi0 / min(sum(Q$pw),Q$n) )
    Q$nxi = Q$mult*min(sum(Q$pw),Q$n)
    Q$xi_list <- lapply(1:Q$numint, function(i) {
      lb <- Q$u_Fbar[i,1]; ub <- Q$u_Fbar[i,2]
      temp <- Q$numeig[i]*Q$mult
      return ( lb + (ub-lb)*(sin( pi/(2*(temp + 1)) * (1:temp) ) )^2  )
    })
  }

  sol_fn <- function(Q){
    Gamma_fn <- function(y, xi)
      sum(Q$tau^2/( (Q$tau - xi)^2 + y^2 ) ) - Q$n
    Q$zxi_list <- lapply(1:length(Q$xi_list), function(i) {
      vapply(1:(length(Q$xi_list[[i]]) ), function(j) {
        xi <- Q$xi_list[[i]][j]
        delta <- min( (Q$t - xi)^2 )
        y_ub <- sqrt(max(1/Q$n * sum(Q$pwt2) - delta, 0) ) + 1
        root <- optimize(f = function(y) (Gamma_fn(y, xi))^2, interval=c(0,y_ub), tol=1e-4)$minimum
        return (complex(real = xi, imaginary = root))}, complex(1))
    })
  }

  den_fn <- function(Q){
    Q$a = 4
    m_LF_fn <- Vectorize ( function(z) 1/Q$p * sum(Q$tau/(Q$tau - z) ), vectorize.args = "z" )
    Q$m_LF_zxi_list <- lapply(1:length(Q$zxi_list), function(i) m_LF_fn(Q$zxi_list[[i]]) )
    Q$x_list <- lapply(1:length(Q$zxi_list), function(i) Re(Q$zxi_list[[i]] * (1 - Q$c*Q$m_LF_zxi_list[[i]]) ) )
    Q$f_list <- lapply(1:length(Q$zxi_list), function(i) 1/(pi*Q$c) * Im(Q$zxi_list[[i]]) / (abs(Q$zxi_list[[i]])^2) )
    Q$zeta_list <- lapply(1:length(Q$x_list), function(i) Q$x_list[[i]]^(1/Q$a) )
    Q$g_list <- lapply(1:length(Q$zeta_list), function(i) Q$a*Q$zeta_list[[i]]^(Q$a-1) * Q$f_list[[i]])
  }

  dis_fn <- function(Q){
    Q$dis_x_list <- lapply(1:Q$numint, function(i) c(Q$endpoints[i,1], Q$x_list[[i]], Q$endpoints[i,2]))
    Q$dis_zeta_list <- lapply(1:Q$numint, function(i) c(Q$endpoints[i,1]^(1/Q$a), Q$zeta_list[[i]], Q$endpoints[i,2]^(1/Q$a) ) )
    Q$dis_m_LF_list <- lapply(1:Q$numint, function(i) c(Q$m_LF_u_Fbar[i,1], Q$m_LF_zxi_list[[i]], Q$m_LF_u_Fbar[i,2]))
    Q$dis_g_list <- lapply(1:Q$numint, function(i) c(0, Q$g_list[[i]], 0) )
    Q$F0 <- 1/Q$p * max(Q$pzw, (Q$p - Q$n) )
    Q$Fstart <- Q$F0 + c(0, cumsum(Q$numeig[-Q$numint]) ) / Q$p
    Q$Fend <- Q$F0 + cumsum(Q$numeig) / Q$p
    Q$intlength <- sapply(1:Q$numint, function(i) length(Q$dis_x_list[[i]]))
    Q$dis_G_raw <- lapply(1:Q$numint, function(i)
      c(0, cumsum(diff(Q$dis_zeta_list[[i]]) * (Q$dis_g_list[[i]][-1] + Q$dis_g_list[[i]][-Q$intlength[i]]) * 0.5 ) ) )
    Q$dis_G_list <- lapply(1:Q$numint, function(i) Q$Fstart[i] +(Q$Fend[i] - Q$Fstart[i]) * Q$dis_G_raw[[i]] / Q$dis_G_raw[[i]][Q$intlength[i]] )
  }

  lambda_fn <- function(Q){
    Q$F <- lapply(1:Q$numint, function(i) unique(Q$dis_G_list[[i]]))
    Q$F_idx <- lapply(1:Q$numint, function(i) which(!duplicated(Q$dis_G_list[[i]] ) ) )
    Q$nidx <- sapply(1:Q$numint, function(i) length(Q$F_idx[[i]]))
    Q$x_F <- lapply(1:Q$numint, function(i) (Q$dis_zeta_list[[i]][Q$F_idx[[i]]])^(Q$a) )
    Q$x_F_mean <- lapply(1:Q$numint, function(i) 0.5*(Q$x_F[[i]][-1] + Q$x_F[[i]][-length(Q$x_F[[i]])] ) )
    Q$x_F_diff <- lapply(1:Q$numint, function(i) diff(Q$x_F[[i]]))
    Q$F_diff <- lapply(1:Q$numint, function(i) diff(Q$F[[i]]))
    Q$nquant <- Q$numeig + 1
    Q$quant <- lapply(1:Q$numint, function(i) seq(Q$F[[i]][1], Q$F[[i]][Q$nidx[i]],length.out = Q$nquant[i]))
    Q$bins <- lapply(1:Q$numint, function(i) findInterval(Q$quant[[i]], Q$F[[i]], rightmost.closed=TRUE))
    if (any(sapply(1:length(Q$bins), function(i) c(Q$bins[[i]][1], Q$bins[[i]][Q$nquant[i]] ) ) != rbind(rep(1,Q$numint), Q$nidx-1) ) ) stop("Unexpected bin allocation")
    Q$integral_indic <- lapply(1:Q$numint, function(i)1*( tcrossprod(rep(1,Q$nquant[[i]]), Q$F_idx[[i]][-1]) <= tcrossprod(Q$bins[[i]], rep(1, (Q$nidx[i]-1) ) ) ) )
    Q$lambda <- rep(0, Q$p)
    X_integral <- lapply(1:Q$numint, function(i) Q$F_diff[[i]] * Q$x_F_mean[[i]] )
    integral_j_kappa <- lapply(1:Q$numint, function(i) (Q$quant[[i]] - Q$F[[i]][Q$bins[[i]]])* ( Q$x_F[[i]][Q$bins[[i]]] +
                                                                                                   0.5* (Q$quant[[i]] - Q$F[[i]][Q$bins[[i]]]) * Q$x_F_diff[[i]][Q$bins[[i]]] / Q$F_diff[[i]][Q$bins[[i]]]) )
    X_kappa_integral <- lapply(1:Q$numint, function(i)
      rowSums ( tcrossprod(rep(1,Q$nquant[[i]]), X_integral[[i]]) * Q$integral_indic[[i]] ) + integral_j_kappa[[i]] )
    for (i in 1:Q$numint)
      Q$lambda[round(Q$F[[i]][1] * Q$p + 1):round(Q$F[[i]][Q$nidx[i]] * Q$p)] <- diff(X_kappa_integral[[i]]) * Q$p
  }

  sup_fn(Q)
  grid_fn(Q)
  sol_fn(Q)
  den_fn(Q)
  dis_fn(Q)
  lambda_fn(Q)
  return(Q)
}


.nlshrink <- function(Q) {
  delta <- rep(0,Q$p)
  if ((Q$p - Q$n) > Q$pzw) {
    lb <- min(Q$tau) - 1/ Q$n * sum(Q$tau) - 1
    ub <- 1/Q$p * (Q$p - Q$pw[1])*Q$t[1]
    optim_fn <- function(u) (sum(Q$pw*Q$t / (Q$t - u)) - Q$n)^2
    u_Fbar0 <- optimize(f = optim_fn, interval = c(lb,ub))$minimum
    null_adj <- u_Fbar0 / (1 - Q$c)
    delta[1:(Q$p - Q$n)] <- null_adj
  }

  lim0 <- ifelse(Q$p == Q$n & all(Q$tau > 0), Q$n / sum(Q$pw / Q$t), 0)

  out <- lapply(1:Q$numint, function(i) {
    y <- rep(0, Q$nidx[i])
    Dr <- abs(1 - Q$c*Q$dis_m_LF_list[[i]][Q$F_idx[[i]]])^2
    if (Q$p == Q$n) {
      j = (Q$x_F[[i]] == 0 & Dr == 0)
      y[j] <- lim0
      y[!j] <- Q$x_F[[i]][!j] / Dr[!j]
    } else {
      y <- Q$x_F[[i]] / Dr
    }
    F_temp <- Q$F[[i]]
    b <- Q$bins[[i]]
    F_temp_b <- F_temp[b]
    integral_ydF <- diff(F_temp) * 0.5 * (y[-1] + y [-Q$nidx[i]])
    integral_ydF2 <- (Q$quant[[i]] - F_temp_b) * (y[b] + 0.5 * (Q$quant[[i]] - F_temp_b) * (y[b + 1] - y[b]) / (F_temp[b+1] - F_temp_b) )
    integral_ydF3 <- rowSums(tcrossprod(rep(1,Q$nquant[i]), integral_ydF) * Q$integral_indic[[i]]) + integral_ydF2
    return (diff(integral_ydF3) * Q$p)
  })
  for (i in 1:Q$numint)
    delta[round(Q$F[[i]][1]*Q$p + 1):round(Q$F[[i]][Q$nidx[i]] * Q$p)] <- out[[i]]
  if (Q$p == Q$n)
    delta[delta < lim0] <- lim0

  return (delta)
}

.getLambdaJ <- function(Q){
  rep1p <- rep(1,Q$p)
  sup_J_fn <- function(Q){
    u_vec <- c(t(Q$u_Fbar))
    Dr <- apply(Q$u_Fbar, c(1,2), function(u) sum(Q$tau^2 / (Q$tau-u)^3 ) )
    temp <- (array(1,dim=dim(Q$u_Fbar)) %o% Q$tau) - (Q$u_Fbar %o% rep1p)
    Q$sup_J <- (Q$u_Fbar %o% Q$tau) / (temp^3 * (Dr %o% rep1p) )
    Q$endpoints_J <- 1/Q$n * (Q$u_Fbar^2 %o% rep(1,Q$p)) / temp^2
    if (Q$p == Q$n) Q$endpoints_J[1,1,] <- 0
  }
  sup_J_fn(Q)

  Q$xi_J_list <- lapply(1:Q$numint, function(i) {
    length_temp <- length(Q$xi_list[[i]])
    temp <- (sin( pi/(2*(length_temp + 1)) * (1:length_temp) ) )^2
    return( tcrossprod((1 - temp), Q$sup_J[i,1,]) + tcrossprod(temp, Q$sup_J[i,2,]) )
  })

  den_J_fn <- function(Q){
    TAU_list <- lapply(1:Q$numint, function(i) tcrossprod(rep(1,length(Q$xi_list[[i]])), Q$tau) )
    TAU2_list <- lapply(1:Q$numint, function(i) tcrossprod(rep(1,length(Q$xi_list[[i]])), Q$tau^2) )
    ZXI_list <- lapply(1:Q$numint, function(i) tcrossprod(Q$zxi_list[[i]], rep1p ) )
    XI_list <- lapply(1:Q$numint, function(i) tcrossprod(Re(Q$zxi_list[[i]]), rep1p ) )
    YXI_list <- lapply(1:Q$numint, function(i) tcrossprod(Im(Q$zxi_list[[i]]), rep1p ) )
    YXI2_list <- lapply(1:Q$numint, function(i) tcrossprod(Im(Q$zxi_list[[i]])^2, rep1p ) )
    TAU_XI_list <- lapply(1:Q$numint, function(i) TAU_list[[i]] - XI_list[[i]])

    #Jacobian of yxi := imag(zxi)
    Q$yxi_J_list <- lapply(1:Q$numint, function(k) {
      NORM <- TAU_XI_list[[k]]^2 + YXI2_list[[k]]; NORM2 <- NORM*NORM
      Nr <- rowSums(TAU2_list[[k]] * TAU_XI_list[[k]] / NORM2)
      Dr <- rowSums(TAU2_list[[k]] * YXI_list[[k]] / NORM2)
      return ( (TAU_list[[k]] / NORM - TAU2_list[[k]] * TAU_XI_list[[k]] / NORM2 + tcrossprod(Nr, rep1p) * Q$xi_J_list[[k]])  / tcrossprod(Dr, rep1p) )
    })

    Q$zxi_J_list <- lapply(1:Q$numint, function(k) Q$xi_J_list[[k]] + 1i*Q$yxi_J_list[[k]])

    Q$f_J_list <- lapply(1:Q$numint, function(k)
      1/(pi*Q$c) * Im(Q$zxi_J_list[[k]] * tcrossprod(1/(Q$zxi_list[[k]]*Q$zxi_list[[k]]), rep1p) ) )

    Q$m_LF_J_list <- lapply(1:Q$numint, function(k) {
      TAU_ZXI2 <- (TAU_list[[k]] - ZXI_list[[k]])^2
      1/Q$p * (-ZXI_list[[k]] / TAU_ZXI2 + Q$zxi_J_list[[k]] * tcrossprod(rowSums(TAU_list[[k]] / TAU_ZXI2 ),rep1p) )
    })

    Q$x_J_list <- lapply(1:Q$numint, function(k)
      Re ( Q$zxi_J_list[[k]] * (1 - Q$c*tcrossprod(Q$m_LF_zxi_list[[k]], rep1p) ) - Q$c * ZXI_list[[k]] * Q$m_LF_J_list[[k]] ) )

    Q$zeta_J_list <- lapply(1:Q$numint, function(k)
      1/Q$a * Q$x_J_list[[k]] * tcrossprod(Q$x_list[[k]]^(1/Q$a - 1), rep1p) )

    Q$g_J_list <- lapply(1:Q$numint, function(k)
      Q$a * ( (Q$a - 1) * Q$zeta_J_list[[k]] * tcrossprod(Q$f_list[[k]]*Q$zeta_list[[k]]^(Q$a - 2), rep1p) +
                tcrossprod(Q$zeta_list[[k]]^(Q$a - 1), rep1p) * Q$f_J_list[[k]] ) )
  }

  den_J_fn(Q)

  dis_J_fn <- function(Q){
    Q$dis_g_J_list <- lapply(1:Q$numint, function(k) rbind(rep(0,Q$p), Q$g_J_list[[k]], rep(0,Q$p)))
    Q$dis_zeta_J_list <- lapply(1:Q$numint, function(k)
      rbind(1/Q$a * Q$endpoints[k,1]^(1/Q$a - 1) * Q$endpoints_J[k,1,],
            Q$zeta_J_list[[k]],
            1/Q$a * Q$endpoints[k,2]^(1/Q$a - 1) * Q$endpoints_J[k,2,]) )
    if (Q$p == Q$n) Q$dis_zeta_J_list[[1]][1,] <- 0

    dis_zeta_J_diff <- lapply(1:Q$numint, function(k) Q$dis_zeta_J_list[[k]][-1,] - Q$dis_zeta_J_list[[k]][-Q$intlength[k],])
    dis_zeta_diff <- lapply(1:Q$numint, function(k) diff(Q$dis_zeta_list[[k]]) )
    dis_g_mean <- lapply(1:Q$numint, function(k) 0.5*(Q$dis_g_list[[k]][-1] + Q$dis_g_list[[k]][-Q$intlength[k]]) )
    dis_g_J_mean <- lapply(1:Q$numint, function(k) 0.5*(Q$dis_g_J_list[[k]][-1,] + Q$dis_g_J_list[[k]][-Q$intlength[k],] ) )

    G_J_raw_list <- lapply(1:Q$numint, function(k) rbind(rep(0,Q$p),
                                                         apply(dis_zeta_J_diff[[k]] * tcrossprod(dis_g_mean[[k]], rep1p), 2, cumsum) +
                                                           apply(tcrossprod(dis_zeta_diff[[k]], rep1p) * dis_g_J_mean[[k]], 2, cumsum) ) )

    Fends_diff <- Q$Fend - Q$Fstart
    Q$dis_G_J <- lapply(1:Q$numint, function(k)
      Fends_diff[k] / Q$dis_G_raw[[k]][Q$intlength[k]] * (G_J_raw_list[[k]] -
                                                            tcrossprod(Q$dis_G_raw[[k]], rep1p) * tcrossprod(rep(1,Q$intlength[k]), G_J_raw_list[[k]][Q$intlength[k],]) ) )
  }

  dis_J_fn(Q)

  lambda_J_fn <- function(Q){
    Q$lambda_J <- matrix(0, nrow=Q$p, ncol = Q$p)
    if ((Q$p - Q$n) <= Q$pzw & Q$pzw > 0)
      Q$lambda_J[max(1,Q$p - Q$n + 1):Q$pzw, 1:Q$pzw] <- 1 - Q$c
    Q$F_J <- lapply(1:Q$numint, function(k) Q$dis_G_J[[k]][Q$F_idx[[k]],])
    Q$x_J_F <- lapply(1:Q$numint, function(k)
      tcrossprod((Q$a*Q$dis_zeta_list[[k]][Q$F_idx[[k]]]^(Q$a-1)), rep1p) * Q$dis_zeta_J_list[[k]][Q$F_idx[[k]],] )

    F_J_diff <- lapply(1:Q$numint, function(k) Q$F_J[[k]][-1,] - Q$F_J[[k]][-Q$nidx[k],])
    x_J_F_mean <- lapply(1:Q$numint, function(k) 0.5*(Q$x_J_F[[k]][-1,] + Q$x_J_F[[k]][-Q$nidx[k],]) )
    x_J_F_diff <- lapply(1:Q$numint, function(k) Q$x_J_F[[k]][-1,] - Q$x_J_F[[k]][-Q$nidx[k],])
    X_J_integral <- lapply(1:Q$numint, function(k)
      F_J_diff[[k]] * tcrossprod(Q$x_F_mean[[k]], rep1p) + tcrossprod(Q$F_diff[[k]], rep1p) * x_J_F_mean[[k]] )

    integral_j_kappa <- lapply(1:Q$numint, function(k) {
      q <- Q$quant[[k]]
      b <- Q$bins[[k]]
      tcrossprod(q,rep1p) * Q$x_J_F[[k]][b,] - Q$x_J_F[[k]][b,] * tcrossprod(Q$F[[k]][b], rep1p) -
        Q$F_J[[k]][b,] * tcrossprod(Q$x_F[[k]][b], rep1p) -
        Q$F_J[[k]][b,] * tcrossprod(((q - Q$F[[k]][b])*Q$x_F_diff[[k]][b] / Q$F_diff[[k]][b]), rep1p) +
        x_J_F_diff[[k]][b,] * tcrossprod( (0.5 * (q - Q$F[[k]][b])^2 / Q$F_diff[[k]][b]), rep1p) -
        F_J_diff[[k]][b,] * tcrossprod( (0.5 * (q - Q$F[[k]][b])^2 * Q$x_F_diff[[k]][b] / Q$F_diff[[k]][b]^2), rep1p) })

    X_J_kappa_integral <- lapply(1:Q$numint, function(i) Q$integral_indic[[i]] %*% X_J_integral[[i]] + integral_j_kappa[[i]])
    for (i in 1:Q$numint)
      Q$lambda_J[round(Q$F[[i]][1] * Q$p + 1):round(Q$F[[i]][Q$nidx[i]] * Q$p), ] <- apply(X_J_kappa_integral[[i]], 2, diff) * Q$p
  }

  lambda_J_fn(Q)
  return (Q$lambda_J)
}

.ESD <- function(tau, n) {
  Q <- .EstQ(tau,n)
  return( setNames(Reduce(c, Q$F), Reduce(c,Q$dis_x_list)) )
}

.estTau <- function(X, k = 0, control = list(rel.tol = 1e-4)){
  rho <- new.env()
  rho$n <- nrow(X);rho$p <- ncol(X);rho$k <- k
  if (rho$k == 0) {rho$X <- X - tcrossprod(rep(1,rho$n), colMeans(X));rho$k <- 1}
  else rho$X <- X
  if (rho$n > rho$k){rho$effn <- rho$n - rho$k}
  else{rho$k<-rho$n-1;rho$effn<-1}
  rho$S <- crossprod(rho$X)/rho$effn
  rho$lambda <- sort(eigen(rho$S, only.values = TRUE)$values)
  rho$lambda[rho$lambda < 0] = 0
  if (rho$p > rho$effn){rho$lambda[1:(rho$p - rho$effn)] <- 0}
  rho$lambda_ls <- .eigenshrink(X = rho$X, k = rho$k)

  lambda_obj <- function(tau){
    if(is.unsorted(tau)) {tausort <- sort(tau);tauorder <- order(tau)}
    else{tausort <- tau;tauorder <- 1:rho$p}
    Q <- .EstQ(tausort,rho$effn)
    lambda_est <- Q$lambda
    lambda_est_order <- lambda_est[tauorder]
    return(mean((lambda_est_order - rho$lambda)^2))
  }

  lambda_gr <- function(tau){
    if(is.unsorted(tau)){tausort<-sort(tau);tauorder<-order(tau)}
    else{tausort<-tau;tauorder<-1:rho$p}
    Q <- .EstQ(tausort,rho$effn)
    lambda_est <- Q$lambda
    lambda_est_order <- lambda_est[tauorder]
    lambda_est_J <- .getLambdaJ(Q)
    lambda_est_J_order <- lambda_est_J[tauorder,]
    return (as.numeric( 2/Q$p * crossprod((lambda_est_J_order), (lambda_est_order - rho$lambda) ) ) )
  }
  optim_out <- nlminb(start=rho$lambda_ls,objective=lambda_obj,gradient=lambda_gr,lower=min(rho$lambda),upper=max(rho$lambda),control=control)
  return(optim_out$par)
}

.eigenshrink <- function(X, k = 0){
  n <- nrow(X);p <- ncol(X)
  if (k == 0){X <- X - tcrossprod(rep(1,n), colMeans(X));k = 1}
  if (n > k) {effn <- n-k} else stop("k must be strictly less than nrow(X)")
  S <- crossprod(X)/effn
  lambda <- sort(eigen(S,only.values = TRUE)$values)
  lambda[lambda < 0] <- 0
  if (p > effn) lambda [1:(p-effn)] <- 0
  Z <- X*X
  phi <- sum(crossprod(Z) / effn - 2*crossprod(X)* S / effn + S*S)
  gamma <- sum((S-mean(diag(S))*diag(p))^2)
  alpha<-max(0, min(1,phi/gamma/effn))
  gmean=function(x,na.rm=TRUE){exp(sum(log(x[x>0]),na.rm=na.rm)/length(x))}
  lambda_mean<-gmean(lambda)
  return(lambda_mean+sqrt(1-alpha)*(lambda-lambda_mean))
}


#' print method for covRobust objects
#'
#' @param fit the object
#' @param digits the number of significant digits to display. defaults to 5. changes to 3 if the number of variables is >= 10.
#' @method print covRobust
#' @export
#'
#'
print.covRobust <- function(fit, digits = 5){
  if (ncol(fit$cov)>=10) digits <-3
  center <- format(round(fit$center, digits), nsmall = digits)
  cov <- format(round(fit$cov, digits), nsmall = digits)
  brown <- crayon::make_style(rgb(0.766, 0.204, 0.237))
  green2 <- crayon::make_style(rgb(0.435, 0.653, 0.218))
  cat(brown("Covariance:\n"))
  print(noquote(cov), right=T)
  cat(green2("\nLocation:\n"))
  print(noquote(center), right=T)
  if (!is.null(fit$com)){
    brown2 <- crayon::make_style(rgb(0.8166, 0.104, 0.237))
    green3 <- crayon::make_style(rgb(0.502, 0.62, 0.278))
    cov2 <- format(round(fit$com, digits), nsmall = digits)
    center2 <- format(round(fit$medians, digits), nsmall = digits)
    cat(brown2("\nL1 Scatter:\n"))
    print(noquote(cov2), right=T)
    cat(green3("\nL1 Location:\n"))
    print(noquote(center2), right=T)
  }
}

#' plot method for covRobust objects
#'
#' @param fit the object
#' @method plot covRobust
#' @export
#'
#'
plot.covRobust <- function(fit){
  dist <- fit$dist
  index <- 1:length(dist)
  outs <- fit$outliers
  no.outs <- length(outs) == 0
  plot(1, type = "n", xlab="Index",
       ylab= "Mahalanobis Distance",
       main="Outlier Plot",
       xlim=c(1,max(index)), ylim=c(max(0,min(dist)-2), max(dist))
  )
  if (no.outs){
    ## no outliers were flagged; plot all points without a threshold line
    points(index, dist, col="#0644ccCC", bg = "#486bb82F", pch = 21)
  }
  else{
    thresh <- min(dist[outs])
    points(index[-outs], dist[-outs], col="#0644ccCC", bg = "#486bb82F", pch = 21)
    abline(a = thresh, b = 0, col = "#a30000BF", lwd = 2)
    points(index[outs],  dist[outs], col="#f3054eCC", bg = "#f22c852F", pch = 21)
  }
}


#' Co-Median Robust Covariance Matrix
#'
#' @description  The co-median matrix is an alternative to the covariance matrix. To understand
#' how this works, first consider the definition of the median absolute deviation, \eqn{MAD(x) = md(x-md(x))}.
#' The MAD is usually scaled by a factor of 1.4826 to make it usable as a consistent robust estimator of the
#' standard deviation. Also offered as an option here is to replace the standard estimate of the median with
#' the Harrell-Davis estimator of the median, which can improve accuracy in smaller sample sizes (Harrell & Davis, 1982).
#' \cr
#' \cr
#' The co-median is defined by \eqn{com(x,y) = med((x-med(x)) * (y-med(y)))}, and the standardized form analogous
#' to the correlation coefficient is \eqn{ \delta = com(x,y)/(MAD(x) * MAD(y))}. Unlike the correlation coefficient,
#' \eqn{\delta} is not guaranteed to lie within the interval [-1, 1]; however, it
#' typically only deviates from this interval for non-normally distributed random variables and is a
#' smooth function of the correlation coefficient (Falk, 1997; Falk, 1998). \cr
#' \cr
#' A disadvantage of the median absolute deviation is that it can collapse to zero when half of the values
#' in a vector are the same. When a column with MAD=0 is detected, the function returns an error message.
#' Another disadvantage of the co-median matrix is that it is not guaranteed to be positive-semidefinite
#' even when n > p. To get around this problem this function implements an iterative algorithm proposed by
#' Sajesh and Srinivasan (2012), described below.
#'
#' 1. Let \eqn{\delta}(X) be the co-median correlation matrix of X. Compute the eigenvalues and eigenvectors
#' of \eqn{\delta}(X), and let \strong{E} denote the eigenvectors, and \strong{\eqn{\Lambda}} the diagonal matrix of
#' eigenvalues.  \cr
#'
#' 2. Let \strong{Q} = \strong{D}\strong{E}, where \strong{D} is a diagonal matrix of MADs. Let
#' \strong{invQ} be the inverse of \strong{Q}. Scores are then obtained as \strong{Z} = \strong{XinvQ},
#' whose squared-MADs are stored in a diagonal matrix, \strong{\eqn{\Gamma}}. Furthermore, denote the
#' vector of column medians of \strong{Z} as \eqn{\gamma}. \cr
#'
#' 3. The resulting robust estimates for location and scatter are then respectively defined as
#' \strong{\eqn{\Omega}} = \strong{Q}\strong{\eqn{\Gamma}}\strong{Q'} and \eqn{mu} = \strong{Q}\eqn{\gamma}. \cr \cr
#'
#' 4. Optional Step: Reiterate the above steps one or two times, but substituting \strong{\eqn{\Omega}} for
#' \eqn{\delta} and \strong{\eqn{\Gamma}} for the sample MADs in \strong{D} in the re-iterated steps.
#'
#' @param x a data frame or matrix containing numeric variables
#' @param method one of "med", "hd", or "aad". "med" uses the typical median and MAD. "hd" uses the Harrell-Davis
#' estimate of the median in place of the median, and "aad" uses the average absolute deviation in lieu of the
#' median absolute deviation. if option "aad" is used the appropriate consistency constant, sqrt(pi/2), is used
#' instead of 1.4826. the only time "aad" is preferable is when there are columns in the data with a median absolute
#' deviation of zero.
#' @param iter number of refinement iterations
#' @return a covRobust object containing the following elements:
#' \itemize{
#'          \item \strong{center}: multivariate mean of cleaned data set after discarding outliers identified by the mahalanobis distances of the co-median matrix.
#'          \item \strong{cov}: covariance matrix of cleaned data set after discarding outliers identified by the mahalanobis distances of the co-median matrix.
#'          \item \strong{medians}: estimated multivariate median
#'          \item \strong{com}: estimated co-median matrix
#'          \item \strong{delta}: the initial raw comedian correlation matrix
#'          \item \strong{dist}: the mahalanobis distances based on the co-median matrix and multivariate median
#'          \item \strong{outliers}: the indices of the outliers identified by the co-median matrix based mahalanobis distances; these are the points removed to obtain the cleaned covariance matrix.
#'          \item \strong{weights}: the weights for downweighting outliers. here they are binary, with 0 marking an outlier and 1 otherwise.
#'         }
#' @export
#' @examples
#' # illustrative call on a small numeric data set (mtcars ships with base R)
#' fit <- cov.comed(mtcars[, c("mpg", "disp", "hp", "wt")])
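#' # further illustration: the pairwise comedian is just med((x - med(x)) * (y - med(y)))
#' with(mtcars, median((mpg - median(mpg)) * (wt - median(wt))))
#' fit$outliers   # indices flagged by the co-median based distances
#' plot(fit)      # distance plot with the outlier threshold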
#'
#' @references
#' Falk, M. (1997) On MAD and comedians. Annals of the Institute of Statistical Mathematics 49, 615-644. \cr
#' \cr
#' Falk, M. (1998). A Note on the Comedian for Elliptical Distributions. Journal of Multivariate Analysis, 67(2), 306-317. doi:10.1006/jmva.1998.1775 \cr
#' \cr
#' Harrell, F. E. & Davis, C. E. (1982). A new distribution-free quantile estimator. Biometrika, 69, 635–640 \cr
#' \cr
#' Sajesh, T. A., & Srinivasan, M. R. (2012). Outlier detection for high dimensional data using the Comedian approach. Journal of Statistical Computation and Simulation, 82(5), 745-757. doi:10.1080/00949655.2011.552504
#'
cov.comed <- function(x, method = c("med", "hd", "aad"), iter = 1){

  method <- match.arg(method)
  if(method=="med"){med<-T;hd<-F;aad<-F}
  else if(method=="hd"){med<-F;hd<-T;aad<-F}
  else if(method=="aad"){med<-F;hd<-F;aad<-T}

  coMedfun <- function (omega,z,x){
    p <- ncol(x);U<-wsvd(omega)$u;madx<-colMads(x);invmad<-1/madx
    Q <-madx*U;iQ<-invmad*U;z<-x%*%iQ;madz<-colMads(z);Omega=mpd(tcrossprod(Q*rep(madz,each=p)))
    return(list(Omega=Omega, z=z, Q=Q))
  }

  coMedHDfun <- function (omega,z,x){
    hdmad <- function(i) cvreg:::HDMAD(i) * 1.4826
    p <- ncol(x);U<-wsvd(omega)$u;madx<-apply(x,2,hdmad);invmad<-1/madx
    Q <-madx*U;iQ<-invmad*U;z<-x%*%iQ;madz<-apply(z,2,hdmad);Omega=tcrossprod(Q*rep(madz,each=p))
    return(list(Omega=Omega, z=z, Q=Q))
  }

  coAADfun <- function (omega,z,x){
    p <- ncol(x);U<-wsvd(omega)$u;madx<-apply(x,2,aad);invmad<-1/madx
    Q <-madx*U;iQ<-invmad*U;z<-x%*%iQ;madz<-apply(z,2,aad);Omega=tcrossprod(Q*rep(madz,each=p))
    return(list(Omega=Omega, z=z, Q=Q))
  }


  if (med){
    x<-as.matrix(x);n<-nrow(x);p<-ncol(x);colmads<-colMads(x)

    if (any(colmads==0)){stop("At least one variable has a MAD of 0. Try using method='aad' instead.")}

    ## Calculate Co-Median Matrix and Initial Covariance Estimate
    imad <- 1/colmads;delta<-cvreg:::coMedianMatrix(x);delta[which(is.na(as.vector(delta)))]<-0;
    omega<-imad*mpd(delta)*rep(imad,each=p);U<-wsvd(omega)$u;iQ<-imad*U;
    z<-x%*%iQ;Sigma<-coMedfun(omega,z,x);for (i in 1:iter){Sigma<-coMedfun(Sigma$Omega,Sigma$z,x)}
    medians<-drop(Sigma$Q %*% colMedians(Sigma$z)); Omega<-Sigma$Omega;

    ## Calculate Weights Based On Initial Covariance Estimate
    alpha <- min(0.975, 1-((0.24815 - 0.0021885*p)/sqrt(n)));
    wtfun = function(d,p){q<-1.4826*(qchisq(alpha,p)/qchisq(0.5,p));as.numeric(d<median(d)*q)}
    dist2<-mahalanobis_dist(x,medians,Omega);wt=wtfun(dist2,p);idx=which(wt==1);h<-sum(wt);

    ## Calculate New Covariance Matrix
    center<-matrixStats::colWeightedMeans(x, wt);
    newx<-sweep(x[idx,],2,center);
    Sigma<-mpd(crossprod(newx)/h);

    colnames(Omega)<-rownames(Omega)<-colnames(delta)<-rownames(delta)<-colnames(Sigma)<-rownames(Sigma)<-names(medians)<-colnames(x)
    return(structure(list(center = center, cov = Sigma, dist = dist2, outliers = seq(1,nrow(x))[which(wt!=1)], weights = wt,
                          com = Omega, medians = medians, delta = delta),
                     class = "covRobust"))
  }
  if (hd){
    x<-as.matrix(x);n<-nrow(x);p<-ncol(x);colmads<-apply(x,2,cvreg:::HDMAD)*1.4826

    if (any(colmads==0)){stop("At least one variable has a MAD of 0. Try using method='aad' instead.")}

    ## Calculate Co-Median Matrix and Initial Covariance Estimate
    imad <- 1/colmads;delta<-cvreg:::coHDMedianMatrix(x);delta[which(is.na(as.vector(delta)))]<-0;
    omega<-imad*mpd(delta)*rep(imad,each=p);U<-wsvd(omega)$u;iQ<-imad*U;
    z<-x%*%iQ;Sigma<-coMedHDfun(omega,z,x);for (i in 1:iter){Sigma<-coMedHDfun(Sigma$Omega,Sigma$z,x)}
    medians<-drop(Sigma$Q %*% apply(Sigma$z,2,hdmedian)); Omega<-Sigma$Omega;

    ## Calculate Weights Based On Initial Covariance Estimate
    alpha <- min(0.975, 1-((0.24815 - 0.0021885*p)/sqrt(n)));
    wtfun = function(d,p){q<-1.4826*(qchisq(alpha,p)/qchisq(0.5,p));as.numeric(d<hdmedian(d)*q)}
    dist2<-mahalanobis_dist(x,medians,Omega);wt=wtfun(dist2,p);idx=which(wt==1);h<-sum(wt);

    ## Calculate New Covariance Matrix
    center<-matrixStats::colWeightedMeans(x, wt);
    newx<-sweep(x[idx,],2,center);
    Sigma<-mpd(crossprod(newx)/h);

    colnames(Omega)<-rownames(Omega)<-colnames(delta)<-rownames(delta)<-colnames(Sigma)<-rownames(Sigma)<-names(medians)<-colnames(x)
    return(structure(list(center = center, cov = Sigma, dist = dist2, outliers = seq(1,nrow(x))[which(wt!=1)], weights = wt,
                          com = Omega, medians = medians, delta = delta),
                     class = "covRobust"))
  }

  if (aad){

    x<-as.matrix(x);n<-nrow(x);p<-ncol(x);colmads<-apply(x,2,aad)
    ## Calculate Co-Median Matrix and Initial Covariance Estimate
    imad <- 1/colmads;delta<-cvreg:::coAADMatrix(x);delta[which(is.na(as.vector(delta)))]<-0;
    omega<-imad*mpd(delta)*rep(imad,each=p);U<-wsvd(omega)$u;iQ<-imad*U;
    z<-x%*%iQ;Sigma<-coAADfun(omega,z,x);for (i in 1:iter){Sigma<-coAADfun(Sigma$Omega,Sigma$z,x)}
    medians<-drop(Sigma$Q %*% apply(Sigma$z,2,hdmedian)); Omega<-Sigma$Omega;

    ## Calculate Weights Based On Initial Covariance Estimate
    alpha <- min(0.975, 1-((0.24815 - 0.0021885*p)/sqrt(n)));
    wtfun = function(d,p){q<-sqrt(pi/2)*(qchisq(alpha,p)/qchisq(0.5,p));as.numeric(d<hdmedian(d)*q)}
    dist2<-mahalanobis_dist(x,medians,Omega);wt=wtfun(dist2,p);idx=which(wt==1);h<-sum(wt);

    ## Calculate New Covariance Matrix
    center<-matrixStats::colWeightedMeans(x, wt);
    newx<-sweep(x[idx,],2,center);
    Sigma<-mpd(crossprod(newx)/h);

    colnames(Omega)<-rownames(Omega)<-colnames(delta)<-rownames(delta)<-colnames(Sigma)<-rownames(Sigma)<-names(medians)<-colnames(x)
    return(structure(list(center = center, cov = Sigma, dist = dist2, outliers = seq(1,nrow(x))[which(wt!=1)], weights = wt,
                          com = Omega, medians = medians, delta = delta),
                     class = "covRobust"))
  }

}


#' Weighted Co-Median Robust Covariance Matrix
#'
#' @description See \code{\link{cov.comed}} for details on how the co-median matrix is calculated. The difference
#' in this function is that the final covariance matrix is not based on dropping identified outliers, but rather,
#' smoothly downweighting them.
#'
#' @param x a data frame or matrix containing numeric variables
#' @param method one of "med", "hd", or "aad". "med" uses the typical median and MAD. "hd" uses the Harrell-Davis
#' estimate of the median in place of the median, and "aad" uses the average absolute deviation in lieu of the
#' median absolute deviation. if option "aad" is used the appropriate consistency constant, sqrt(pi/2), is used
#' instead of 1.4826. the only time "aad" is preferable is when there are columns in the data with a median absolute
#' deviation of zero.
#' @param iter number of refinement iterations
#' @param k tuning constant for the smooth downweighting function. defaults to 1.5.
#' @return a covRobust object containing the following elements:
#' \itemize{
#'          \item \strong{center}: weighted multivariate mean, with identified outliers smoothly downweighted rather than discarded.
#'          \item \strong{cov}: weighted covariance matrix, with identified outliers smoothly downweighted rather than discarded.
#'          \item \strong{medians}: estimated columnwise medians
#'          \item \strong{com}: estimated co-median matrix
#'          \item \strong{delta}: the initial raw comedian correlation matrix
#'          \item \strong{dist}: the mahalanobis distances based on the co-median matrix and multivariate median
#'          \item \strong{outliers}: the indices of the observations flagged as outliers by the co-median matrix based mahalanobis distances.
#'          \item \strong{weights}: the weights for downweighting outliers.
#'         }
#' @export
#' @examples
#' # illustrative call on a small numeric data set (mtcars ships with base R)
#' fit <- cov.wcomed(mtcars[, c("mpg", "disp", "hp", "wt")])
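#' # further illustration: smooth weights replace hard rejection of outliers
#' round(fit$weights, 3)
#' plot(fit)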
#'
#' @references
#' Falk, M. (1997) On MAD and comedians. Annals of the Institute of Statistical Mathematics 49, 615-644. \cr
#' \cr
#' Falk, M. (1998). A Note on the Comedian for Elliptical Distributions. Journal of Multivariate Analysis, 67(2), 306-317. doi:10.1006/jmva.1998.1775 \cr
#' \cr
#' Harrell, F. E. & Davis, C. E. (1982). A new distribution-free quantile estimator. Biometrika, 69, 635–640 \cr
#' \cr
#' Sajesh, T. A., & Srinivasan, M. R. (2012). Outlier detection for high dimensional data using the Comedian approach. Journal of Statistical Computation and Simulation, 82(5), 745-757. doi:10.1080/00949655.2011.552504
#'
cov.wcomed <- function(x, method = c("med", "hd", "aad"), iter = 2, k = 1.5){

    method <- match.arg(method)
    if(method=="med"){med<-T;hd<-F;aad<-F}
    else if(method=="hd"){med<-F;hd<-T;aad<-F}
    else if(method=="aad"){med<-F;hd<-F;aad<-T}

  coMedfun <- function (omega,z,x){
    p <- ncol(x);U<-wsvd(omega)$u;madx<-colMads(x);invmad<-1/madx
    Q <-madx*U;iQ<-invmad*U;z<-x%*%iQ;madz<-colMads(z);Omega=mpd(tcrossprod(Q*rep(madz,each=p)))
    return(list(Omega=Omega, z=z, Q=Q))
  }

  coMedHDfun <- function (omega,z,x){
    hdmad <- function(i) cvreg:::HDMAD(i) * 1.4826
    p <- ncol(x);U<-wsvd(omega)$u;madx<-apply(x,2,hdmad);invmad<-1/madx
    Q <-madx*U;iQ<-invmad*U;z<-x%*%iQ;madz<-apply(z,2,hdmad);Omega=tcrossprod(Q*rep(madz,each=p))
    return(list(Omega=Omega, z=z, Q=Q))
  }

  coAADfun <- function (omega,z,x){
    p <- ncol(x);U<-wsvd(omega)$u;madx<-apply(x,2,aad);invmad<-1/madx
    Q <-madx*U;iQ<-invmad*U;z<-x%*%iQ;madz<-apply(z,2,aad);Omega=tcrossprod(Q*rep(madz,each=p))
    return(list(Omega=Omega, z=z, Q=Q))
  }

  if (med){
    x<-as.matrix(x);n<-nrow(x);p<-ncol(x);colmads<-colMads(x)

    if (any(colmads==0)){stop("At least one variable has a MAD of 0. Try using method='aad' instead.")}

    ## Calculate Co-Median Matrix and Initial Covariance Estimate
    imad <- 1/colmads;delta<-cvreg:::coMedianMatrix(x);delta[which(is.na(as.vector(delta)))]<-0;
    omega<-imad*mpd(delta)*rep(imad,each=p);U<-wsvd(omega)$u;iQ<-imad*U;
    z<-x%*%iQ;Sigma<-coMedfun(omega,z,x);for (i in 1:iter){Sigma<-coMedfun(Sigma$Omega,Sigma$z,x)}
    medians<-drop(Sigma$Q %*% colMedians(Sigma$z)); Omega<-Sigma$Omega;

    ## Calculate Weights Based On Initial Covariance Estimate
    alpha <- min(0.975, 1-((0.24815 - 0.0021885*p)/sqrt(n)));
    cv <- 1.4826*(qchisq(alpha,p)/qchisq(0.5,p));
    dist2<-mahalanobis_dist(x,medians,Omega);
    wt = cvreg:::mvSmoothWt(dist2/median(dist2), cv, k); h<-sum(wt)
    outliers <- seq(1,nrow(x),by=1)[which(as.numeric(dist2>median(dist2)*cv)==1)]

    ## Calculate New Covariance Matrix
    wcov <- cov.wt(x, wt, method = "ML")
    center <- wcov$center
    Sigma <- wcov$cov

    colnames(Omega)<-rownames(Omega)<-colnames(delta)<-rownames(delta)<-colnames(Sigma)<-rownames(Sigma)<-names(medians)<-colnames(x)
    return(structure(list(center = center, cov = Sigma, dist = dist2, outliers = outliers, weights = wt,
                          com = Omega, medians = medians, delta = delta),
                     class = "covRobust"))
  }
  if (hd){
    x<-as.matrix(x);n<-nrow(x);p<-ncol(x);colmads<-apply(x,2,cvreg:::HDMAD)*1.4826

    if (any(colmads==0)){stop("At least one variable has a MAD of 0. Try using method='aad' instead.")}

    ## Calculate Co-Median Matrix and Initial Covariance Estimate
    imad <- 1/colmads;delta<-cvreg:::coHDMedianMatrix(x);delta[which(is.na(as.vector(delta)))]<-0;
    omega<-imad*mpd(delta)*rep(imad,each=p);U<-wsvd(omega)$u;iQ<-imad*U;
    z<-x%*%iQ;Sigma<-coMedHDfun(omega,z,x);for (i in 1:iter){Sigma<-coMedHDfun(Sigma$Omega,Sigma$z,x)}
    medians<-drop(Sigma$Q %*% apply(Sigma$z,2,hdmedian)); Omega<-Sigma$Omega;

    ## Calculate Weights Based On Initial Covariance Estimate
    alpha <- min(0.975, 1-((0.24815 - 0.0021885*p)/sqrt(n)));
    cv <- 1.4826*(qchisq(alpha,p)/qchisq(0.5,p));
    dist2<-mahalanobis_dist(x,medians,Omega);
    wt = cvreg:::mvSmoothWt(dist2/hdmedian(dist2), cv, k); h<-sum(wt)
    outliers <- seq(1,nrow(x),by=1)[which(as.numeric(dist2>hdmedian(dist2)*cv)==1)]

    ## Calculate New Covariance Matrix
    wcov <- cov.wt(x, wt, method = "ML")
    center <- wcov$center
    Sigma <- wcov$cov

    colnames(Omega)<-rownames(Omega)<-colnames(delta)<-rownames(delta)<-colnames(Sigma)<-rownames(Sigma)<-names(medians)<-colnames(x)
    return(structure(list(center = center, cov = Sigma, dist = dist2, outliers = outliers, weights = wt,
                          com = Omega, medians = medians, delta = delta),
                     class = "covRobust"))
  }

  if (aad){

    x<-as.matrix(x);n<-nrow(x);p<-ncol(x);colmads<-apply(x,2,aad)
    ## Calculate Co-Median Matrix and Initial Covariance Estimate
    imad <- 1/colmads;delta<-cvreg:::coAADMatrix(x);delta[which(is.na(as.vector(delta)))]<-0;
    omega<-imad*mpd(delta)*rep(imad,each=p);U<-wsvd(omega)$u;iQ<-imad*U;
    z<-x%*%iQ;Sigma<-coAADfun(omega,z,x);for (i in 1:iter){Sigma<-coAADfun(Sigma$Omega,Sigma$z,x)}
    medians<-drop(Sigma$Q %*% apply(Sigma$z,2,hdmedian)); Omega<-Sigma$Omega;

    ## Calculate Weights Based On Initial Covariance Estimate
    alpha <- min(0.975, 1-((0.24815 - 0.0021885*p)/sqrt(n)));
    cv <- sqrt(pi/2)*(qchisq(alpha,p)/qchisq(0.5,p));
    dist2<-mahalanobis_dist(x,medians,Omega);
    wt = cvreg:::mvSmoothWt(dist2/hdmedian(dist2), cv, k); h<-sum(wt)
    outliers <- seq(1,nrow(x),by=1)[which(as.numeric(dist2>hdmedian(dist2)*cv)==1)]

    ## Calculate New Covariance Matrix
    wcov <- cov.wt(x, wt, method = "ML")
    center <- wcov$center
    Sigma <- wcov$cov

    colnames(Omega)<-rownames(Omega)<-colnames(delta)<-rownames(delta)<-colnames(Sigma)<-rownames(Sigma)<-names(medians)<-colnames(x)
    return(structure(list(center = center, cov = Sigma, dist = dist2, outliers = outliers, weights = wt, com = Omega, medians = medians, delta = delta),
                     class = "covRobust"))
  }
}



#' Minimum Regularized Covariance Determinant Estimator
#'
#' @description This implements the minimum regularized covariance determinant (MRCD) estimator. This is similar to the implementation
#' in the rrcov package, but has a few tweaks to increase the speed of the algorithm and improve estimation, as well as being simpler to use.
#' The minimum covariance determinant estimator is a robust estimator of the covariance which finds the subset of the data (a proportion kappa here)
#' that yields the smallest determinant for the covariance matrix. It was introduced initially as part of the paper on least trimmed squares, and
#' further developed into a workable algorithm within a few years (Rousseeuw, 1984; Rousseeuw & Van Driessen, 1999). Since a gigantic number of
#' subsets of data points could be chosen, some heuristics are applied to seek good candidates and reduce computation time, which also ensures
#' consistency through the deterministic selection of subsets (Hubert et al., 2012; 2018). The regularized version implemented here, from Boudt et al. (2019),
#' further regularizes the calculated robust covariance matrix in a manner similar to that seen in the \code{\link{covShrink}} function.
#' The method of calculating the regularization parameter in Boudt et al. (2019) has been replaced by the faster method utilized by
#' Schaefer & Strimmer (2005), which uses an analytic formula for calculating the optimal parameter value.
#'
#' \cr
#' The MRCD method requires choosing a scale estimator. Several options are offered here: \cr \cr
#' - "tau" is the tau-scale defined in Yohai and Zamar (1998). \cr
#' - "pb" is the percentage bend estimator (Shoemaker & Hettmansperger, 1982). \cr
#' - "bisq" is Tukey's bisquare estimator. \cr
#' - "huber" is Huber's psi estimator (Huber, 1964). \cr
#' - "mopt" is the modified optimal estimator. \cr
#' - "mad" is the (adjusted) median absolute deviation. \cr
#' - "Qn" and "Sn" are two alternatives to the median based measures of location and scale (Rousseeuw, Peter, & Croux, 1993). \cr
#'
#' @param x a data frame or matrix of numeric covariates
#' @param kappa the proportion of the data to use in each subset. defaults to 0.80. must be between 0.5 and 1.
#' @param alpha a custom value for the regularization parameter. suggested to leave as NULL unless numerical problems are encountered.
#' @param method a scale estimator. the tau-scale estimator is the default.
#' @param opts list of options for the various scale estimators. "b" determines the percentage bend coefficient for "pb"; "eff" determines the efficiency of the "huber", "bisq", and "mopt"
#' scale estimators.
#' @return a covRobust object containing the following elements:
#' \itemize{
#'          \item \strong{center}: the robust multivariate location estimate based on the optimal subset.
#'          \item \strong{cov}: the regularized robust covariance estimate.
#'          \item \strong{dist}: the mahalanobis distances based on the robust location and covariance estimates.
#'          \item \strong{outliers}: the indices of the observations excluded from the optimal subset.
#'         }
#' @export
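#' @examples
#' # A minimal, illustrative call (mtcars ships with base R); the default tau-scale is used.
#' fit <- cov.mrcd(mtcars[, c("mpg", "disp", "hp", "wt")], kappa = 0.8)
#' fit$outliers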
#'
#' @references
#' Boudt, K.; Rousseeuw, P.J.; Vanduffel, S. (2019) The minimum regularized covariance determinant estimator. Stat Comput . doi: 10.1007/s11222-019-09869-x \cr
#' \cr
#' Huber, P. J. (1964). Robust estimation of a location parameter. Ann. Math. Statist. 35, 73–101. \cr
#' \cr
#' Hubert, M.; Rousseeuw, P.J.; Verdonck, T. (2012) A Deterministic Algorithm for Robust Location and Scatter, Journal of Computational and Graphical Statistics, 21:3, 618-637, DOI: 10.1080/10618600.2012.672100 \cr
#' \cr
#' Hubert, M.; Debruyne, M.; Rousseeuw, P.J. (2018) Minimum covariance determinant and extensions. WIREs Comput Stat. 10. doi: 10.1002/wics.1421 \cr
#' \cr
#' Rousseeuw, P.J. (1984) Least median of squares regression. J Am Stat Assoc 79:871–880. \cr
#' \cr
#' Rousseeuw, P.J.; Van Driessen, K. (1999) A fast algorithm for the Minimum Covariance Determinant estimator. Technometrics, 41:212–223. \cr
#' \cr
#' Schaefer, J. ; K. Strimmer (2005) A shrinkage approach to large-scale covariance estimation and implications for functional genomics. Statist. Appl. Genet. Mol. Biol. 4:32. \cr
#'
cov.mrcd = function (x, kappa = 0.80, alpha = NULL, method = c("tau", "pb", "bisq", "huber", "mopt", "Qn", "Sn"), opts = list("b" = 0.10, "eff" =  0.90)) {

  method <- match.arg(method);scalefn <- .scfun(method, opts);mufn <- .mufun(method, opts)

  x<-as.matrix(x);n=nrow(x);p=ncol(x);minscale=0.000001;varnames<-colnames(x);
  if(p<=10){maxcsteps=2000}else if(p<=100&&p>10){maxcsteps=1500}else if(p>100&&p<=200){maxcsteps=750}else if(p>200&&p<600){maxcsteps=500}else{maxcsteps=250}
  hsets.init = NULL;save.hsets=missing(hsets.init);full.h=T;trace= FALSE
  rpack <- function(x, h, full.h, scaled = TRUE, scalefn, mufn) {
    initset <- function(X, scalefn, mufn, V, h) {
      stopifnot(length(d <- dim(X)) == 2, length(h) == 1, h >= 1)
      n <- d[1]
      stopifnot(h <= n)
      XC <- Scale2(X, mufn, NULL);lambda <- apply(XC,2,scalefn)
      sqrtcov <- V%*%(lambda*t(V));sqrtinvcov<-V%*%(t(V)/lambda)
      estloc <- apply(X%*%sqrtinvcov,2,mufn)%*%sqrtcov
      XC <- (X-rep(estloc,each = n))%*%V
      sort.list(mahalanobis_dist(XC,apply(XC,2,mufn),diag(lambda),tol=1e-100))[1:h]
    }
    stopifnot(length(dx <- dim(x)) == 2)
    n <- dx[1];p <- dx[2]
    if (!scaled) {x <- as.matrix(Scale2(x, center = mufn, scale = scalefn))}

    bacon.cov <- function(x){
      znorm <- sqrt(rowSums(x^2));ind5 <- order(znorm);half <- ceiling(n/2)
      Hinit <- ind5[1:half];return(cov(x[Hinit, , drop = FALSE]))
    }
    .cov <- function(x) {
      x<-na.omit(as.matrix(x));p<-ncol(x);n<-nrow(x);res<-crossprod(x)/n;return(res)
    }

    hsets <- matrix(integer(), h, 9); Thr <- 1/(4 * (n^0.25) * sqrt(pi * log(n)))
    hsets[, 1] <- initset(x, scalefn = scalefn, mufn = mufn, V = eigen(mpd(cor(tanh(x))),symmetric=T)$vectors, h=h)
    hsets[, 2] <- initset(x, scalefn = scalefn, mufn = mufn, V = eigen(mpd(cor(x,method="s")),symmetric=T)$vectors, h=h)
    hsets[, 3] <- initset(x, scalefn = scalefn, mufn = mufn, V = eigen(mpd(cor(qnorm(apply(x,2,rank)/(n+1)))),symmetric=T)$vectors, h=h)
    hsets[, 4] <- initset(x, scalefn = scalefn, mufn = mufn, V = eigen(cov2cor(mpd(bacon.cov(x))),symmetric=T)$vectors, h=h)
    hsets[, 5] <- initset(x, scalefn = scalefn, mufn = mufn, V = eigen(mpd(cor(psych::winsor(x,trim = 0.10))),symmetric=T)$vectors, h=h)
    hsets[, 6] <- initset(x, scalefn = scalefn, mufn = mufn, V = eigen(mpd(cor(qunif(apply(x,2,rank)/(n+1),-3, 3))),symmetric=T)$vectors, h=h)
    hsets[, 7] <- initset(x, scalefn = scalefn, mufn = mufn, V = eigen(mpd(cor(qnorm(apply(x,2,rank)/(n+1)))),symmetric=T)$vectors, h=h)
    hsets[, 8] <- initset(x, scalefn = scalefn, mufn = mufn, V = eigen(mpd(cov2cor(covShrink(apply(x,2,rank)))),symmetric=T)$vectors, h=h)
    hsets[, 9] <- initset(x, scalefn = scalefn, mufn = mufn, V = eigen(cor(qnorm(pmin(pmax(apply(x, 2, rank)/n, Thr), 1 - Thr))),symmetric=T)$vectors, h=h)


    if (full.h){hsetsN <- matrix(integer(), n, 9)}
    for (k in 1:9) {
      xk <- x[hsets[, k], , drop = FALSE]
      Eig <- eigen(crossprod(scale(xk,T,F))/nrow(xk), symmetric=T)
      score <- (x - rep(colMeans(xk), each = n)) %*% Eig$vectors
      values <- Eig$values
      ord <- order(mahalanobis_dist(score, colMeans(score), diag(sqrt(abs(as.vector(values)))),tol=1e-100))
      if (full.h){hsetsN[, k] <- ord}else{hsets[, k] <- ord[1:h]}
    }
    if (full.h){hsetsN}else{hsets}
  }

  .Inv <- function(alpha, mT, nu, mU) {
    p = dim(mT)[1];pp = dim(mU);vD = sqrt(diag(mT));imD = diag(vD^(-1));R = imD %*% mT %*% imD
    cr = R[2, 1];I = diag(1, p);J = matrix(1, p, p);imR = 1/(1 - cr) * (I - cr/(1 + (p - 1) * cr) * J)
    imB = (alpha)^(-1) * imD %*% imR %*% imD;Temp <- pseudoinverse(diag(pp[2]) + nu * (t(mU) %*% (imB %*% mU)),tol=1e-100)
    return(imB - (imB %*% mU) %*% (nu * Temp) %*% (t(mU) %*% imB))
  }
  .COV <- function(XX, vMu, alpha = NULL, mT, scfac, invert = FALSE) {
    mE <- XX - vMu;n <- dim(mE)[2];p <- dim(mE)[1]
    mS <- tcrossprod(mE)/n;rcov <- alpha * mT + (1 - alpha) * scfac * mS
    if (invert) {
      if (p>n||!pdcheck(mS)){nu<-(1-alpha)*scfac;mU<-mE/sqrt(n);inv_rcov<-.Inv(alpha=alpha,mT=mT,nu=nu,mU=mU)}
      else {inv_rcov<-pseudoinverse(rcov,tol=1e-100)}
      return(list(alpha = alpha, mT = mT, cov = mS, rcov = rcov, inv_rcov = inv_rcov))
    }
    else return(list(alpha = alpha, mT = mT, cov = mS, rcov = rcov))
  }

  .cons<-function(p,alpha){qalpha<-qchisq(alpha,p);caI<-pgamma(qalpha/2,p/2+1)/alpha;1/caI}

  .cstep <- function(mX,alpha=NULL,mT=NULL,vMu=NULL,mIS = NULL,h,scfac,index = NULL,maxcsteps) {
    n <- dim(mX)[2];p <- dim(mX)[1];if(is.null(index)){index<-sample(1:n,h)}
    XX <- mX[, index];if(is.null(vMu)){vMu<-rowMeans(XX)}
    if (is.null(mIS)){ret=.COV(XX=XX,vMu=vMu,alpha=alpha,mT=mT,scfac = scfac,invert=T);mIS = ret$inv_rcov}
    vdst = diag(t(mX-vMu)%*%(mIS%*%(mX - vMu)));index = sort(sort.int(vdst, index.return = T)$ix[1:h]);iter = 0
    while (iter < maxcsteps) {
      XX <- mX[, index];vMu<-rowMeans(XX)
      ret <- .COV(XX = XX, vMu = vMu, alpha = alpha, mT = mT, scfac = scfac, invert = T);mIS <- ret$inv_rcov
      vdst <- diag(t(mX - vMu) %*% (mIS %*% (mX - vMu)));nndex <- sort(sort.int(vdst, index.return = T)$ix[1:h])
      if (all(nndex == index))
        break
      index <- nndex;iter <- iter + 1
    }
    return(list(index=index,numit=iter,mu=vMu,cov=ret$rcov,icov=ret$inv_rcov,alpha=ret$alpha,mT=ret$mT,dist=vdst,scfac=scfac))
  }
  mX<-t(x);mindet<-0;n<-dim(mX)[2];p<-dim(mX)[1]
  if (!is.null(kappa)){h <- ceiling(kappa*n)}else{kappa<-0.80;h<-ceiling(kappa*n)}
  if (kappa < 0.5 | kappa > 1) stop("'kappa' must be between 0.5 and 1.0!")
  obj <- function(x) {det(x)^(1/p)}
  vmx <- apply(mX,1,mufn);vsd<-apply(mX,1,scalefn);vsd[vsd<minscale]<-minscale
  Dx <- diag(vsd);mU <- scale(t(mX), center = vmx, scale = vsd);mX <- t(mU);mT <- diag(p)
  if (is.null(hsets.init)) {
    hsets.init <- rpack(x = t(mX), h=h, full.h = full.h, scaled = FALSE, scalefn = scalefn, mufn = mufn)
    dh <- dim(hsets.init)
  }
  else {
    if (is.vector(hsets.init))
      hsets.init <- as.matrix(hsets.init)
    dh <- dim(hsets.init)
    if (dh[1] < h || dh[2] < 1)
      stop("'hsets.init' must be a  h' x L  matrix (h' >= h) of observation indices")
    if (full.h && dh[1] != n)
      warning("'full.h' is true, but 'hsets.init' has less than n rows")
    if (min(hsets.init) < 1 || max(hsets.init) > n)
      stop("'hsets.init' must be in {1,2,...,n}; n = ",
           n)
  }
  hsets.init <- hsets.init[1:h, ]
  scfac <- .cons(p, h/n)
  alpha9pack <- condnr <- c()
  nsets <- ncol(hsets.init)
  if (is.null(alpha)) {
    for (k in 1:9) {
      mXsubset <- mX[, hsets.init[, k]]
      alpha9pack[k] <- .estimateAlpha(t(mXsubset))
    }
    cutoffalpha <- max(c(0.25, hdmedian(alpha9pack)))
    alpha <- max(alpha9pack[alpha9pack <= cutoffalpha])
    Vselection <- seq(1, 9)
    Vselection[alpha9pack > cutoffalpha] = NA
    if (sum(!is.na(Vselection)) == 0) {
      stop("None of the initial subsets are positive-definite")
    }
    initV <- min(Vselection, na.rm = TRUE)
    setsV <- Vselection[!is.na(Vselection)]
    setsV <- setsV[-1]
  }
  else {
    setsV <- 1:ncol(hsets.init)
    initV <- 1
  }
  hset.csteps <- integer(9)
  ret <- .cstep(mX = mX, alpha = alpha, mT = mT, h=h, scfac = scfac, index = hsets.init[, initV], maxcsteps = maxcsteps)
  objret <- obj(ret$cov)
  hindex <- ret$index
  best9pack <- initV
  for (k in setsV) {
    tmp <- .cstep(mX = mX, alpha = alpha, mT = mT, h=h, scfac = scfac, index = hsets.init[, k], maxcsteps = maxcsteps)
    objtmp <- obj(tmp$cov)
    hset.csteps[k] <- tmp$numit
    if (objtmp < objret) {
      ret <- tmp;objret <- objtmp;hindex <- tmp$index;best9pack <- k
    }
    else if (objtmp == objret)
      best9pack <- c(best9pack, k)
  }
  c_kappa <- ret$scfac
  mE <- mX[, hindex] - ret$mu
  weightedScov <- tcrossprod(mE)/(h-1)
  D <- c_kappa * diag(1,p)
  muvec = apply(mX[, hindex], 1, mean)
  Sigma = alpha * mT + (1-alpha) * c_kappa * weightedScov
  if (p > n) {
    nu <- (1-alpha) * c_kappa; mU <- mE/sqrt(h - 1)
    Tau <- .Inv(alpha = alpha, mT = mT, nu = nu, mU = mU)
    Sigma <- pseudoinverse(Tau, tol = 1e-100)
  }
  else{
    Tau <- pseudoinverse(Sigma,tol = 1e-100)
  }
  mX <- t(crossprod(mX, Dx)) + vmx
  muvec <- as.vector(Dx %*% muvec + vmx)
  Sigma <- Dx %*% (Sigma %*% Dx)
  colnames(Sigma) <- rownames(Sigma) <- names(muvec) <- varnames
  return(structure(list(center = muvec, cov = Sigma,
                        dist = mahalanobis_dist(x, center=muvec, cov=Sigma, tol = 1e-100),
                        outliers = seq(1,nrow(x))[-hindex]), class = "covRobust"))
}



#' Orthogonalized Gnanadesikan-Kettenring (OGK) Robust Regularized Covariance Estimator
#'
#' @description Computes a robust covariance and location estimate using the pairwise algorithm
#' proposed by Maronna and Zamar (2002), which addresses the lack of affine
#' equivariance in the method of Gnanadesikan and Kettenring (1972). Furthermore, the OGK
#' estimator is guaranteed to return a positive-definite covariance matrix when n > p. In the
#' implementation in this package, when n < p, mild shrinkage is applied to induce a
#' positive-definite matrix. \cr
#'
#' \cr
#' Several options to use as scale and location estimators are offered here: \cr \cr
#' - "tau" is the tau-scale defined in Yohai and Zamar (1998). \cr
#' - "pb" is the percentage bend estimator (Shoemaker & Hettmansperger, 1982). \cr
#' - "bisq" is Tukey's bisquare estimator. \cr
#' - "huber" is Huber's psi estimator (Huber, 1964). \cr
#' - "mopt" is the modified optimal estimator. \cr
#' - "Qn" and "Sn" are two alternatives to the median based measures of location and scale (Rousseeuw, Peter, & Croux, 1993). \cr
#'
#' @param x a data frame or matrix of numeric covariates
#' @param method "tau", "pb", "bisq", "huber", "mopt", "Qn", "Sn"
#' @param opts list of options for the various scale estimators. "b" determines the percentage bend coefficient for "pb", "eff"
#' determines the efficiency of the "huber", "bisquare" and "mopt" scale estimators.
#' @param iter the number of refinement steps. defaults to 2. can be 0 if the raw ogk estimator is desired.
#'
#' @return a covRobust object containing the following elements:
#' \itemize{
#'          \item \strong{center}: multivariate mean of cleaned data set after removing outliers.
#'          \item \strong{cov}: covariance matrix of cleaned data set after removing outliers.
#'          \item \strong{dist}: the mahalanobis distances used in calculating the weights.
#'          \item \strong{outliers}: the indices of the outliers identified.
#'          \item \strong{weights}: the weights for downweighting outliers.
#'         }
#' @export
#'
#' @references
#' Huber, P. J. (1964). Robust estimation of a location parameter. Ann. Math. Statist. 35, 73–101. \cr \cr
#' Shoemaker, L. H., & Hettmansperger, T. P. (1982). Robust estimates and tests for the one- and two-sample scale models. Biometrika, 69:47–54 \cr \cr
#' Rousseeuw, Peter J.; Croux, Christophe (1993). Alternatives to the Median Absolute Deviation. Journal of the American Statistical Association, 88(424): 1273–1283, doi:10.2307/2291267 \cr \cr
#' Yohai, V.J. and Zamar, R.H. (1988). High breakdown-point estimates of regression by means of the minimization of an efficient scale. Journal of the American Statistical Association, 83:406–413.
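#'
#' @examples
#' \donttest{
#' ## Illustrative sketch only (hypothetical simulated data; assumes MASS is
#' ## installed for mvrnorm): contaminate a clean Gaussian sample with a few
#' ## gross outliers and compare the OGK estimate to the classical covariance.
#' set.seed(42)
#' clean <- MASS::mvrnorm(80, mu = rep(0, 4), Sigma = 0.4 + 0.6 * diag(4))
#' dat <- rbind(clean, matrix(rnorm(20, mean = 8), nrow = 5, ncol = 4))
#' fit <- cov.ogk(dat, method = "tau")
#' fit$outliers          # the contaminated rows 81:85 should typically appear here
#' round(fit$cov, 2)     # close to the covariance of the clean portion
#' round(cov(dat), 2)    # classical estimate inflated by the outliers
#' }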

cov.ogk <- function(x, method = c("tau", "pb", "bisq", "huber", "mopt", "Qn", "Sn"), iter = 2, opts = list("b" = 0.10, "eff" =  0.90)){

  x <- as.matrix(x);n <- nrow(x); p <- ncol(x)
  method <- match.arg(method)
  ## validate the percentage bend coefficient before the location/scale closures capture 'opts'
  if (is.null(opts$b) || opts$b >= 0.50) opts$b <- 0.49
  mufun <- .mufun(method, opts); scfun <- .scfun(method, opts)

  vrob <- .OGKcustomvrob(mufun, scfun)
  covfun <- .OGKmakecovfun(mufun,scfun,vrob)
  new <- first <- covfun(x)
  cov <- first$cov
  center <- as.vector(first$center)
  Z <- first$Z
  for (i in 1:iter) {
    old <- new
    new <- covfun(old$Z)
    cov <- mpd(old$A %*% tcrossprod(new$cov, old$A))
    center <- as.vector(old$A %*% as.vector(new$center))
    Z <- new$Z
  }
  mu <- apply(Z, 2, function(i) mufun(i))
  sigma <- apply(Z, 2, function(i) scfun(i))
  Z <- sweep(Z, 2, mu)
  Z <- sweep(Z, 2, sigma, "/")
  dist2 <- rowSums(Z^2)
  cdelta <- median(dist2)/qchisq(0.5, p)
  cov <- cdelta * cov
  dist2<-mahalanobis_dist(x,mu,cov);
  qq <- (qchisq(1 - (0.4/n^0.6), p)*median(dist2))/qchisq(0.5, p)
  wt <- ifelse(dist2 < qq, 1, 0)
  wcenter <- colMeans(x[which(wt==1), ])
  X <- scale(x[which(wt==1), ], wcenter, F)
  wcov <- mpd(crossprod(X)/nrow(X))
  if (!pdcheck(wcov)){
    tr1 <- sum(diag(wcov))
    wcov <- covShrink(x, wt, target = "adaptive")
    tr2 <- sum(diag(wcov)); adj <- tr1/tr2; wcov <- adj * wcov
  }
  return(structure(list(center=wcenter, cov=wcov, dist=dist2, outliers = which(wt!=1), weights = wt), class = "covRobust"))
}


#' Weighted Orthogonalized Gnanadesikan-Kettenring (WOGK) Robust Covariance Estimator
#'
#' @description See \code{\link{cov.ogk}} for information on how this estimator is computed. The only difference
#' is that the final covariance matrix is not based on dropping identified outliers, but rather on
#' smoothly downweighting them.
#'
#' \cr
#' Several options to use as scale and location estimators are offered here: \cr \cr
#' - "tau" is the tau-scale defined in Yohai and Zamar (1998). \cr
#' - "pb" is the percentage bend estimator (Shoemaker & Hettmansperger, 1982). \cr
#' - "bisq" is Tukey's bisquare estimator. \cr
#' - "huber" is Huber's estimator (Huber, 1964). \cr
#' - "mopt" is the modified optimal estimator. \cr
#' - "Qn" and "Sn" are two alternatives to the median based measures of location and scale (Rousseeuw, Peter, & Croux, 1993). \cr
#'
#' @param x a data frame or matrix of numeric covariates
#' @param method one of "tau", "pb", "bisq", "huber", "mopt", "Qn", or "Sn"
#' @param opts list of options for the various scale estimators. "b" determines the percentage bend coefficient for "pb", "eff"
#' determines the efficiency of the "huber", "bisquare" and "mopt" scale estimators.
#' @param iter the number of refinement steps. defaults to 2.
#' @return a covRobust object containing the following elements:
#' \itemize{
#'          \item \strong{center}: multivariate mean of cleaned data set after applying casewise weights.
#'          \item \strong{cov}: covariance matrix of cleaned data set after applying casewise weights.
#'          \item \strong{dist}: the mahalanobis distances used in calculating the weights.
#'          \item \strong{outliers}: the indices of the outliers identified.
#'          \item \strong{weights}: the weights for downweighting outliers.
#'         }
#' @export
#'
#' @references
#' Huber, P. J. (1964). Robust estimation of a location parameter. Ann. Math. Statist. 35, 73–101. \cr \cr
#' Shoemaker, L. H., & Hettmansperger, T. P. (1982). Robust estimates and tests for the one- and two-sample scale models. Biometrika, 69:47–54 \cr \cr
#' Rousseeuw, Peter J.; Croux, Christophe (1993). Alternatives to the Median Absolute Deviation. Journal of the American Statistical Association, 88(424): 1273–1283, doi:10.2307/2291267 \cr \cr
#' Yohai, V.J. and Zamar, R.H. (1988). High breakdown-point estimates of regression by means of the minimization of an efficient scale. Journal of the American Statistical Association, 83:406–413. \cr \cr
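#'
#' @examples
#' \donttest{
#' ## Illustrative sketch only (hypothetical simulated data; assumes MASS is
#' ## installed): the weighted OGK downweights contaminated rows smoothly
#' ## instead of discarding them outright.
#' set.seed(99)
#' clean <- MASS::mvrnorm(100, mu = rep(0, 3), Sigma = 0.3 + 0.7 * diag(3))
#' dat <- rbind(clean, matrix(6, nrow = 4, ncol = 3))
#' fit <- cov.wogk(dat, method = "Qn")
#' round(fit$weights[97:104], 2)   # the last four rows should receive small weights
#' fit$cov
#' }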

cov.wogk <- function(x, method =c("tau", "pb", "bisq", "huber", "mopt", "Qn", "Sn"), iter = 2, opts = list("b" = 0.10, "eff" =  0.90)){

  x <- as.matrix(x);n <- nrow(x); p <- ncol(x)
  method <- match.arg(method)
  ## validate the percentage bend coefficient before the location/scale closures capture 'opts'
  if (is.null(opts$b) || opts$b >= 0.50) opts$b <- 0.49
  mufun <- .mufun(method, opts); scfun <- .scfun(method, opts)
  vrob <- .OGKcustomvrob(mufun, scfun)
  covfun <- .OGKmakecovfun(mufun,scfun,vrob)
  new <- first <- covfun(x)
  cov <- first$cov
  center <- as.vector(first$center)
  Z <- first$Z
  for (i in 1:iter) {
    old <- new
    new <- covfun(old$Z)
    cov <- mpd(old$A %*% tcrossprod(new$cov, old$A))
    center <- as.vector(old$A %*% as.vector(new$center))
    Z <- new$Z
  }
  mu <- apply(Z, 2, function(i) mufun(i))
  sigma <- apply(Z, 2, function(i) scfun(i))
  Z <- sweep(Z, 2, mu)
  Z <- sweep(Z, 2, sigma, "/")
  dist2 <- rowSums(Z^2)
  cdelta <- hdmedian(dist2)/qchisq(0.5, p)
  cov <- cdelta * cov

  ## Calculate Weights Based On Initial Covariance Estimate
  if (p<=10){alpha <- (0.241-0.0029*p)/sqrt(n)}
  if (p>=11){alpha <- (0.252-0.0018*p)/sqrt(n)}
  cv <- qchisq(alpha,p)/qchisq(0.5,p);
  dist2<-mahalanobis_dist(x,mu,cov);
  wt = cvreg:::mvSmoothWt(dist2/median(dist2), cv, 1.5); h<-sum(wt)
  outliers <- which(dist2 > median(dist2)*cv)

  ## Calculate New Covariance Matrix
  wcov <- cov.wt(x, wt, method = "ML")
  wcenter <- wcov$center
  wcov <- wcov$cov
  if (!pdcheck(wcov)){
    tr1 <- sum(diag(wcov))
    wcov <- covShrink(x, wt, target = "identity")
    tr2 <- sum(diag(wcov)); adj <- tr1/tr2; wcov <- adj * wcov
  }
  return(structure(list(center=wcenter, cov=wcov, dist=dist2, outliers = outliers, weights = wt), class = "covRobust"))
}


#' Reweighted Median Ball Covariance Estimator
#'
#' @description This implements the reweighted median ball estimator of Olive (2004). The algorithm is
#' initialized using the BACON algorithm of Billor, Hadi, and Velleman (2000).
#'
#' The implementation in this package checks the covariance estimate at each iteration, and if it
#' is not positive definite, shrinkage is applied to obtain a well conditioned covariance matrix. Following
#' the shrinkage, the matrix is rescaled so that positive-definiteness is retained without changing the overall
#' size of the estimates. This is done using the ratio of the trace of the initial non-positive definite
#' matrix to the trace of the shrinkage estimate, i.e., \eqn{cov_{adj} = cov_{shrink} \times \left(tr(cov_{init})/tr(cov_{shrink})\right)}.
#'
#' @param x a data frame or matrix of numeric covariates
#' @param iter number of iterations. defaults to 5. recommended to leave as 5.
#'
#' @return a covRobust object containing the following elements:
#' \itemize{
#'          \item \strong{center}: multivariate mean of cleaned data set after discarding outliers.
#'          \item \strong{cov}: covariance matrix of cleaned data set after discarding outliers.
#'          \item \strong{dist}: the mahalanobis distances used in calculating the weights.
#'          \item \strong{outliers}: the indices of the outliers identified.
#'          \item \strong{weights}: the weights for downweighting outliers.
#'         }
#' @export
#' @references
#' Olive, D. J. (2004). A resistant estimator of multivariate location and dispersion. Computational Statistics & Data Analysis, 46, 93–102 \cr \cr
#' Billor, N., Hadi, A. S., & Velleman , P. F. (2000). BACON: Blocked Adaptive Computationally-Efficient Outlier Nominators; Computational Statistics and Data Analysis 34, 279–298. doi: 10.1016/S0167-9473(99)00101-2
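#'
#' @examples
#' \donttest{
#' ## Illustrative sketch only (hypothetical simulated data; assumes MASS is
#' ## installed): a shifted cluster of observations barely moves the
#' ## reweighted median ball estimate.
#' set.seed(7)
#' dat <- rbind(MASS::mvrnorm(90, rep(0, 4), diag(4)),
#'              MASS::mvrnorm(10, rep(5, 4), diag(4)))
#' fit <- cov.rmb(dat)
#' fit$outliers   # most of rows 91:100 should typically be flagged
#' fit$center     # remains close to the origin despite the shifted cluster
#' }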

cov.rmb<-function(x, iter = 5){

  is.pd <- function(sigma) {min(eigen(sigma,T,T)$values) > 1e-6}
  x=as.matrix(x);varnames <- colnames(x)
  x=na.omit(x)
  p <- dim(x)[2]
  n <- dim(x)[1]
  init <- cov.bacon(x)
  covs <- init$cov
  mns <- init$center

  for(i in 1:iter) {
    md2   <- mahalanobis_dist(x, mns, covs)
    medd2 <- hdmedian(md2)
    mns   <- colMeans(x[md2 <= medd2,  ])
    covs  <- crossprod(sweep(x[md2 <= medd2,],2,mns))/nrow(x[md2 <= medd2,])
    if (!is.pd(covs)){
      ## If the calculated covariance is not positive definite, calculate
      ## a shrinkage estimate of the covariance to obtain a positive
      ## definite matrix. Adjust the shrinkage estimate to have the same
      ## trace as the initial matrix to prevent sudden jumps in the iterations.
      tr1 <- sum(diag(covs))
      covs <- covShrink(sweep(x[md2 <= medd2,],2,mns), target = "pooled")
      tr2 <- sum(diag(covs))
      adj <- tr1/tr2
      covs <- adj * covs
    }
  }
  covb <- covs
  mnb <- mns
  critb <- sqrt(det(covb))
  covv <- diag(p)
  med <- apply(x, 2, hdmedian)
  md2 <- mahalanobis_dist(x, center = med, covv)
  medd2 <- hdmedian(md2)
  mns <- apply(x[md2 <= medd2,  ], 2, cvreg:::huberMean)
  covs <- crossprod(sweep(x[md2 <= medd2,],2,mns))/nrow(x[md2 <= medd2,])
  if (!is.pd(covs)){
    tr1 <- sum(diag(covs))
    covs <- covShrink(x[md2 <= medd2,], target = "adaptive")
    tr2 <- sum(diag(covs))
    adj <- tr1/tr2
    covs <- adj * covs
  }
  for(i in 1:iter){
    md2 <- mahalanobis_dist(x, mns, covs)
    medd2 <- hdmedian(md2)
    mns   <- colMeans(x[md2 <= medd2,  ])
    covs  <- crossprod(sweep(x[md2 <= medd2,],2,mns))/nrow(x[md2 <= medd2,])
    if (!is.pd(covs)){
      tr1 <- sum(diag(covs))
      covs <- covShrink(sweep(x[md2 <= medd2,],2,mns), target = "pooled")
      tr2 <- sum(diag(covs))
      adj <- tr1/tr2
      covs <- adj * covs
    }
  }
  crit <- sqrt(det(covs))
  if(crit < critb) {
    critb <- crit
    covb <- covs
    mnb <- mns
  }
  ## Adjust Scale
  rd2 <- mahalanobis_dist(x, mnb, covb)
  const <- hdmedian(rd2)/(qchisq(0.5, p))
  covb <- const * covb
  ## First Reweighting Step
  rd2 <- mahalanobis_dist(x, mnb, covb)
  up <- qchisq(1 - (0.3/n^0.45), p)
  rmnb <- colMeans(x[rd2<=up,])
  rcovb <- crossprod(sweep(x[rd2<=up,], 2, rmnb)) / nrow(x[rd2<=up,])
  if (!is.pd(rcovb)){
    tr1 <- sum(diag(rcovb))
    rcovb <- cov.nlshrink(x[rd2 <= up,])
    tr2 <- sum(diag(rcovb))
    adj <- tr1/tr2
    rcovb <- adj * rcovb
  }
  rd2 <- mahalanobis_dist(x, rmnb, rcovb)
  const <- hdmedian(rd2)/(qchisq(0.5, p))
  rcovb <- const * rcovb
  ## Second Reweighting Step
  rd2 <- mahalanobis_dist(x, rmnb, rcovb)
  up <- qchisq(1 - (0.3/n^0.45), p)
  rmnb <- colMeans(x[rd2<=up,])
  rcovb <- crossprod(sweep(x[rd2<=up,], 2, rmnb)) / nrow(x[rd2<=up,])
  if (!is.pd(rcovb)){
    tr1 <- sum(diag(rcovb))
    rcovb <- cov.nlshrink(x[rd2 <= up,])
    tr2 <- sum(diag(rcovb))
    adj <- tr1/tr2
    rcovb <- adj * rcovb
  }
  outliers <- seq(1,nrow(x),by=1)[rd2 >= up]
  rd2 <- mahalanobis_dist(x, rmnb, rcovb)
  const <- hdmedian(rd2)/(qchisq(0.5, p))
  rcovb <- const * rcovb
  names(rmnb) <- colnames(rcovb) <- rownames(rcovb) <- varnames
  return(structure(list(center = rmnb, cov = rcovb, dist=rd2, outliers=outliers, weights = as.numeric(rd2 <= up)), class = "covRobust"))
}

#' Multivariate Student's T Distribution Covariance Estimator
#'
#' @param x a data frame or matrix of numeric covariates
#' @param nu the normality parameter. must be >= 3. lower values give more outlier resistant estimates. defaults to NULL, which estimates the parameter automatically.
#' @param wt an optional vector of initial weights equal in length to the number of observations.
#' @param maxit maximum number of iterations. defaults to 100.
#' @param tol convergence tolerance. defaults to 1e-4
#' @return a covRobust object containing the following elements:
#' \itemize{
#'          \item \strong{center}: multivariate mean of cleaned data set after applying casewise weights.
#'          \item \strong{cov}: covariance matrix of cleaned data set after applying casewise weights.
#'          \item \strong{dist}: the mahalanobis distances used in calculating the weights.
#'          \item \strong{outliers}: the indices of the outliers identified.
#'          \item \strong{weights}: the weights for downweighting outliers.
#'         }
#' @export
#'
#' @references
#' Kent, J.T., Tyler, D.E., and Vardi, Y. (1994). A curious likelihood identity for the multivariate t-distribution. Communications in Statistics—Simulation and Computation 23, 441–453.
#'
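#' @examples
#' \donttest{
#' ## Illustrative sketch only (hypothetical simulated data; assumes MASS is
#' ## installed): with nu = NULL the normality parameter is estimated from the
#' ## data, and heavier contamination tends to pull it toward its lower bound of 3.
#' set.seed(3)
#' dat <- rbind(MASS::mvrnorm(95, rep(0, 3), diag(3)),
#'              MASS::mvrnorm(5, rep(4, 3), diag(3)))
#' fit <- cov.st(dat)
#' fit$nu
#' fit$outliers
#' }
#'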
cov.st = function (x, nu = NULL, wt = NULL, maxit = 100, tol = 0.0001) {
  if (is.null(wt)) wt <- rep(1, nrow(x))
  covfun <- function(x, nu = NULL, wt = NULL, maxit = 100, tol = 0.0001){
  center = TRUE
  test.values <- function(x) {
    if (any(is.na(x)) || any(is.infinite(x)))
      stop("missing or infinite values in 'x'")
  }
  scale.fast <- function(x, center, n, p) x - rep(center, rep(n, p))
  x <- as.matrix(x)
  n <- nrow(x)
  p <- ncol(x)
  dn <- colnames(x)
  test.values(x)
  if (is.null(wt)){wt <- rep(1, n)}else{wt <- wt; if(sum(wt)==1) wt <- (wt/sum(wt)) * n}
  wt0 <- wt
  test.values(wt)
  if (length(wt) != n)
    stop("length of 'wt' must equal number of observations")
  if (any(wt < 0))
    stop("negative weights not allowed")
  if (!sum(wt))
    stop("no positive weights")
  x <- x[wt > 0, , drop = FALSE]
  wt <- wt[wt > 0]
  n <- nrow(x); wt <- (wt/sum(wt)) * n
  loc <- colSums(wt * x)/sum(wt)
  if (is.numeric(center)) {
    if (length(center) != p)
      stop("'center' is not the right length")
    loc <- center
  }
  else if (is.logical(center) && !center)
    loc <- rep(0, p)
  use.loc <- is.logical(center) && center
  w <- wt * (1 + p/nu)
  endit <- 0
  for (iter in 1L:maxit) {
    w0 <- w
    X <- scale.fast(x, loc, n, p)
    sX <- svd(sqrt(w/sum(w)) * X, nu = 0)
    if (any(sX$d < 0.0001)){
      dm <- sX$d
      for (i in 2:length(dm)){
        dm[i] <- sum(c((0.95*dm[i]), (0.05*dm[i-1])))
      }
      dm[1] <- dm[1] * 0.95
      sX$d <- dm
    }
    wX <- X %*% sX$v %*% diag(1/sX$d, length(sX$d), ncol = p)
    Q <- drop(wX^2 %*% rep(1, p))
    w <- (wt * (nu + p))/(nu + Q)
    if (use.loc)
      loc <- colSums(w * x)/sum(w)
    if (all(abs(w - w0) < tol))
      break
    endit <- iter
  }
  cov <- mpd(crossprod(sqrt(w)*X)/sum(wt))
  if (length(dn)) {
    dimnames(cov) <- list(dn, dn)
    names(loc) <- dn
  }

  ans <- list(Sigma = cov, center = loc, w = w)
  ans
  }

  if (is.null(nu)){
    estNu <- function(x){
      make.nufun = function(x, weights){
        x <- as.matrix(x)
        function(df){
          out = covfun(x, nu = df, wt = weights)
          sum(mvtnorm::dmvt(x, out$center, out$Sigma, df = df, log = T))
        }
      }
      init <- sqrt(sqrt(nrow(x)) * sqrt(ncol(x)));nufun=make.nufun(x,wt)
      optim(init, fn = function(df){-1 * nufun(df)}, method = "L-BFGS-B", lower = 3, upper = 100)$par
    }
    nu <- estNu(x)
    ans <- covfun(x, nu, wt, maxit, tol)
    ans$nu <- nu
  }
  ans <- covfun(x, nu, wt, maxit, tol); ans$nu <- nu
  dist=mahalanobis_dist(x,center=ans$center,cov=ans$Sigma)
  crit<-sqrt(qchisq(0.975, ncol(x)))
  vec<-1:nrow(x)
  dis<-sqrt(dist)
  chk<-ifelse(dis>crit,TRUE,FALSE)
  outliers<-vec[chk]
  return(structure(list(center=ans$center, cov=ans$Sigma, dist=dist, weights = ans$w, outliers=outliers, nu=ans$nu), class="covRobust"))
}

#' Minimum Regularized Generalized Variance Covariance Estimator
#'
#' @description This is very similar to the minimum regularized covariance determinant
#' covariance estimator. The generalized variance of a data set is simply the
#' determinant of its covariance matrix. Here, the generalized variance is computed on a
#' "leave one out" version of the data set for each of the N observations. These values
#' are used to calculate depth values, to which an adaptive threshold
#' is applied to declare an observation an outlier. The covariance matrix is then recalculated
#' on the retained subset. This process is iterated until the generalized variance
#' of the matrix converges. In order to ensure that the generalized variance is non-negative
#' at each iteration, the estimated covariance is checked for positive-definiteness and shrinkage
#' applied in the event of a non-positive definite matrix.
#'
#' Optionally, the number of maximum iterations can be set to
#' zero, in which case the raw generalized variance estimator will be returned.
#'
#' @param x a data frame or matrix of numeric covariates
#' @param maxit number of maximum iterations. defaults to 10. set to 0 to obtain the raw initial estimate.
#' @param tol the tolerance value for convergence. defaults to 1e-6.
#'
#' @return a covRobust object containing the following elements:
#' \itemize{
#'          \item \strong{center}: multivariate mean of cleaned data set after removing outliers.
#'          \item \strong{cov}: covariance matrix of cleaned data set after removing outliers.
#'          \item \strong{dist}: the mahalanobis distances used in calculating the weights.
#'          \item \strong{outliers}: the indices of the outliers identified.
#'          \item \strong{weights}: the weights for downweighting outliers. here they are binary with 0 marking an outlier and 1 otherwise.
#'          \item \strong{iter}: the number of iterations taken to finish.
#'          \item \strong{converged}: TRUE if the algorithm converged by maxit, FALSE otherwise. If FALSE, re-run with a larger maxit.
#'          \item \strong{trace}: The generalized variance values at each iteration.
#'         }
#' @export
#'
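#' @examples
#' \donttest{
#' ## Illustrative sketch only (hypothetical simulated data; assumes MASS is
#' ## installed): iterate until the generalized variance stabilizes and inspect
#' ## the convergence trace.
#' set.seed(11)
#' dat <- rbind(MASS::mvrnorm(60, rep(0, 3), diag(3)), matrix(7, 3, 3))
#' fit <- cov.mrgv(dat)
#' fit$converged
#' fit$trace      # generalized variance at each iteration
#' fit$outliers
#' }
#'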
cov.mrgv <-function(x, maxit = 10, tol = 1e-6){

  gvar<-function(m){exp(sum(log(eigen(crossprod(scale(m,T,F))/nrow(m),symmetric=T,only.values=T)$values)))}

  outFun <- function(x){
    x<-x[!is.na(x)];n<-length(x);temp<-idealf(x)
    gval <- (17.63*n-23.64)/(7.74*n-3.71)
    cl<-temp$ql-gval*(temp$qu-temp$ql);cu<-temp$qu+gval*(temp$qu-temp$ql)
    flag<-NA;outid<-NA;vec<-c(1:n);for(i in 1:n){flag[i]<-(x[i]< cl || x[i]> cu)}
    if(sum(flag)==0)outid<-NULL;if(sum(flag)>0)outid<-vec[flag]
    keep<-vec[!flag];outval<-x[flag];n.out=sum(length(outid))
    list(out.val=outval,out.id=outid,keep=keep,n=n,n.out=n.out,cl=cl,cu=cu)
  }

  idealf<-function(x,na.rm=FALSE){
    if(na.rm)x<-x[!is.na(x)];j<-floor(length(x)/4 + 5/12);y<-sort(x)
    g<-(length(x)/4)-j+(5/12);ql<-(1-g)*y[j]+g*y[j+1];k<-length(x)-j+1
    qu<-(1-g)*y[k]+g*y[k-1];list(ql=ql,qu=qu)
  }

  m<-x<-na.omit(as.matrix(x));p<-ncol(x);names<-colnames(x);dval<-0
  keeps<-1:nrow(m) ## track the original row indices that are retained
  for(i in 1:nrow(m)){dval[i]<-gvar(m[-i,])}
  wch<-outFun(dval)$keep;m<-m[wch,];keeps<-keeps[wch]
  sigma  <- crossprod(scale(m,T,F))/nrow(m);
  pd <- min(eigen(sigma,T,T)$values) > 1e-6;
  if(!pd){
    sigma <- covShrink(m, target = "unequal")
    attr(sigma, "alpha") <- NULL
    attr(sigma, "alpha.var") <- NULL
  }
  center <- colMeans(m)
  gv<-trace<-exp(sum(log((eigen(sigma,symmetric=T,only.values=T)$values))));iter<-0;converged<-TRUE

  if (maxit > 0){
    iter<-0;change<-1000;mm<-m;gv.old<-gv;converged<-FALSE
    while(!converged & iter < maxit) {
      ## Recalculate Covariance
      dval <-0; for(i in 1:nrow(mm)){dval[i]<-gvar(mm[-i,])}
      wch <- outFun(dval)$keep;mm<-mm[wch,];keeps<-keeps[wch]
      center <- colMeans(mm);
      sigma  <- crossprod(scale(mm,T,F))/nrow(mm);
      ## If the matrix is not positive definite, use a shrinkage
      ## estimate to obtain a well conditioned matrix, and adjust
      ## to have the same trace as the unpenalized covariance
      ## matrix. This ensures that the shrinkage does not result
      ## in the algorithm terminating prematurely due to shrinkage
      ## inevitably reducing the determinant.
      pd <- min(eigen(sigma,T,T)$values) > 1e-6;
      if(!pd) {
        sigma <- covShrink(mm,target="adaptive")
        tr <- sum(diag(cov(mm))); tr2 <- sum(diag(sigma))
        tk <- tr/tr2; sigma <- sigma * tk
        attr(sigma, "alpha") <- NULL
        attr(sigma, "alpha.var") <- NULL
      }
      ## Check for convergence
      trace[2+iter]<- gv.new <- exp(sum(log(eigen(sigma,T,T)$values)))
      iter <- iter + 1
      change <- max(gv.old-gv.new, 0)
      gv.old <- gv.new
      converged <- change <= tol
    }
  }
  else{
    mm <- m
  }
  ## Adjust Scale
  rd2   <- mahalanobis_dist(mm,center,sigma)
  const <- median(rd2)/(qchisq(0.5, p))
  covb  <- mpd(const*sigma)
  colnames(covb)<-rownames(covb)<-names
  weights <- rep(0, nrow(x)); weights[keeps] <- 1
  return(structure(list(center = center, cov = covb, dist=rd2, outliers = setdiff(1:nrow(x),keeps), weights = weights, iter = iter, trace = trace, converged = converged), class="covRobust"))
}

#' Minimum Variance Vector Covariance Estimator
#'
#' @description This implements the minimum variance vector covariance estimator described in
#' Herwindiati, Djauhari, and Mashuri (2007). It is exactly analogous to the minimum covariance
#' determinant (MCD), except the trace of the matrix is the objective function, rather than
#' the determinant. The deterministic MCD algorithm described in Hubert, Rousseeuw, and Verdonck (2012)
#' is adapted here, rather than adapting the fast MCD algorithm in the paper introducing the MVV.
#'
#' @param x a data frame or matrix of numeric covariates
#' @param kappa the proportion of the data to use in each subset. defaults to 0.75. must be > 0.50.
#' @param method the method of measuring location and scale. see \code{\link{cov.mrcd}} for details on
#' the available options.
#' @param opts list of options for the various scale estimators. "b" determines the percentage bend coefficient for "pb", "eff" determines the efficiency of the "huber",  "bisquare" and "mopt"
#' scale estimators.
#' @return a covRobust object containing the following elements:
#' \itemize{
#'          \item \strong{center}: multivariate mean of cleaned data set after removing outliers.
#'          \item \strong{cov}: covariance matrix of cleaned data set after removing outliers.
#'          \item \strong{dist}: the mahalanobis distances used in calculating the weights.
#'          \item \strong{outliers}: the indices of the outliers identified.
#'          \item \strong{weights}: the weights for downweighting outliers. here they are binary, with 0 marking an outlier and 1 otherwise.
#'         }
#' @export
#'
#' @references
#' Herwindiati, D. E.; Djauhari, M. A.; Mashuri, M. (2007). Robust Multivariate Outlier Labeling. Communications in Statistics - Simulation and Computation, 36(6), 1287–1294. doi:10.1080/03610910701569044 \cr \cr
#' Hubert, M.; Rousseeuw, P.J.; Verdonck, T. (2012) A Deterministic Algorithm for Robust Location and Scatter, Journal of Computational and Graphical Statistics, 21(3), 618-637, doi: 10.1080/10618600.2012.672100 \cr
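#'
#' @examples
#' \donttest{
#' ## Illustrative sketch only (hypothetical simulated data; assumes MASS is
#' ## installed): minimum variance vector fit using the tau-scale, with the
#' ## flagged rows returned in 'outliers'.
#' set.seed(5)
#' dat <- rbind(MASS::mvrnorm(120, rep(0, 4), 0.25 + 0.75 * diag(4)),
#'              MASS::mvrnorm(8, rep(6, 4), diag(4)))
#' fit <- cov.mvv(dat, kappa = 0.75, method = "tau")
#' fit$outliers
#' round(cov2cor(fit$cov), 2)
#' }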

cov.mvv = function (x, kappa = 0.75, method = c("tau", "pb", "bisq", "huber", "mopt", "Qn", "Sn"), opts = list("b" = 0.10, "eff" =  0.90)) {

  method <- match.arg(method);scalefn <- .scfun(method, opts);mufn <- .mufun(method, opts)
  x<-as.matrix(x);n=nrow(x);p=ncol(x);minscale=0.000001;varnames<-colnames(x);maxcsteps=2000
  hsets.init = NULL;save.hsets=TRUE;full.h=T;trace= FALSE ## hsets.init is always constructed internally here
  rpack <- function(x, h, full.h, scaled = TRUE, scalefn, mufn) {
    initset <- function(X, scalefn, mufn, V, h) {
      stopifnot(length(d <- dim(X)) == 2, length(h) == 1, h >= 1)
      n <- d[1]
      stopifnot(h <= n)
      XC <- Scale2(X, mufn, NULL);lambda <- apply(XC,2,scalefn)
      sqrtcov <- V%*%(lambda*t(V));sqrtinvcov<-V%*%(t(V)/lambda)
      estloc <- apply(X%*%sqrtinvcov,2,mufn)%*%sqrtcov
      XC <- (X-rep(estloc,each = n))%*%V
      sort.list(mahalanobis_dist(XC,apply(XC,2,mufn),diag(lambda),tol=1e-100))[1:h]
    }
    stopifnot(length(dx <- dim(x)) == 2)
    n <- dx[1];p <- dx[2]
    if (!scaled) {x <- as.matrix(Scale2(x, center = mufn, scale = scalefn))}

    bacon.cov <- function(x){
      znorm <- sqrt(rowSums(x^2));ind5 <- order(znorm);half <- ceiling(n/2)
      Hinit <- ind5[1:half];return(cov(x[Hinit, , drop = FALSE]))
    }
    .cov <- function(x) {
      x<-na.omit(as.matrix(x));p<-ncol(x);n<-nrow(x);res<-crossprod(x)/n;return(res)
    }

    hsets <- matrix(integer(), h, 7); Thr <- 1/(4 * (n^0.25) * sqrt(pi * log(n)))
    hsets[, 1] <- initset(x, scalefn = scalefn, mufn = mufn, V = eigen(mpd(cor(tanh(x))),symmetric=T)$vectors, h=h)
    hsets[, 2] <- initset(x, scalefn = scalefn, mufn = mufn, V = eigen(mpd(cor(x,method="s")),symmetric=T)$vectors, h=h)
    hsets[, 3] <- initset(x, scalefn = scalefn, mufn = mufn, V = eigen(mpd(cor(qnorm(apply(x,2,rank)/(n+1)))),symmetric=T)$vectors, h=h)
    hsets[, 4] <- initset(x, scalefn = scalefn, mufn = mufn, V = eigen(cov2cor(mpd(bacon.cov(x))),symmetric=T)$vectors, h=h)
    hsets[, 5] <- initset(x, scalefn = scalefn, mufn = mufn, V = eigen(mpd(cor(psych::winsor(x,trim = 0.10))),symmetric=T)$vectors, h=h)
    hsets[, 6] <- initset(x, scalefn = scalefn, mufn = mufn, V = eigen(cov2cor(covShrink(apply(x,2,rank))),symmetric=T)$vectors, h=h)
    hsets[, 7] <- initset(x, scalefn = scalefn, mufn = mufn, V = eigen(cor(qnorm(pmin(pmax(apply(x, 2, rank)/n, Thr), 1 - Thr))),symmetric=T)$vectors, h=h)

    hsetsN <- matrix(integer(), n, 7)
    for (k in 1:7) {
      xk <- x[hsets[, k], , drop = FALSE]
      Eig <- eigen(crossprod(scale(xk,T,F))/nrow(xk), symmetric=T)
      score <- (x - rep(colMeans(xk), each = n)) %*% Eig$vectors
      values <- Eig$values
      ord <- order(mahalanobis_dist(score, colMeans(score),diag(sqrt(abs(as.vector(values)))),tol=1e-100))
      hsetsN[, k] <- ord
    }
    hsetsN
  }

  .Inv <- function(alpha, mT, nu, mU) {
    p = dim(mT)[1];pp = dim(mU);vD = sqrt(diag(mT));imD = diag(vD^(-1));R = imD %*% mT %*% imD
    cr = R[2, 1];I = diag(1, p);J = matrix(1, p, p);imR = 1/(1 - cr) * (I - cr/(1 + (p - 1) * cr) * J)
    imB = (alpha)^(-1) * imD %*% imR %*% imD;Temp <- pseudoinverse(diag(pp[2]) + nu * (t(mU) %*% (imB %*% mU)),tol=1e-100)
    return(imB - (imB %*% mU) %*% (nu * Temp) %*% (t(mU) %*% imB))
  }
  .COV <- function(XX, vMu, alpha = NULL, mT, scfac, invert = FALSE) {
    mE <- XX - vMu;n <- dim(mE)[2];p <- dim(mE)[1]
    mS <- tcrossprod(mE)/n;rcov <- alpha * mT + (1 - alpha) * scfac * mS
    if (invert) {
      if (p>n||!pdcheck(mS)){nu<-(1-alpha)*scfac;mU<-mE/sqrt(n);inv_rcov<-.Inv(alpha=alpha,mT=mT,nu=nu,mU=mU)}
      else {inv_rcov<-pseudoinverse(rcov,tol=1e-100)}
      return(list(alpha = 0, mT = mT, cov = mS, rcov = rcov, inv_rcov = inv_rcov))
    }
    else return(list(alpha = 0, mT = mT, cov = mS, rcov = rcov))
  }

  .cons<-function(p,alpha){qalpha<-qchisq(alpha,p);caI<-pgamma(qalpha/2,p/2+1)/alpha;1/caI}

  .cstep <- function(mX,alpha=0,mT=NULL,vMu=NULL,mIS = NULL,h,scfac,index = NULL,maxcsteps) {
    n <- dim(mX)[2];p <- dim(mX)[1];if(is.null(index)){index<-sample(1:n,h)}
    XX <- mX[, index];if(is.null(vMu)){vMu<-rowMeans(XX)}
    if (is.null(mIS)){ret=.COV(XX=XX,vMu=vMu,alpha=alpha,mT=mT,scfac = scfac,invert=T);mIS = ret$inv_rcov}
    vdst = diag(t(mX-vMu)%*%(mIS%*%(mX - vMu)));index = sort(sort.int(vdst, index.return = T)$ix[1:h]);iter = 0
    while (iter < maxcsteps) {
      XX <- mX[, index];vMu<-rowMeans(XX)
      ret <- .COV(XX = XX, vMu = vMu, alpha = 0, mT = mT, scfac = scfac, invert = T);mIS <- ret$inv_rcov
      vdst <- diag(t(mX - vMu) %*% (mIS %*% (mX - vMu)));nndex <- sort(sort.int(vdst, index.return = T)$ix[1:h])
      if (all(nndex == index))
        break
      index <- nndex;iter <- iter + 1
    }
    return(list(index=index,numit=iter,mu=vMu,cov=ret$rcov,icov=ret$inv_rcov,alpha=ret$alpha,mT=ret$mT,dist=vdst,scfac=scfac))
  }
  mX<-t(x);mintrace<-0;n<-dim(mX)[2];p<-dim(mX)[1]
  if (!is.null(kappa)){h <- ceiling(kappa*n)}else{kappa<-0.80;h<-ceiling(kappa*n)}
  if (kappa < 0.5 | kappa > 1) stop("'kappa' must be between 0.5 and 1.0!")
  obj <- function(x) {sum(diag(x))}
  vmx <- apply(mX,1,mufn);vsd<-apply(mX,1,scalefn);vsd[vsd<minscale]<-minscale
  Dx <- diag(vsd);mU <- scale(t(mX), center = vmx, scale = vsd);mX <- t(mU);mT <- diag(p)
  if (is.null(hsets.init)) {
    hsets.init <- rpack(x = t(mX), h=h, full.h = full.h, scaled = FALSE, scalefn = scalefn, mufn = mufn)
    dh <- dim(hsets.init)
  }
  else {
    if (is.vector(hsets.init))
      hsets.init <- as.matrix(hsets.init)
    dh <- dim(hsets.init)
    if (dh[1] < h || dh[2] < 1)
      stop("'hsets.init' must be a  h' x L  matrix (h' >= h) of observation indices")
    if (full.h && dh[1] != n)
      warning("'full.h' is true, but 'hsets.init' has less than n rows")
    if (min(hsets.init) < 1 || max(hsets.init) > n)
      stop("'hsets.init' must be in {1,2,...,n}; n = ",
           n)
  }
  setsV <- 1:ncol(hsets.init)
  hsets.init <- hsets.init[1:h, ]
  scfac <- .cons(p, h/n)
  nsets <- ncol(hsets.init)
  hset.csteps <- integer(7)
  ret <- .cstep(mX = mX, alpha = 0, mT = mT, h=h, scfac = scfac, index = hsets.init[, 1], maxcsteps = maxcsteps)
  objret <- obj(ret$cov)
  hindex <- ret$index
  best7pack <- 1
  for (k in setsV) {
    tmp <- .cstep(mX = mX, alpha = 0, mT = mT, h=h, scfac = scfac, index = hsets.init[, k], maxcsteps = maxcsteps)
    objtmp <- obj(tmp$cov)
    hset.csteps[k] <- tmp$numit
    if (objtmp < objret) {
      ret <- tmp;objret <- objtmp;hindex <- tmp$index;best7pack <- k
    }
    else if (objtmp == objret)
      best7pack <- c(best7pack, k)
  }
  c_kappa <- ret$scfac
  mE <- mX[, hindex] - ret$mu
  weightedScov <- tcrossprod(mE)/(h-1)
  D <- c_kappa * diag(1,p)
  muvec = apply(mX[, hindex], 1, mean)
  Sigma = c_kappa * weightedScov
  mX <- t(crossprod(mX, Dx)) + vmx
  muvec <- as.vector(Dx %*% muvec + vmx)
  Sigma <- Dx %*% (Sigma %*% Dx)
  colnames(Sigma) <- rownames(Sigma) <- names(muvec) <- varnames
  weights <- rep(0, n); weights[hindex] <- 1
  return(structure(list(center = muvec, cov = Sigma, weights = weights,
                        dist = mahalanobis_dist(x, center=muvec, cov=Sigma, tol = 1e-100),
                        outliers = seq(1, nrow(x))[-hindex]),
                   class = "covRobust"))
}


#' Rocke's Multivariate Translated Biweight S-Estimator
#'
#' @description Using a robust \eqn{\rho} function in multivariate estimation has a major
#' drawback. As the dimensionality of the data matrix increases (variables) the rows of
#' the matrix (observations) will receive increasingly uniform weights. Hence, as p grows
#' in size, the robust multivariate estimate converges to the standard estimator. However,
#' this is undesirable. While this would be a good thing if the relative effeciency to the
#' MLE inreased with the number of observations, it is not desirable to occur as a result of
#' increasing dimensionality - the robustness is lost. Worse is that severe outliers may
#' receive larger weights than inliers and consequently induce bias to the estimation. David
#' Rocke (1996) proposed a modified "translated" bisquare \eqn{\rho} function that adapts to
#' the dimensionality of a data set to prevent the ARE from increasing to values infinitesimally
#' close to one while also preserving the robustness and unbiasedness of the estimation.
#'
#' The algorithm is initialized here by using BACON (Blocked Adaptive Computationally-Efficient
#' Outlier Nominator) algorithm of Billor, Hadi, and Velleman (2000).
#'
#'
#' @param X a data frame or matrix of numeric covariates.
#' @param phi the factor determining the minimum number of observations not declared outliers following the
#' initial estimation stage. the number of points not declared outliers will be at least phi * p. the default
#' is 2. put differently, the number of observations declared outliers in the initial step will be at most n - phi*p.
#' however, this has only an indirect effect on the final points declared outliers.
#' @param q a tuning parameter. defaults to 2. recommended to not change unless you know what you are doing.
#' @param maxit maximum number of iterations
#' @param maxsteps limit on the number of steps for the line search section of the algorithm.
#' @param tol numeric tolerance for convergence.
#'
#' @return a covRobust object containing the following elements:
#' \itemize{
#'          \item \strong{center}: multivariate mean of cleaned data set after applying casewise weights.
#'          \item \strong{cov}: covariance matrix of cleaned data set after applying casewise weights.
#'          \item \strong{dist}: the mahalanobis distances used in calculating the weights.
#'          \item \strong{outliers}: the indices of the outliers identified.
#'          \item \strong{weights}: the weights for downweighting outliers.
#'         }
#' @export
#'
#' @references
#' Rocke, D. M. (1996). Robustness properties of S-estimators of multivariate location and shape in high dimension. Annals of Statistics, 24, 1327–1345. \cr \cr
#' Rocke, D. M., & Woodruff, D. L. (1996). Identification of outliers in multivariate data. Journal of the American Statistical Association, 91, 1047–1061. \cr \cr
#' Billor, N., Hadi, A. S., & Velleman , P. F. (2000). BACON: Blocked Adaptive Computationally-Efficient Outlier Nominators; Computational Statistics and Data Analysis 34, 279–298. doi: 10.1016/S0167-9473(99)00101-2
#'
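#' @examples
#' \donttest{
#' ## Illustrative sketch only (hypothetical simulated data; assumes MASS is
#' ## installed): the translated biweight keeps downweighting outliers even as
#' ## the dimension grows.
#' set.seed(21)
#' dat <- rbind(MASS::mvrnorm(150, rep(0, 10), diag(10)),
#'              MASS::mvrnorm(10, rep(3, 10), diag(10)))
#' fit <- cov.rocke(dat)
#' fit$outliers
#' range(fit$weights)
#' }
#'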
cov.rocke <- function (X, phi = 2, q = 2, maxit = 25, maxsteps = 5, tol = 1e-06) {
  X <- as.matrix(X)
  n <- nrow(X); p <- ncol(X)
  consts <- .rockconst(p = p, n = n)
  pcrit <- consts$alpha
  gamma0 <- consts$gamma
  init <- cov.bacon(X)
  S0 = init$cov ; mu0 = init$center
  S0 = S0/(det(S0)^(1/p))
  mah0 = init$dist
  mah = mah0
  delta <- (1 - p/n)/2
  sig <- .rocke_scale(x = mah, gamma = gamma0, q = q, delta = delta)
  didi <- mah/sig
  dife <- sort(abs(didi - 1))
  gg <- min(dife[(1:n) >= (phi * p)])
  gamma <- max(gg, gamma0)
  sig0 <- .rocke_scale(x = mah, gamma = gamma, delta = delta, q = q)
  iter <- 0
  difpar <- difsig <- +Inf
  while ((((difsig > tol) | (difpar > tol)) & (iter < maxit)) &
         (difsig > 0)) {
    iter <- iter + 1
    w <- wt.rocke(tt = mah/sig, gamma = gamma, q = q)
    mu <- colMeans(X * w)/mean(w)
    Xcen <- scale(X, center = mu, scale = FALSE)
    S <- t(Xcen) %*% (Xcen * w)/n
    S <- S/(det(S)^(1/p))
    mah <- mahalanobis_dist(x = X, center = mu, cov = S)
    sig <- .rocke_scale(x = mah, gamma = gamma, delta = delta, q = q)
    step <- 0
    delgrad <- 1
    while ((sig > sig0) & (step < maxsteps)) {
      delgrad <- delgrad/2
      step <- step + 1
      mu <- delgrad * mu + (1 - delgrad) * mu0
      S <- delgrad * S + (1 - delgrad) * S0
      S <- S/(det(S)^(1/p))
      mah <- mahalanobis_dist(x = X, center = mu, cov = S)
      sig <- .rocke_scale(x = mah, gamma = gamma, delta = delta, q = q)
    }
    dif1 <- as.vector(t(mu - mu0) %*% solve(S0, mu - mu0))/p
    dif2 <- max(abs(solve(S0, S) - diag(p)))
    difpar <- max(dif1, dif2)
    difsig <- 1 - sig/sig0
    mu0 <- mu
    S0 <- S
    sig0 <- sig
  }
  tmp <- .rockemat(S0 = S0, dis = mah)
  S <- tmp$S
  ff <- tmp$ff
  mah <- mah/ff
  crit<-sqrt(qchisq(1 - pcrit, ncol(X)))
  vec<-1:nrow(X)
  dis<-sqrt(mah)
  chk<-ifelse(dis>crit,TRUE,FALSE)
  outliers<-vec[chk]
  return(structure(list(cov = S, center = mu, dist = mah, outliers = outliers, weights = w, gamma = gamma), class = "covRobust"))
}


#' Yohai's Smoothed Hard Rejection Multivariate M-Estimator
#'
#' @description This implements the 'smoothed hard rejection' estimator. It is very
#' similar to Rocke's translated biweight S-estimator in the \code{\link{cov.rocke}}
#' function, but has been found to work better in problems where the number of
#' variables is greater than 15 or so (Maronna & Yohai, 2017). The algorithm is
#' initialized with Billor, Hadi, and Velleman's (2000) BACON algorithm.
#'
#' @param X a data frame or matrix of numeric covariates
#' @param maxit maximum number of iterations. defaults to 50.
#' @param tol convergence tolerance
#'
#' @return a covRobust object containing the following elements:
#' \itemize{
#'          \item \strong{center}: multivariate mean of cleaned data set after applying casewise weights.
#'          \item \strong{cov}: covariance matrix of cleaned data set after applying casewise weights.
#'          \item \strong{dist}: the mahalanobis distances used in calculating the weights.
#'          \item \strong{outliers}: the indices of the outliers identified.
#'          \item \strong{weights}: the weights for downweighting outliers.
#'         }
#' @export
#'
#' @references
#' Muler, N. & Yohai, V.J. (2002). Robust estimates for arch processes. Journal of Time Series Analysis, 23(3), 341–375. doi:10.1111/1467-9892.00268  \cr \cr
#' Maronna, R.A. & Yohai, V.J. (2017) Robust and efficient estimation of multivariate scatter and location. Computational Statistics and Data Analysis, 109, 64–75. doi: 10.1016/j.csda.2016.11.006 \cr \cr
#' Billor, N., Hadi, A. S., & Velleman , P. F. (2000). BACON: Blocked Adaptive Computationally-Efficient Outlier Nominators; Computational Statistics and Data Analysis, 34, 279–298. doi: 10.1016/S0167-9473(99)00101-2
#'
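#' @examples
#' \donttest{
#' ## Illustrative sketch only (hypothetical simulated data; assumes MASS is
#' ## installed): smoothed hard rejection in a moderately high-dimensional
#' ## problem, the setting where it tends to do best.
#' set.seed(33)
#' dat <- rbind(MASS::mvrnorm(200, rep(0, 20), diag(20)),
#'              MASS::mvrnorm(12, rep(2.5, 20), diag(20)))
#' fit <- cov.shr(dat)
#' fit$outliers
#' }
#'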
cov.shr <- function (X, maxit = 50, tol = 1e-04){
  X = as.matrix(X)
  d <- dim(X)
  n <- d[1]
  p <- d[2]
  delta <- 0.5 * (1 - p/n)
  cc <- .shrconst(p, n)
  const <- cc[1]
  init <- cov.bacon(X)
  S0 <- init$cov ; mu0 <- init$center
  S0 <- S0/(det(S0)^(1/p))
  mah0 <- init$dist
  mah <- mah0
  sigma <- .scale_shr(t = mah, delta = delta) * const
  iter <- 0
  difpar <- +Inf
  S0 <- diag(p)
  mu0 <- rep(0, p)
  dist <- mah
  while ((difpar > tol) & (iter < maxit)) {
    iter <- iter + 1
    w <- wt.shr(dist/sigma)
    mu <- colMeans(X * w)/mean(w)
    Xcen <- scale(X, center = mu, scale = FALSE)
    S <- t(Xcen) %*% (Xcen * w)
    S <- S/(det(S)^(1/p))
    dist <- mahalanobis_dist(x = X, center = mu, cov = S)
    dif1 <- t(mu - mu0) %*% solve(S0, mu - mu0)
    dif2 <- max(abs(c(solve(S0, S) - diag(p))))
    difpar <- max(c(dif1, dif2))
    mu0 <- mu
    S0 <- S
  }
  tmp <- .shrmat(S0 = S0, dis = dist)
  dist <- dist/tmp$ff
  crit<-sqrt(qchisq(0.975, ncol(X)))
  vec<-1:nrow(X)
  dis<-sqrt(dist)
  chk<-ifelse(dis>crit,TRUE,FALSE)
  outliers<-vec[chk]
  structure(list(center = mu0, cov = tmp$S, dist = dist, weights = w, outliers = outliers), class = "covRobust")

}



.minNotZero <- function(x){
  ## return the smallest non-zero value; falls back to min(x) when no zeros are present
  if (any(x == 0)) min(x[x != 0])
  else min(x)
}

.mufun <- function(method = c("tau", "pb", "bisq", "huber", "mopt", "Qn", "Sn", "mad"), opts){
  method <- match.arg(method)
  switch(method,
                "pb" =  function(x){unname(pbLocScale(x,opts$b,T)[1])},
                "bisq" = function(x){.locScaleMmod(x, psi = "bisquare", eff = opts$eff)$mu},
                "mopt" = function(x){.locScaleMmod(x, psi = "mopt", eff = opts$eff)$mu},
                "huber" = function(x){.locScaleMmod(x, psi = "huber", eff = opts$eff)$mu},
                "Qn" = function(x){median(x)},
                "Sn" = function(x){median(x)},
                "tau" = function(x){unname(cvreg::tauLocScale(x, mu=T)[1])},
                "mad" = function(x){hdmedian(x)},
  )
}

.scfun <- function(method = c("tau", "pb", "bisq", "huber", "mopt", "Qn", "Sn", "mad"), opts){
  method <- match.arg(method)
  switch(method,
                "pb" = function(x){unname(pbLocScale(x,opts$b,F))},
                "bisq" = function(x){.locScaleMmod(x, psi = "bisquare", eff = opts$eff)$disper},
                "mopt" = function(x){.locScaleMmod(x, psi = "mopt", eff = opts$eff)$disper},
                "huber" = function(x){.locScaleMmod(x, psi = "huber", eff = opts$eff)$disper},
                "Qn" = function(x){robustbase::s_Qn(x,F)},
                "Sn" = function(x){robustbase::s_Sn(x,F)},
                "tau" = function(x){unname(cvreg::tauLocScale(x,mu=F))},
                "mad" = function(x){cvreg:::HDMAD(x)*1.4826},
  )
}


.rocke_scale <- function (x, gamma, q, delta = 0.5, tol = 1e-05) {
  n <- length(x)
  y <- sort(abs(x))
  n1 <- floor(n * (1 - delta))
  n2 <- ceiling(n * (1 - delta)/(1 - delta/2))
  qq <- y[c(n1, n2)]
  u <- 1 + gamma * (delta - 1)/2
  sigin <- c(qq[1]/(1 + gamma), qq[2]/u)
  if (qq[1] >= 1) {
    tolera <- tol
  }
  else {
    tolera <- tol * qq[1]
  }
  if (mean(x == 0) > (1 - delta)) {
    sig <- 0
  }
  else {
    .averho <- function(sig, x, gamma, delta, q){
      u <- (x/sig - 1)/gamma
      y <- ((u/(2 * q) * (q + 1 - u^q) + 0.5))
      y[u >= 1] <- 1
      y[u < (-1)] <- 0
      mean(y) - delta
    }
    sig <- saferoot(f = .averho, interval = sigin, x = x, gamma = gamma, delta = delta, q = q, tol = tolera)$root
  }
  return(sig)
}

wt.rocke <- function (tt, gamma, q) {
  ss <- (tt - 1)/gamma
  w <- 1 - ss^q
  w[abs(ss) > 1] <- 0
  return(w)
}
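
## Descriptive note (illustrative only): as written, wt.rocke assigns the weight
## 1 - ((tt - 1)/gamma)^q to scaled distances tt inside the band |tt - 1| <= gamma
## and zero to everything outside it; the commented call below would plot that shape.
# curve(wt.rocke(x, gamma = 0.5, q = 2), from = 0, to = 2)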

.rockemat <-function (S0, dis) {
  p <- dim(S0)[1]
  sig <- hdmedian(dis)
  cc <- qchisq(0.5, df = p)
  ff <- sig/cc
  return(list(ff = ff, S = S0 * ff))
}


.rockconst <- function (p, n) {
  beta <- c(-6.1357, -1.0078, 0.81564)
  if (p >= 15) {
    a <- c(1, log(p), log(n))
    alpha <- exp(sum(beta * a))
    gamma <- qchisq(1 - alpha, df = p)/p - 1
    gamma <- min(gamma, 1)
  }
  else {
    gamma <- 1
    alpha <- 1e-06
  }
  return(list(gamma = gamma, alpha = alpha))
}



.shrconst <- function (p, n){
  x <- c(1/p, p/n, 1)
  beta <- c(4.3331193, -1.0018907,  0.6352537,  2.4980042, -0.6920007, 0.7344242)
  beta <- matrix(beta, byrow = TRUE, ncol = 3)
  return(t(x) %*% t(beta))
}


.shrmat <-function (S0, dis) {
  p <- dim(S0)[1]
  sig <- hdmedian(dis)
  cc <- qchisq(0.5, df = p)
  ff <- sig/cc
  return(list(ff = ff, S = S0 * ff))
}


.scale_shr <- function (t, delta = 0.5, sig0 = hdmedian(t), niter = 50, tol = 1e-04) {

  .averho <- function (sig, d, delta) {
    d <- d/sig
    G1 <- (-1.944); G2 <- 1.728; G3 <- (-0.312); G4 <- 0.016
    u <- (d > 9); v <- (d < 4); w <- (1 - u) * (1 - v)
    z <- v * d/2 + w * ((G4/8) * (d^4) + (G3/6) * (d^3) + (G2/4) *
                          (d^2) + (G1/2) * d + 1.792) + 3.25 * u
    z <- z/3.25
    return(sig * mean(z)/delta)
  }

  if (mean(t < 1e-16) >= 0.5) {
    sig <- 0
  }
  else {
    sig1 <- sig0
    y1 <- .averho(sig1, t, delta)
    while ((y1 > sig1) & ((sig1/sig0) < 1000)) {
      sig1 <- 2 * sig1
      y1 <- .averho(sig1, t, delta)
    }
    if ((sig1/sig0) >= 1000) {
      sig <- sig0
    }
    else {
      iter <- 0
      sig2 <- y1
      while (iter <= niter) {
        iter <- iter + 1
        y2 <- .averho(sig2, t, delta)
        den <- sig2 - y2 + y1 - sig1
        if (abs(den) < tol * sig1) {
          sig3 <- sig2
          iter <- niter + 1
        }
        else {
          sig3 <- (y1 * sig2 - sig1 * y2)/den
        }
        sig1 <- sig2
        sig2 <- sig3
        y1 <- y2
      }
      sig <- sig2
    }
  }
  return(sig)
}


wt.shr <- function(u){
  G1 <- -1.944
  G2 <- 1.728
  G3 <- -0.312
  G4 <- 0.016
  o <- (u > 9)
  v <- (u < 4)
  w <- (1 - o) * (1 - v)
  z <- v/2 + w * ((G4/2) * (u^3) + (G3/2) * (u^2) + (G2/2) * u + (G1/2))
  w <- z/3.25
  return(w / max(w))
}


.OGKmakecovfun <- function(mufun, scfun, vrob){
  function(X){
    X<-as.matrix(X);n<-nrow(X);p<-ncol(X)
    sigma <- apply(X, 2, function(i) scfun(i))
    Y <- X %*% diag(1/sigma)
    U <- matrix(1, p, p)
    for (i in 1:p) for (j in i:p) {
      U[j, i] <- U[i, j] <- vrob(Y[, i], Y[, j])
    }
    diag(U) <- 1
    E <- eigen(U)$vectors
    A <- diag(sigma) %*% E
    Z <- Y %*% E
    cent <- apply(Z, 2, mufun)
    sigma <- apply(Z, 2, scfun)
    cov <- A %*% tcrossprod(diag(sigma^2), A)
    loc <- A %*% cent
    list(cov = mpd(cov), center = loc, A = A, Z = Z)
  }
}


.OGKcustomvrob <- function(mufun, scfun){
  function(x, y){
    x<-x-mufun(x);y<-y-mufun(y);
    return(((scfun(x+y)^2)-(scfun(x-y)^2))/4)
  }
}
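
## Sanity note (illustrative only, not executed): with the ordinary mean and
## standard deviation supplied as 'mufun' and 'scfun', the Gnanadesikan-Kettenring
## identity used above, cov(x, y) = (s(x + y)^2 - s(x - y)^2)/4, reproduces the
## usual sample covariance exactly:
# v <- .OGKcustomvrob(mean, sd)
# set.seed(1); x <- rnorm(50); y <- 0.5 * x + rnorm(50)
# all.equal(v(x, y), cov(x, y))   # TRUE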


## Modification of RobStatTM's locScaleM function to accommodate variables with mad=0
.locScaleMmod <- function (x, psi = "bisquare", eff = 0.95, maxit = 150, tol = 1e-06){
  suppressPackageStartupMessages(library(RobStatTM))
  kpsi <- switch(psi, bisquare = 1, huber = 2, mopt = 3)
  if (kpsi == 1) {
    if (eff < 0.80 || eff > 0.99){return(cat(crayon::red("eff must be > 0.80 and < 0.99")))}
    kBis = splinefun(c(0.80,0.85,0.9,0.95,0.999),c(3.136909,3.44369,3.882662,4.685065,5.526268))(eff)
    efis = c(0.80,0.85,0.9,0.95,0.999)
    keff = eff
    if (kpsi > 0 & keff > 0) {
      ktun = kBis; mu0 = hdmedian(x); sig0 = cvreg:::huberMean(abs(x - hdmedian(x)))/sqrt(2/pi)
      resi <- (x-mu0)/sig0
      if (sig0 < 1e-10) {
        mu <- mu0
        sigma <- 0
      }
      else{
        dife = 1e+10
        iter = 0
        while (dife > tol & iter < maxit) {
          iter = iter + 1
          resi = (x-mu0)/sig0
          ww = wt.Bisquare(resi/ktun, kpsi)
          mu = sum(ww*x)/sum(ww)
          dife = abs(mu-mu0)/sig0
          mu0 = mu
        }
      }
    }
    rek = resi/ktun
    pp = RobStatTM:::psif(rek, kpsi)*ktun
    n = length(x)
    a = mean(pp^2)
    b = mean(RobStatTM:::psipri(rek, kpsi))
    sigmu = sig0^2 * a/(n * b^2)
    sigmu = sqrt(sigmu)
    scat <- RobStatTM:::mscale(u = x - mu, delta = 0.5, tuning.chi = 1.54764, family = "bisquare")
    resu <- list(mu = mu, std.mu = sigmu, disper = scat)
  }
  else if (kpsi==2){
    if (eff < 0.80 || eff > 0.99){return(cat(crayon::red("eff must be > 0.80 and < 0.99")))}
    kHub = splinefun(c(0.80,0.85,0.9,0.95,0.99),c(0.593,0.732,0.981,1.34,1.809))(eff)
    efis = c(0.80,0.85,0.9,0.95,0.99)
    keff = eff
    ktun = kHub; mu0 = hdmedian(x);
    huberscale <- function (y, k = 1.5, mu, tol = 1e-06, maxit = 1000){
        y <- y[!is.na(y)];n<-length(y);mu0 <- mu;mu1 <- mu;n1<-n;s0 <- cvreg:::huberMean(abs(y-hdmedian(y)))/sqrt(2/pi)
        th <- 2 * pt(k, df = n) - 1;beta <- th + k^2 * (1 - th) - 2 * k * dt(k, df = n)
        iter <- 0
        converged <- F
        while(!converged & iter < maxit) {
          yy <- pmin(pmax(mu0 - k * s0, y), mu0 + k*s0)
          ss <- sum((yy - mu1)^2)/n1;s1 <- sqrt(ss/beta)
          if ((abs(mu0 - mu1) < tol * s0) && abs(s0 - s1) < tol * s0){converged <- TRUE}
          else{converged<-FALSE}
          iter <- iter + 1;mu0 <- mu1;s0 <- s1
        }
        s0
      }
    scale <- huberscale(x, k = ktun, mu = mu0, tol = tol, maxit = maxit)
    resu <- list(mu=MASS::hubers(x, k = ktun, s = scale, initmu = mu0)$mu, disper = scale)
    resi <- (x - resu$mu) / resu$disper
    sigmu <- sqrt(mean(resi^2))/sqrt(length(x))
    resu <- list(mu=resu$mu, sigmu = sigmu, disper = resu$disper)
  }
  else {
    family <- "mopt"
    eff <- match.closest(eff, c(0.80, 0.85,0.9,0.95,0.99))
    efis <- c(0.80,0.85,0.9,0.95,0.99)
    if (eff < 0.80 || eff > 0.99){return(cat(crayon::red("eff must be > 0.80 and < 0.99")))}
    keff <- eff
    if (!is.na(keff)) {
      cc <- RobStatTM::mopt(eff)
      mu0 <- hdmedian(x)
      sig0 <- cvreg:::huberMean(abs(x-hdmedian(x)))/sqrt(2/pi)
      resi <- (x - mu0)/sig0
      if (sig0 < 1e-10) {
        mu <- mu0
        sigma <- 0
      }
      else {
        dife <- 1e+10
        iter <- 0
        while (dife > tol && iter < maxit) {
          iter <- iter + 1
          resi <- (x - mu0)/sig0
          ww <- RobStatTM:::Mwgt(resi, cc, "mopt")
          mu <- sum(ww * x)/sum(ww)
          dife <- abs(mu - mu0)/sig0
          mu0 <- mu
        }
      }
      pp <- RobStatTM::rhoprime(resi, "mopt", cc)
      n <- length(x)
      a <- mean(pp^2)
      b <- mean(RobStatTM:::rhoprime2(resi, "mopt", cc))
      sigmu <- sqrt(sig0^2 * a/(n * b^2))
      f <- function(u, family = "mopt", cc) {
        cc["c"] <- u
        integrate(function(x,fam,cc) RobStatTM:::rho(x, fam, cc) *  dnorm(x), -Inf, Inf, fam = family, cc = cc)$value - 0.5
      }
      cc["c"] <- cvreg::saferoot(f, c(0.01, 10), family = family, cc = cc)$root
      scat <- RobStatTM:::mscale(x - mu, delta = 0.5, tuning.chi = cc, family = "mopt")
      resu <- list(mu = mu, std.mu = sigmu, disper = scat)
    }
    else {
      print(c(eff, " No such eff"))
      resu <- NA
    }
  }
  return(resu)
}


#' Calculate Adaptive Huber Covariance Matrix
#'
#' @param x a data frame or matrix of numeric variables
#'
#' @return a covariance matrix
#' @export
#' @keywords internal
#' @examples
#' cov.ahuber(matrix(rnorm(200), ncol = 4))
#'
#' @references
#' Huber, P. J. (1964). Robust estimation of a location parameter. Ann. Math. Statist. 35, 73–101. \cr
cov.ahuber <- function(x){
  cvreg:::huberCov(as.matrix(x))
}


#' Blocked Adaptive Computationally-Efficient Outlier Nominators
#'
#' @description Intended only for use as an initial estimator for more robust covariance estimates.
#'
#' @param x a data frame or matrix of numeric covariates
#' @param collect numeric factor chosen by the user to define the size of the initial subset (p * collect)
#' @param center0 the initial estimate of the multivariate center.
#' defaults to "SpatialMedian". The other option is "Medians" which uses
#' the componentwise medians (column univariate medians).
#' @param maxiter the maximum number of iterations. defaults to 500.
#'
#' @return a list
#' @export
#' @keywords internal
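#' @examples
#' \donttest{
#' ## Illustrative sketch only (hypothetical simulated data; assumes MASS is
#' ## installed): BACON serves mainly as a fast initial estimate for the other
#' ## estimators in this file.
#' set.seed(8)
#' dat <- rbind(MASS::mvrnorm(70, rep(0, 3), diag(3)), matrix(5, 3, 3))
#' init <- cov.bacon(dat)
#' init$outliers
#' init$converged
#' }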
cov.bacon <- function(x, collect = 4, center0 = c("SpatialMedian", "Medians"), maxiter = 500){

  ## Original code written May 25, 2001 by U. Oetliker for S-Plus 5.1.
  ## more Modifications in porting to R : Martin Maechler, May 2009.
  ## Additional modifications by Brandon Vaughan, 3/13/2020.
  x <- as.matrix(x)
  center0 <- match.arg(center0)
  n <- nrow(x); p <- ncol(x)
  m = min(collect * p, n * 0.5)
  alpha <- min(0.975, 1-((0.24815 - 0.0021885*p)/sqrt(n)))

  chiCrit <- function(n, p, r, alpha){
    h <- (n + p + 1) / 2
    chr <- max(0, (h - r) / (h + r))
    if (n > p){cnp <- 1 + (p + 1) / (n - p) + 2 / (n - 1 - 3*p)}
    else{cnp <- 1 + (n + 1) / (p - n) + 2 / (p - 1 - 3*n)}
    cnpr <- cnp + chr
    cnpr * sqrt(qchisq(alpha / n, p, lower.tail = FALSE))
  }

  ordered.indices <-
    switch(center0,
           "Medians" = {
             n <- nrow(x); p <- ncol(x)
             center <- colMedians(x)
             x.centr <- sweep(x, 2, center)
             sigma <-  crossprod(x.centr)/nrow(x.centr)
             rho <- cov2cor(sigma); sigma <- diag(sigma)
             alpha <- cvreg:::.estimateAlpha(x.centr)
             rho <- ((1-alpha)*rho) + (alpha*diag(p))
             order(mahalanobis_dist(x, center, cor2cov(rho, sigma)))
           },
           "SpatialMedian" = {
             n <- nrow(x); p <- ncol(x)
             center <- L1median(x)$center
             x.centr <- sweep(x, 2, center)
             sigma <-  crossprod(x.centr)/nrow(x.centr)
             rho <- cov2cor(sigma); sigma <- diag(sigma)
             alpha <- cvreg:::.estimateAlpha(x.centr)
             rho <- ((1-alpha)*rho) + (alpha*diag(p))
             order(mahalanobis_dist(x, center, cor2cov(rho, sigma)))
           }
    )
  m <- as.integer(m)
  stopifnot(n >= m, m > 0)

  ordered.x <- x[ordered.indices, , drop = FALSE]
  while (m < n && p > (rnk <- qr(var(ordered.x[1:m, , drop = FALSE]))$rank))
    m <- m + 1L

  subset <- 1:n %in% ordered.indices[1:m]

  presubset <- rep(FALSE, n)
  converged <- FALSE
  steps <- 1L
  while (steps < maxiter &&
         !converged) {
    r <- sum(subset)
    xhat <- x[subset, , drop = FALSE]
    center <- colMeans(xhat)
    cov <- cov(xhat)
    if (!all(eigen(cov,T,T)$values > 0)){
      cov <- covShrink(xhat)
    }
    dis <- sqrt(mahalanobis_dist(x, center, cov))
    converged <- !any(xor(presubset, subset))
    presubset <- subset
    limit <- chiCrit(n, p, r, alpha)
    subset <- dis < limit
    steps <- steps + 1L
  }

  list(center = center, cov = cov, dist = dis^2, outliers = which(!subset),
       steps = steps, limit = limit, converged = converged)
}


#' Estimate a Robust Correlation Matrix
#'
#' @param x a data frame or matrix of numeric covariates
#' @param covfun a covariance function that returns a list with items named "center" and "cov". defaults to \code{\link{cov.shr}}
#' @param args an optional named list of arguments to pass to covfun
#'
#' @return a correlation matrix
#' @export
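#' @examples
#' \donttest{
#' ## Illustrative sketch only (hypothetical simulated data; assumes MASS is
#' ## installed): robust correlation via cov.ogk, with estimator-specific
#' ## arguments passed through 'args'.
#' set.seed(14)
#' dat <- rbind(MASS::mvrnorm(90, rep(0, 3), 0.5 + 0.5 * diag(3)), matrix(8, 4, 3))
#' robcor(dat)                                      # default: cov.shr
#' robcor(dat, covfun = cov.ogk, args = list(method = "Qn"))
#' }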
robcor <- function(x, covfun = cov.shr, args = NULL){
  if (!is.null(args)){
    args[["x"]] <- x
    out <- do.call(covfun, args)
  }
  else{
    out <- covfun(x)
  }
  return(cov2cor(out$cov))
}