R/ldarob.R

Defines functions msd adclust summary.ldarob print.qdarob summary.qdarob predict.ldarob predict.qdarob ldarob

Documented in ldarob predict.ldarob predict.qdarob print.qdarob summary.ldarob summary.qdarob

#' Robust Linear Discriminant Analysis
#'
#' @description This model utilizes robust estimation in order to improve the performance
#' of the standard linear discriminant. This is a heavy modification of MASS's lda function. Due to the substantial modifications
#' the returned object is of its own "ldarob" class and has an accompanying predict method.
#'
#' @param formula a model formula
#' @param data a data frame
#' @param prior a vector of prior probabilities for each class
#' @param method the scale estimation method. one of "rocke" (\code{\link{cov.rocke}}) or "shr" (\code{\link{cov.shr}})
#' @param tol tolerance for small-variance variables. If the variance is smaller than tol, the variable is declared constant. Defaults to 1e-04.
#'
#' @return
#' An ldarob object
#' @export
#'
#' @examples
#' ldarob(Species ~.,iris)
#'
ldarob <- function(formula, data, prior = proportions,method = c("rocke","shr"), tol = 1e-10){

  method <- match.arg(method)
  m <- model.frame(formula, data)
  Terms <- attr(m, "terms")
  grouping <- model.response(m)
  x <- model.matrix(Terms, m)
  xint <- match("(Intercept)", colnames(x), nomatch = 0L)
  if (xint > 0L)
    x <- x[, -xint, drop = FALSE]
  if (is.null(dim(x)))
    stop("'x' is not a matrix")
  x <- as.matrix(x)
  if (any(!is.finite(x)))
    stop("infinite, NA or NaN values in 'x'")
  n <- nrow(x)
  p <- ncol(x)
  if (n != length(grouping))
    stop("nrow(x) and length(grouping) are different")
  g <- as.factor(grouping)
  lev <- lev1 <- levels(g)
  counts <- as.vector(table(g))
  if (!missing(prior)) {
    if (any(prior < 0) || round(sum(prior), 5) != 1)
      stop("invalid 'prior'")
    if (length(prior) != nlevels(g))
      stop("'prior' is of incorrect length")
    prior <- prior[counts > 0L]
  }
  if (any(counts == 0L)) {
    empty <- lev[counts == 0L]
    warning(sprintf(ngettext(length(empty), "group %s is empty",
                             "groups %s are empty"), paste(empty, collapse = " ")),
            domain = NA)
    lev1 <- lev[counts > 0L]
    g <- factor(g, levels = lev1)
    counts <- as.vector(table(g))
  }
  n <- nrow(x)
  p <- ncol(x)
  proportions <- counts/n
  ng <- length(proportions)
  names(prior) <- names(counts) <- lev1
  mcd <- covRobFun(x, g, method = method)
  group.means <- mcd$center
  f1 <- mcd$f1
  cov <- mcd$swcov
  scaling <- mcd$scaling
  if (any(f1 < tol)) {
    const <- format((1L:p)[f1 < tol])
    stop(sprintf(ngettext(length(const), "variable %s appears to be constant within groups",
                          "variables %s appear to be constant within groups"),
                 paste(const, collapse = " ")), domain = NA)
  }
  sX <- svd(cov, nu = 0L)
  rank <- sum(sX$d > tol^2)
  if (rank == 0L)
    stop("rank = 0: variables are numerically constant")
  xbar <- colSums(prior %*% group.means)
  fac <- 1/(ng - 1)
  scaling <- scaling %*% sX$v[, 1L:rank] %*% diag(sqrt(1/sX$d[1L:rank]), ncol = rank)
  X <- sqrt((n * prior) * fac) * scale(group.means, center = xbar, scale = FALSE) %*% scaling
  X.s <- svd(X, nu = 0L)
  rank <- sum(X.s$d > tol * X.s$d[1L])
  if (rank == 0L)
    stop("group means are numerically identical")
  scaling <- scaling %*% X.s$v[, 1L:rank]

  if (is.null(dimnames(x)))
    dimnames(scaling) <- list(NULL, paste("LD", 1L:rank,sep = ""))
  else {
    dimnames(scaling) <- list(colnames(x), paste("LD", 1L:rank,sep = ""))
    dimnames(group.means)[[2L]] <- colnames(x)
  }
  cl <- match.call()
  cl[[1L]] <- as.name("ldarob")
  res <- structure(list(prior = prior, counts = counts, means = group.means, true.classes = m[,1],
                 scaling = scaling, lev = lev, svd = X.s$d[1L:rank], N = n,
                 call = cl, predictors = m, functionalForm = "linear"), class = "ldarob")
  res$terms <- Terms
  res$call <- cl
  res$predictors <- m[,-1]
  res$contrasts <- attr(x, "contrasts")
  res$xlevels <- .getXlevels(Terms, m)
  res$na.action <- attr(m, "na.action")
  res
}


#' Robust Quadratic Discriminant Analysis
#'
#' @description This model utilizes robust estimation in order to improve the performance
#' of the standard quadratic discriminant. This is a heavy modification of MASS's qda function. Due to the substantial modifications
#' the returned object is of its own "qdarob" class and has an accompanying predict method.
#'
#' @param formula a model formula
#' @param data a data frame
#' @param prior a vector of prior probabilities for each class
#' @param method the scale estimation method. one of "rocke" (\code{\link{cov.rocke}}) or "shr" (\code{\link{cov.shr}})#' @return
#' A qdarob object
#' @export
#'
#' @examples
#' qdarob(Species ~.,iris)
#'
qdarob = function (formula, data, grouping, prior = proportions,  method = c("rocke","shr")) {

  method <- match.arg(method)
  m <- model.frame(formula, data)
  Terms <- attr(m, "terms")
  grouping <- model.response(m)
  x <- model.matrix(Terms, m)
  xvars <- as.character(attr(Terms, "variables"))[-1L]
  if ((yvar <- attr(Terms, "response")) > 0)
    xvars <- xvars[-yvar]
  xint <- match("(Intercept)", colnames(x), nomatch = 0L)
  if (xint > 0)
    x <- x[, -xint, drop = FALSE]

  if (is.null(dim(x)))
    stop("'x' is not a matrix")
  x <- as.matrix(x)
  if (any(!is.finite(x)))
    stop("infinite, NA or NaN values in 'x'")
  n <- nrow(x)
  p <- ncol(x)
  if (n != length(grouping))
    stop("nrow(x) and length(grouping) are different")
  g <- as.factor(grouping)
  lev <- levels(g)
  counts <- as.vector(table(g))
  names(counts) <- lev
  proportions <- counts/length(g)
  ng <- length(proportions)
  if (any(prior < 0) || round(sum(prior), 5) != 1)
    stop("invalid 'prior'")
  if (length(prior) != ng)
    stop("'prior' is of incorrect length")
  names(prior) <- lev
  mcd <- covRobFun(x, g, method = method, return.all = TRUE)
  group.means <- mcd$center
  scaling <- array(dim = c(p, p, ng))
  ldet <- numeric(ng)
  for (i in 1L:ng) {
    cX <- mcd$within.lvl.cov[[i]]
    sX <- svd(cX, nu = 0)
    scaling[, , i] <- sX$v %*% diag(sqrt(1/sX$d), ncol=p)
    ldet[i] <- sum(log(sX$d))
  }
  if (is.null(dimnames(x)))
    dimnames(scaling) <- list(NULL, as.character(1L:p), lev)
  else {
    dimnames(scaling) <- list(colnames(x), as.character(1L:p), lev)
    dimnames(group.means)[[2L]] <- colnames(x)
  }
  cl <- match.call()
  cl[[1L]] <- as.name("qdarob")
  res <- structure(list(prior = prior, counts = counts, means = group.means, true.classes = m[,1],
              scaling = scaling, ldet = ldet, lev = lev, N = n, call = cl), class = "qdarob")
  res$terms <- Terms
  res$call <- cl
  res$predictors <- m[,-1]
  res$functionalForm <- "quadratic"
  res$contrasts <- attr(x, "contrasts")
  res$xlevels <- .getXlevels(Terms, m)
  res$na.action <- attr(m, "na.action")
  res
}

#' Prediction method for qdarob objects
#'
#' @param object the model fit
#' @param newdata optional newdata. if left as NULL the fitted values are returned.
#' @param type recognizes "probs" or "posterior" to return probabilities, "class" for class labels
#' @return a vector of class labels or a matrix of class probabilities
#' @export
#' @method predict qdarob
#'
predict.qdarob <- function(object, newdata = NULL, type = c("probs", "posterior", "class")){

  type <- match.arg(type)

  if (is.null(newdata)){
    newdata <- object$predictors
  }
  attr(object, "class") <- "qda"
  preds <- MASS:::predict.qda(object, newdata, method = "predictive")

  if (type == "class"){
    return(preds$class)
  }
  else if (type == "probs" || type == "posterior"){
    return(preds$posterior)
  }
}

#' Prediction method for ldarob objects
#'
#' @param object the model fit
#' @param newdata optional newdata. if left as NULL the fitted values are returned.
#' @param type recognizes "probs" or "posterior" to return probabilities, "class" for class labels, "LD" or "discriminant" for the linear discriminants
#' @return a vector of class labels, a matrix of probabilities, or a matrix of linear discriminants
#' @export
#' @method predict ldarob
#'
predict.ldarob <- function(object, newdata = NULL, type = c("probs", "posterior", "class", "LD", "discriminant")){

  type <- match.arg(type)

  if (is.null(newdata)){
    newdata <- object$predictors
  }
  attr(object, "class") <- "lda"
  preds <- MASS:::predict.lda(object, newdata, method = "debiased")

  if (type == "class"){
    return(preds$class)
  }
  else if (type == "probs" || type == "posterior"){
    return(preds$posterior)
  }
  else if (type == "LD" || type == "discriminant"){
    return(preds$x)
  }

}



#' Summary method for qdarob objects
#'
#' @param x the model fit
#' @export
#' @method summary qdarob
#'
summary.qdarob <- function(x){
  cat(crayon::bold(crayon::red("\nPrior probabilities of groups:\n")))
  print(round(x$prior,4))
  cat(crayon::bold(crayon::blue("\nSample probabilities of groups:\n")))
  likelihood <- round(x$counts, 4)
  likelihood <- round(likelihood / sum(likelihood), 4)
  names(likelihood) <- names(x$prior)
  print(likelihood)
  cat(crayon::bold(crayon::green("\nGroup means:\n")))
  print(round(x$means, 4))
}


#' Print method for qdarob objects
#'
#' @param x the model fit
#' @export
#' @method print qdarob
#'
print.qdarob <- function(x){
  if (!is.null(cl <- x$call)) {
    names(cl)[2L] <- ""
    cat("Call:\n")
    dput(cl, control = NULL)
  }
  cat(crayon::red("\nPrior probabilities of groups:\n"))
  print(round(x$prior,4))
  cat(crayon::green("\nGroup means:\n"))
  means <- x$means
  colnames(means) <- abbreviate(colnames(means), named = F, minlength = 3, method = "both.sides")
  print(round(means, 3))
}

#' Print method for ldarob objects
#'
#' @param x the model fit
#' @export
#' @method print ldarob
#'
print.ldarob <- function (x) {
  if (!is.null(cl <- x$call)) {
    names(cl)[2L] <- ""
    cat("Call:\n")
    dput(cl, control = NULL)
  }
  cat(crayon::red("\nPrior probabilities of groups:\n"))
  print(round(x$prior, 4))
  cat(crayon::green("\nGroup means:\n"))
  means <- x$means
  colnames(means) <- abbreviate(colnames(means), named = F, minlength = 3, method = "both.sides")
  print(round(means, 3))
  cat(crayon::magenta("\nCoefficients of linear discriminants:\n"))
  print(round(x$scaling, 4))
  svd <- x$svd
  names(svd) <- dimnames(x$scaling)[[2L]]
  if (length(svd) > 1L) {
    cat(crayon::cyan("\nProportion of trace:\n"))
    print(round(svd^2/sum(svd^2), 4L))
  }
  invisible(x)
}


#' Summary method for ldarob objects
#'
#' @param x the model fit
#' @export
#' @method summary ldarob
#'
summary.ldarob <- function(x) {
  cat(crayon::bold(crayon::red("\nPrior probabilities of groups:\n")))
  print(round(x$prior, 4))
  cat(crayon::bold(crayon::blue("\nSample probabilities of groups:\n")))
  likelihood <- round(x$counts, 4)
  names(likelihood) <- names(x$prior)
  print(likelihood)
  cat(crayon::bold(crayon::green("\nGroup means:\n")))
  means <- x$means
  colnames(means) <- abbreviate(colnames(means), named = F, minlength = 3, method = "both.sides")
  print(round(means, 4))
  cat(crayon::bold(crayon::magenta("\nCoefficients of linear discriminants:\n")))
  print(round(x$scaling, 4))
}


#' Robust Regularized Discriminant Analysis
#'
#' @description This model utilizes a combination of shrinkage and robust estimation in order to improve the performance
#' of the standard quadratic discriminant. Robustness is obtained by utilizing the minimum covariance determinant covariance
#' estimate. Regularization is performed in two ways: first each within-level covariance matrix
#' is shrunk towards a pooled covariance matrix. The amount of shrinkage for this is controlled by the hyperparameter \eqn{\alpha}.
#' Second, each covariance matrix is shrunk towards an identity matrix scaled by the geometric mean of the diagonal of
#' a respective matrix. This is controlled by the hyperparameter \eqn{\gamma}. A guide to help interpret how the two
#' hyperparameters interact is given below. \cr \cr
#'
#' \eqn{\gamma} = 0 ; \eqn{\alpha} = 0 : Equivalent to robust qda. \cr
#' \eqn{\gamma} = 0 ; \eqn{\alpha} = 1 : Equivalent to robust lda. \cr
#' \eqn{\gamma} = 1 ; \eqn{\alpha} = 0 : Levels are conditionally independent and within-level covariances are the scaled diagonal. \cr
#' \eqn{\gamma} = 1 ; \eqn{\alpha} = 1 : Classification based on robust distance to the nearest mean. Similar to k-means. \cr
#'
#' \cr
#' Note that the returned object is of class "qdarob" and hence is compatible with its methods.
#'
#' @param formula a model formula
#' @param data a data frame
#' @param prior a vector of prior probabilities for each class
#' @param alpha parameter controlling shrinkage towards pooled covariance.
#' @param gamma within-level covariance shrinkage parameter.
#' @param kappa the minimum proportion of observations retained in the minimum covariance determinant
#' estimators of the covariances. defaults to 0.75.
#' @return
#' A qdarob object
#' @export
#'
#' @examples
#' rdarob(Species ~.,iris)
#'
rdarob = function (formula, data, grouping, prior = proportions, gamma = 0.5, alpha = 0.5, kappa = 0.75) {

  method <- match.arg(method)
  m <- model.frame(formula, data)
  Terms <- attr(m, "terms")
  grouping <- model.response(m)
  x <- model.matrix(Terms, m)
  xvars <- as.character(attr(Terms, "variables"))[-1L]
  if ((yvar <- attr(Terms, "response")) > 0)
    xvars <- xvars[-yvar]
  xint <- match("(Intercept)", colnames(x), nomatch = 0L)
  if (xint > 0)
    x <- x[, -xint, drop = FALSE]

  if (is.null(dim(x)))
    stop("'x' is not a matrix")
  x <- as.matrix(x)
  if (any(!is.finite(x)))
    stop("infinite, NA or NaN values in 'x'")
  n <- nrow(x)
  p <- ncol(x)
  if (n != length(grouping))
    stop("nrow(x) and length(grouping) are different")
  g <- as.factor(grouping)
  lev <- levels(g)
  counts <- as.vector(table(g))
  names(counts) <- lev
  proportions <- counts/length(g)
  ng <- length(proportions)
  if (any(prior < 0) || round(sum(prior), 5) != 1)
    stop("invalid 'prior'")
  if (length(prior) != ng)
    stop("'prior' is of incorrect length")
  names(prior) <- lev
  mcd <- covRobFun2(x, g, kappa = kappa, gamma = gamma, alpha = alpha)
  group.means <- mcd$center
  scaling <- array(dim = c(p, p, ng))
  ldet <- numeric(ng)
  for (i in 1L:ng) {
    cX <- mcd$covs[[i]]
    sX <- svd(cX, nu = 0)
    scaling[, , i] <- sX$v %*% diag(sqrt(1/sX$d), ncol=p)
    ldet[i] <- sum(log(sX$d))
  }
  if (is.null(dimnames(x)))
    dimnames(scaling) <- list(NULL, as.character(1L:p), lev)
  else {
    dimnames(scaling) <- list(colnames(x), as.character(1L:p), lev)
    dimnames(group.means)[[2L]] <- colnames(x)
  }
  cl <- match.call()
  cl[[1L]] <- as.name("rdarob")
  res <- structure(list(prior = prior, counts = counts, means = group.means, true.classes = m[,1],
                        scaling = scaling, ldet = ldet, lev = lev, N = n, call = cl), class = "qdarob")
  res$terms <- Terms
  res$call <- cl
  res$predictors <- m[,-1]
  res$functionalForm <- "quadratic"
  res$contrasts <- attr(x, "contrasts")
  res$xlevels <- .getXlevels(Terms, m)
  res$na.action <- attr(m, "na.action")
  res
}



adclust <- function(X, nclust=3, max.iter=100, tol=1e-9, scale = TRUE){
  X <- as.matrix(X); columnNames <- colnames(X)
  n = nrow(X); p = ncol(X)
  if (scale) X <- scale(X)
  Sigma = mpd(cov(X))
  Gamma_old = cvreg:::.adjLoadings(eigen(Sigma,T)$vectors[,1:nclust])
  change <- Inf; iter <- 0
  while (change > tol && iter < max.iter){
    scores = X%*%Gamma_old
    xcmeans  = e1071::cmeans(scores, nclust)
    H = model.matrix( ~ 0 + as.factor(xcmeans$cluster))
    M = crossprod(X,H)%*%pseudoinverse(crossprod(H))
    Sigma = tcrossprod(M,H)%*%tcrossprod(H,M)
    Gamma = cvreg:::.adjLoadings(eigen(Sigma)$vectors[,1:nclust])
    change = base::norm(Gamma_old-Gamma, "2")
    Gamma_old  = Gamma
    iter = iter + 1
  }
  loadings <- cvreg:::.adjLoadings(Gamma)
  scores <- X%*%loadings
  values <- apply(scores,2,var)
  ord <- order(values, decreasing = T)
  values <- values[ord]; loadings <- loadings[,ord]; scores <- scores[,ord]
  rownames(loadings) <- columnNames;
  colnames(loadings) <- colnames(scores) <- paste0("C", 1:ncol(scores))
  fit <- list(loadings = loadings, values = apply(scores,2,var), scores = scores)
  return(fit)
}


msd <- function(formula, data, ncomp = NULL, lambda = 1, scale = TRUE){
  if (scale) data <- Scale(data)
  X <- model.matrix(formula, data);
  columnNames <- colnames(X)[-1]; X <- X[,-1]
  n = nrow(X); p = ncol(X)
  response <- model.response(model.frame(formula, data))
  response  <- as.numeric(as.factor(response))
  id = unique(response)
  if (is.null(ncomp)) ncomp <- id - 1
  ncomp = max(ncomp, 2)
  SigmaWithin = array(0,c(p,p))
  for (i in 1:ncomp){
    idx = which(response==id[i])
    SigmaWithin = SigmaWithin + crossprod(X[idx,])
  }
  SigmaBtwn = array(0,c(p,p))
  mu  = colMeans(X)
  for (i in 1:ncomp){
    idx = which(response==id[i])
    Nk     = length(idx)
    meandiff  = (colMeans(X[idx,])-mu)
    SigmaBtwn = SigmaBtwn + Nk*outer(meandiff,meandiff)
  }
  SigmaDiff <- SigmaBtwn-(lambda*SigmaWithin)
  loadings <- cvreg:::.adjLoadings(eigen(SigmaDiff,T)$vectors[,1:ncomp])
  scores <- X%*%loadings
  rownames(loadings) <- columnNames;
  colnames(loadings) <- colnames(scores) <- paste0("C", 1:ncol(scores))
  fit <- list(loadings = loadings, values = apply(scores,2,var), scores = scores)
  return(fit)
}



covRobFun <- function (x, grouping,  method = c("rocke", "shr"), return.all = F) {
  method <- match.arg(method)
  if (method=="rocke"){
    covfun <- cov.rocke
  }
  else{
    covfun <- cov.shr
  }
  covMWcd.B <- function() {
    group.means <- matrix(0, ng, p)
    mcd.lev <- list()
    for (i in 1:ng) {
      mcd <- covfun(x[which(grouping == lev[i]), ])
      if (return.all){mcd.lev[[lev[i]]] <- mcd$cov; rownames(mcd.lev[[lev[i]]]) <- colnames(mcd.lev[[lev[i]]]) <- colnames(x)}
      group.means[i, ] <- mcd$center
    }
    mcd <- covfun(x - group.means[g, ])
    wt <- rep(1, nrow(x)); wt[mcd$outliers] <- 0
    wcov = try(cov.nlshrink((x - group.means[g, ])[wt==1, ]), silent = TRUE)
    if (class(wcov)=="try.error"){
      wcov <- mcd$cov
    }
    wcov.trc <- sum(diag(wcov)); mcd.trc <- sum(diag(mcd$cov)); wcov <- wcov * (mcd.trc/wcov.trc)
    f1 <- sqrt(diag(wcov)); scaling <- diag(1/f1, p); cq <- sum(wt)/(sum(wt) - ng)
    swcov <- try(cq * cvreg:::cov.nlshrink( ((x - group.means[g, ])[wt==1, ])  %*% scaling, k = ng), silent = TRUE)
    if (class(swcov)=="try.error"){
      swcov <- cq * covShrink( ((x - group.means[g, ])[wt==1, ])  %*% scaling, target = "unequal")
    }
    ans <- list(center = group.means, cov = wcov, swcov = swcov, dist = mcd$dist, outliers = mcd$outliers, f1 = f1, scaling = scaling)
    if (return.all) {ans[["within.lvl.cov"]] <- mcd.lev}
    return(ans)
  }
  x <- as.matrix(x)
  p <- ncol(x)
  n <- nrow(x)
  g <- as.factor(grouping)
  lev <- levels(g)
  ng <- length(lev)
  out <- covMWcd.B()
  colnames(out$center) <- colnames(x)
  rownames(out$center) <- lev
  colnames(out$cov) <- rownames(out$cov) <- colnames(x)
  return(structure(out, class = "covRobust"))
}


covRobFun2 <- function (x, grouping, kappa = 0.75, alpha, gamma) {
  covMWcd.B <- function() {
    group.means <- matrix(0, ng, p)
    mcd.lev <- list()
    for (i in 1:ng) {
      mcd <- cov.mrcd(x[which(grouping == lev[i]), ], kappa = kappa)
      mcd.lev[[lev[i]]] <- mcd$cov; rownames(mcd.lev[[lev[i]]]) <- colnames(mcd.lev[[lev[i]]]) <- colnames(x)
      group.means[i, ] <- mcd$center
    }
    mcd <- cov.mrcd(x - group.means[g, ], kappa = kappa)
    wcov <- mcd$cov
    for (i in 1:ng) {
      mcd.lev[[lev[i]]] <- ((1-alpha)* mcd.lev[[i]]) + (alpha*wcov)
      mcd.lev[[lev[i]]] <- ((1-gamma)* mcd.lev[[i]])  + (gamma*diag(rep(mean(diag(mcd.lev[[i]])),length(diag(mcd.lev[[i]])))))
    }
    ans <- list(center = group.means, covs = mcd.lev, dist = mcd$dist, outliers = mcd$outliers)
    return(ans)
  }
  x <- as.matrix(x)
  p <- ncol(x)
  n <- nrow(x)
  g <- as.factor(grouping)
  lev <- levels(g)
  ng <- length(lev)
  out <- covMWcd.B()
  colnames(out$center) <- colnames(x)
  rownames(out$center) <- lev
  return(structure(out, class = "covRobust"))
}
abnormally-distributed/cvreg documentation built on May 3, 2020, 3:45 p.m.