R/clustSE.R
In CR2: Compute Cluster Robust Standard Errors with Degrees of Freedom Adjustments

Documented in clustSE

#' Cluster robust standard errors with degrees of freedom adjustments (for lm and glm objects)
#'
#' Function to compute the CR0, CR1, CR2 cluster
#' robust standard errors (SE) with Bell and McCaffrey (2002)
#' degrees of freedom (df) adjustments. Useful when dealing with datasets with a few clusters.
#' Shows output using different CR types and degrees of freedom choices (for comparative purposes only).
#' For linear and logistic regression models (as well as other GLMs). Computes the BRL-S2 variant.
#'
#' @importFrom stats nobs resid residuals var coef pt model.matrix family weights fitted.values
#' @param mod The \code{lm} model object.
#' @param clust The cluster variable (with quotes).
#' @param digits Number of decimal places to display.
#' @param ztest If a normal approximation should be used as the naive degrees of freedom. If FALSE, the between-within degrees of freedom will be used.
#' @return A data frame with the CR adjustments with p-values.
#' \item{estimate}{The regression coefficient.}
#' \item{se.unadj}{The model-based (regular, unadjusted) SE.}
#' \item{CR0}{Cluster robust SE based on Liang & Zeger (1986).}
#' \item{CR1}{Cluster robust SE (using an adjustment based on number of clusters).}
#' \item{CR2}{Cluster robust SE based on Bell and McCaffrey (2002).}
#' \item{tCR2}{t statistic based on CR2.}
#' \item{dfn}{Degrees of freedom(naive): can be infinite (z) or between-within (default). User specified.}
#' \item{dfBM}{Degrees of freedom based on Bell and McCaffrey (2002).}
#' \item{pv.unadj}{p value based on model-based standard errors.}
#' \item{CR0pv}{p value based on CR0 SE with dfBM.}
#' \item{CR0pv.n}{p value  based on CR0 SE with naive df.}
#' \item{CR1pv}{p value based on CR1 SE with dfBM.}
#' \item{CR1pv.n}{p value  based on CR1 SE with naive df.}
#' \item{CR2pv}{p value based on CR2 SE with dfBM.}
#' \item{CR2pv.n}{p value  based on CR2 SE with naive df.}
#'
#' @examples
#' clustSE(lm(mpg ~ am + wt, data = mtcars), 'cyl')
#' data(sch25)
#' clustSE(lm(math ~ ses + minority + mses + mhmwk, data = sch25), 'schid')
#'
#' @references
#' \cite{Bell, R., & McCaffrey, D. (2002). Bias reduction in standard errors for linear regression with multi-stage samples. Survey Methodology, 28, 169-182.
#' (\href{https://www150.statcan.gc.ca/n1/pub/12-001-x/2002002/article/9058-eng.pdf}{link})}
#'
#' Liang, K.Y., & Zeger, S. L. (1986). Longitudinal data analysis using generalized linear models. \emph{Biometrika, 73}(1), 13–22.
#' \doi{10.1093/biomet/73.1.13}
#'
#' @export
clustSE <- function(mod, clust = NULL, digits = 3, ztest = FALSE){

  #if (is.null(data))
  data <- eval(mod$call$data) #OLD
  if (class(mod)[1] != 'glm' & class(mod)[1] != 'lm') stop("Must include a model object of class glm or lm.")
  # is(mod, c('lm', 'glm')) {
  #   stop("Must include a model object of class glm or lm.")
  #   }
  if (is.null(clust)) stop("Must include a cluster name. clust = 'cluster'")

  tmp <- summary(mod)
  se.n <- tmp$coefficients[,2]
  pv.n <- tmp$coefficients[,4]

  #X <- model.matrix.lm(mod, data, na.action = "na.pass")
  X <- Xo <- model.matrix(mod) #to keep NAs if
  n <- nobs(mod) #how many total observations

  if (family(mod)[[1]] != 'gaussian') {
    wts <- weights(mod, "working")
    X <- X * sqrt(wts)
    #re <- resid(mod, 'working') * wts
    #re <- resid(mod, 'response') #y - pp
    re <- resid(mod, 'pearson')
  } else {
    wts <- rep(1, n)
    re <- resid(mod, 'pearson')
  }

  Wm <- diag(wts) #Weight matrix

  if(nrow(X) != nrow(data)) {
    #warning("Just a note: Missing data in original data.")
    data <- data[names(fitted.values(mod)), ]
  }

  #data[,clust] <- as.character(data[,clust])

  NG <- length(table(data[,clust])) #how many clusters
  #cpx <- solve(crossprod(X)) #(X'X)-1 or inverse of the cp of X
  cpx <- chol2inv(qr.R(qr(X))) #using QR decomposition, faster, more stable?
  #cpx <- chol2inv(chol(t(Xo) %*% Wm %*% Xo))

  cnames <- names(table(data[,clust])) #names of the clusters
  js <- table(data[,clust]) #how many in each cluster
  k <- mod$rank #predictors + intercept

  Xj <- function(x){ #inverse of the symmetric square root (p. 709 IK)
    index <- which(data[,clust] == x)
    Xs <- X[index, , drop = F] #X per cluster [original, unweighted]
    #wm <- Wm[index, index]
    Hm <- Xs %*% cpx %*% t(Xs) # %*% Wm[index, index] # the Hat matrix, not symmetric
    #Hm <- sqrt(wm) %*% Xs %*% cpx %*% t(Xs) %*% sqrt(wm) # the Hat matrix
    ## This is the orig formulation
    IHjj <- diag(nrow(Xs)) - Hm
    return(MatSqrtInverse(IHjj)) #I - H

    # V3 <- chol(Wm[index, index]) #based on MBB
    # Bi <- V3 %*% IHjj %*% Wm[index, index] %*% t(V3)
    # t(V3) %*% MatSqrtInverse(Bi) %*% V3

  }

  ml <- lapply(cnames, Xj) #need these matrices for CR2 computation

  #re <- resid(mod, 'pearson') #works for both lm and glm

  cdata <- data.frame(data[,clust], re ) #data with cluster and residuals
  names(cdata) <- c('cluster', 'r')
  gs <- names(table(cdata$cluster))

  u1 <- u2 <- matrix(NA, nrow = NG, ncol = k)
  dfa <- (NG / (NG - 1))  * ((n - 1)/(n - k)) #for HC1 / Stata
  #dfa <- NG / (NG - 1) #used by SAS

  ## function for Liang and Zeger SEs
  uu1 <- function(x){
    t(cdata$r[cdata$cluster == x]) %*%
      X[cdata$cluster == x, ] #e'X #plain vanilla
  }

  u1 <- t(sapply(cnames, uu1)) #use as a list?
  #have to transpose to get into proper shape
  #because of sapply
  mt <- crossprod(u1)

  # br <- solve(crossprod(X)) #cpx
  br <- cpx #just copying, got this earlier
  clvc <- br %*% mt %*% br #LZ vcov matrix

  uu2 <- function(x){
    ind <- which(cnames == x)
    t(cdata$r[cdata$cluster == x]) %*% ml[[ind]] %*%
      X[cdata$cluster == x, ]
  }

  u2 <- t(sapply(cnames, uu2)) #have to transpose because of sapply
  mt2 <- crossprod(u2)
  clvc2 <- br %*% mt2 %*% br  #BR LZ2 vcov matrix

  #### To compute empirically-based DF

  ## STEP 1
  tXs <- function(s) {
    index <- which(cdata$cluster == s)
    Xs <- X[index, , drop = F]
    IHjj <- diag(nrow(Xs)) - Xs %*% cpx %*% t(Xs)
    MatSqrtInverse(IHjj) %*%
      Xs
  } # A x Xs / Need this first

  tX <- lapply(cnames, tXs)

  ## STEP 2
  tHs <- function(s) {
    index <- which(cdata$cluster == s)
    Xs <- X[index, , drop = F]

    ss <- matrix(0, nrow = n, ncol = length(index)) #all 0, n x G
    ss[cbind(index, 1:length(index))] <- 1 #indicator
    ss - X %*% cpx %*% t(Xs) #overall X x crossprod x Xs'
  }

  tH <- lapply(cnames, tHs) #per cluster

  ## STEP 3

  id <- diag(k) #number of coefficients // for different df
  degf <- numeric(k) #vector for df // container

  for (j in 1:k){ #using a loop since it's easier to see

    Gt <- sapply(seq(NG), function(i) tH[[i]] %*%
                   tX[[i]] %*% cpx %*% id[,j])
    #already transposed because of sapply: this is G'
    #ev <- eigen(Gt %*% t(Gt))$values #eigen values: n x n
    ev <- eigen(t(Gt) %*% Gt)$values #much quicker this way, same result: p x p:
    degf[j] <- (sum(ev)^2) / sum(ev^2) #final step to compute df
  }

  ### Computing the dofHLM

  if (ztest == FALSE){
    ### figuring out Number of L2 and L1 vars for dof

    chk <- function(x){
      vrcheck <- sum(tapply(x, data[,clust], var), na.rm = T) #L1,
      # na needed if only one observation with var = NA
      y <- 1 #assume lev1 by default
      if (vrcheck == 0) (y <- 2) #if variation, then L2
      return(y)
    }

    if (family(mod)[[1]] != 'gaussian') X <- model.matrix(mod) ## use original matrix to check for df
    ## don't need the X matrix after this

    levs <- apply(X, 2, chk) #all except intercept
    levs[1] <- 1 #intercept

    tt <- table(levs)
    l1v <- tt['1']
    l2v <- tt['2']

    l1v[is.na(l1v)] <- 0
    l2v[is.na(l2v)] <- 0

    ####

    #ns <- nobs(mod)
    df1 <- n - l1v - NG #l2v old HLM
    df2 <- NG - l2v - 1

    dfn <- rep(df1, length(levs)) #naive
    dfn[levs == '2'] <- df2
    dfn[1] <- df2 #intercept

      } else {

    dfn <- rep(Inf, k) #infinite
  }

  ### Putting it all together

  CR1 <- sqrt(diag(clvc * dfa))
  CR2 <- sqrt(diag(clvc2))
  CR0 = sqrt(diag(clvc))
  beta <- coef(mod)
  tCR0 <- beta / CR0
  tCR1 <- beta / CR1
  tCR2 <- beta / CR2
  CR0pv.n = round(2 * pt(-abs(tCR0), df = dfn), digits) #naive: either inf or HLM df
  CR0pv = round(2 * pt(-abs(tCR0), df = degf), digits) #using adjusted df
  CR1pv.n = round(2 * pt(-abs(tCR1), df = dfn), digits) #naive: either inf or HLM df
  CR1pv = round(2 * pt(-abs(tCR1), df = degf), digits) #using adjusted df
  CR2pv.n = round(2 * pt(-abs(tCR2), df = dfn), digits) #naive: either inf or HLM df
  CR2pv = round(2 * pt(-abs(tCR2), df = degf), digits) #using adjusted df

  ####
  res <- data.frame(estimate = round(beta, digits),
                    se.unadj = round(se.n, digits),
                    CR0 = round(CR0, digits), #LZeger
                    CR1 = round(CR1, digits), #stata
                    CR2 = round(CR2, digits),
                    tCR2 = round(tCR2, digits),
                    dfn = dfn,
                    dfBM = round(degf, 2),
                    pv.unadj = round(pv.n, digits),
                    CR0pv = round(CR0pv, digits),
                    CR0pv.n = round(CR0pv.n, digits),
                    CR1pv,
                    CR1pv.n,
                    CR2pv.n,
                    CR2pv) #BM
  return(res)
}