tests/test_id.R

cat("# testing use of 'id' to get appropriate standard errors \n")
library("qgcomp")
#install.packages('sandwich', repos="https://cran.rstudio.com/", dependencies = c("Depends"))
2#library("sandwich")
set.seed(2123)
N = 250
t = 4
dat <- data.frame(row.names = 1:(N*t))
dat <- within(dat, {
  id = do.call("c", lapply(1:N, function(x) rep(x, t)))
  u =  do.call("c", lapply(1:N, function(x) rep(runif(1), t)))
  x1 = rnorm(N, u)
  x2 = rnorm(N, u)
  y = rnorm(N) + u + x1 - x2*x1
})


# from sandwich package 2.5.1 (to avoid having to install zoo dependency)
bread.glm <- function (x, ...) {
  if (!is.null(x$na.action)) 
    class(x$na.action) <- "omit"
  sx <- summary(x)
  wres <- as.vector(residuals(x, "working")) * weights(x, "working")
  dispersion <- if (substr(x$family$family, 1, 17) %in% c("poisson", 
                                                          "binomial", "Negative Binomial")) 
    1
  else sum(wres^2)/sum(weights(x, "working"))
  return(sx$cov.unscaled * as.vector(sum(sx$df[1:2])) * dispersion)
}

bread <- function (x, ...) 
{
  UseMethod("bread")
}


sandwich <- function (x, bread. = bread, meat. = meat, ...) {
  if (is.list(x) && !is.null(x$na.action)) 
    class(x$na.action) <- "omit"
  if (is.function(bread.)) 
    bread. <- bread.(x)
  if (is.function(meat.)) 
    meat. <- meat.(x, ...)
  n <- NROW(estfun(x))
  return(1/n * (bread. %*% meat. %*% bread.))
}

meatCL <- function (x, cluster = NULL, type = NULL, cadjust = TRUE, multi0 = FALSE, 
          ...) 
{
  if (is.list(x) && !is.null(x$na.action)) 
    class(x$na.action) <- "omit"
  ef <- estfun(x, ...)
  k <- NCOL(ef)
  n <- NROW(ef)
  rval <- matrix(0, nrow = k, ncol = k, dimnames = list(colnames(ef), 
                                                        colnames(ef)))
  if (is.null(cluster)) 
    cluster <- attr(x, "cluster")
  if (is.null(cluster)) 
    cluster <- seq_len(n)
  if (inherits(cluster, "formula")) {
    cluster_tmp <- expand.model.frame(x, cluster, na.expand = FALSE)
    cluster <- model.frame(cluster, cluster_tmp, na.action = na.pass)
  }
  else {
    cluster <- as.data.frame(cluster)
  }
  if ((n != NROW(cluster)) && !is.null(x$na.action) && (class(x$na.action) %in% 
                                                        c("exclude", "omit"))) {
    cluster <- cluster[-x$na.action, , drop = FALSE]
  }
  if (NROW(cluster) != n) 
    stop("number of observations in 'cluster' and 'estfun()' do not match")
  p <- NCOL(cluster)
  if (p > 1L) {
    cl <- lapply(seq_len(p), function(i) combn(seq_len(p), i, simplify = FALSE))
    cl <- unlist(cl, recursive = FALSE)
    sign <- sapply(cl, function(i) (-1L)^(length(i) + 1L))
    paste_ <- function(...) paste(..., sep = "_")
    for (i in (p + 1L):length(cl)) {
      cluster <- cbind(cluster, Reduce(paste_, unclass(cluster[, 
                                                               cl[[i]]])))
    }
    if (multi0) 
      cluster[[length(cl)]] <- seq_len(n)
  }
  else {
    cl <- list(1)
    sign <- 1
  }
  g <- sapply(1L:length(cl), function(i) {
    if (is.factor(cluster[[i]])) {
      length(levels(cluster[[i]]))
    }
    else {
      length(unique(cluster[[i]]))
    }
  })
  if (is.null(type)) {
    type <- if (class(x)[1L] == "lm") 
      "HC1"
    else "HC0"
  }
  type <- match.arg(type, c("HC", "HC0", "HC1", "HC2", "HC3"))
  if (type == "HC") 
    type <- "HC0"
  if (type %in% c("HC2", "HC3")) {
    if (any(g == n)) 
      h <- hatvalues(x)
    if (!all(g == n)) {
      if (!(class(x)[1L] %in% c("lm", "glm"))) 
        warning("clustered HC2/HC3 are only applicable to (generalized) linear regression models")
      X <- model.matrix(x)
      if (any(alias <- is.na(coef(x)))) 
        X <- X[, !alias, drop = FALSE]
      attr(X, "assign") <- NULL
      w <- weights(x, "working")
      XX1 <- if (is.null(w)) 
        chol2inv(qr.R(qr(X)))
      else chol2inv(qr.R(qr(X * sqrt(w))))
      res <- rowMeans(ef/X, na.rm = TRUE)
      res[apply(abs(ef) < .Machine$double.eps, 1L, all)] <- 0
      matpower <- function(X, p) {
        if ((ncol(X) == 1L) && (nrow(X) == 1L)) 
          return(X^p)
        Xeig <- eigen(X, symmetric = TRUE)
        if (any(Xeig$values < 0)) 
          stop("matrix is not positive semidefinite")
        sqomega <- diag(Xeig$values^p)
        return(Xeig$vectors %*% sqomega %*% t(Xeig$vectors))
      }
    }
  }
  for (i in 1L:length(cl)) {
    efi <- ef
    adj <- if (multi0 & (i == length(cl))) {
      if (type == "HC1") 
        (n - k)/(n - 1L)
      else 1
    }
    else {
      if (cadjust) 
        g[i]/(g[i] - 1L)
      else 1
    }
    if (type %in% c("HC2", "HC3")) {
      if (g[i] == n) {
        efi <- if (type == "HC2") {
          efi/sqrt(1 - h)
        }
        else {
          efi/(1 - hatvalues(x))
        }
      }
      else {
        for (j in unique(cluster[[i]])) {
          ij <- which(cluster[[i]] == j)
          Hij <- if (is.null(w)) {
            X[ij, , drop = FALSE] %*% XX1 %*% t(X[ij, 
                                                  , drop = FALSE])
          }
          else {
            X[ij, , drop = FALSE] %*% XX1 %*% t(X[ij, 
                                                  , drop = FALSE]) %*% diag(w[ij], nrow = length(ij), 
                                                                            ncol = length(ij))
          }
          Hij <- if (type == "HC2") {
            matpower(diag(length(ij)) - Hij, -0.5)
          }
          else {
            solve(diag(length(ij)) - Hij)
          }
          efi[ij, ] <- drop(Hij %*% res[ij]) * X[ij, 
                                                 , drop = FALSE]
        }
      }
      efi <- sqrt((g[i] - 1L)/g[i]) * efi
    }
    efi <- if (g[i] < n) 
      apply(efi, 2L, rowsum, cluster[[i]])
    else efi
    rval <- rval + sign[i] * adj * crossprod(efi)/n
  }
  if (type == "HC1") 
    rval <- (n - 1L)/(n - k) * rval
  return(rval)
}

estfun.glm <- function (x, ...) {
  xmat <- model.matrix(x)
  xmat <- naresid(x$na.action, xmat)
  if (any(alias <- is.na(coef(x)))) 
    xmat <- xmat[, !alias, drop = FALSE]
  wres <- as.vector(residuals(x, "working")) * weights(x, "working")
  dispersion <- if (substr(x$family$family, 1, 17) %in% c("poisson", 
                                                          "binomial", "Negative Binomial")) 
    1
  else sum(wres^2, na.rm = TRUE)/sum(weights(x, "working"), 
                                     na.rm = TRUE)
  rval <- wres * xmat/dispersion
  attr(rval, "assign") <- NULL
  attr(rval, "contrasts") <- NULL
  #res <- residuals(x, type = "pearson")
  #if (is.ts(res)) 
  #  rval <- ts(rval, start = start(res), frequency = frequency(res))
  #if (is.zoo(res)) 
  #  rval <- zoo(rval, index(res), attr(res, "frequency"))
  return(rval)
}

estfun <- function (x, ...) 
{
  UseMethod("estfun")
}

vcovCL <- function (x, cluster = NULL, type = NULL, sandwich = TRUE, fix = FALSE, 
          ...) 
{
  rval <- meatCL(x, cluster = cluster, type = type, ...)
  if (sandwich) 
    rval <- sandwich(x, meat. = rval)
  if (fix && any((eig <- eigen(rval, symmetric = TRUE))$values < 
                 0)) {
    eig$values <- pmax(eig$values, 0)
    rval[] <- crossprod(sqrt(eig$values) * t(eig$vectors))
  }
  return(rval)
}
################################################################################
#
#
#
################################################################################
# pre quantize
expnms = "x1"
datl = quantize(dat, expnms = expnms)


#' \dontrun{
#' 
#' # delta method/bootstrap variance ignoring clustering
#' noclust = qgcomp.noboot(y~ x1, data=datl$dat, id="id", family=gaussian(), q = NULL)
#' noclust.b = qgcomp.boot(y~ x1, data=datl$dat, family=gaussian(), q = NULL, MCsize=1000)
#' 
#' # bootstrap variance with sampling by cluster
#' clust.b = qgcomp.boot(y~ x1, data=datl$dat, id="id", family=gaussian(), q = NULL, MCsize=5000, B = 500)
#' #clust.g = summary(geeglm(y~x1, data=datl$dat, id=id, corstr = "independence"))
#' fitglm = glm(y~x1, data=datl$dat)
#' # cluster robust variance
#' sw.cov = vcovCL(fitglm, cluster=~id, type = "HC0")[2,2]
#' 
#' stopifnot(all.equal(clust.b$var.psi, sw.cov, tolerance = 0.005))
#' 
#' # change in variance should be the same in gee and bootstrap
#' stopifnot( 
#'   (clust.b$var.psi > noclust.b$var.psi) & (sw.cov > noclust.b$var.psi) |
#'   (clust.b$var.psi < noclust.b$var.psi) & (sw.cov < noclust.b$var.psi) 
#'   )
#' stopifnot( 
#'   (clust.b$var.psi > noclust$var.psi) & (sw.cov > noclust$var.psi) |
#'   (clust.b$var.psi < noclust$var.psi) & (sw.cov < noclust$var.psi) 
#' )
#' }
cat("done")

Try the qgcomp package in your browser

Any scripts or data that you put into this service are public.

qgcomp documentation built on Aug. 10, 2023, 5:07 p.m.