R/calc_se_da.R

Defines functions getSE_Model calcOFIM_QML calcEFIM_QML calcEFIM_LMS calcOFIM_LMS calcSE_da solveFIM calcHessian fdHESS calcFIM_da

# Compute the Fisher information matrix (FIM) and the covariance matrices of
# the parameter estimates for a model estimated with `method = "lms"` or
# `method = "qml"`.
#
# Arguments (only those handled directly here; the rest are forwarded):
#   model, finalModel  model objects forwarded to the FIM helpers
#   theta              named vector of free parameters
#   data               data object; `data$cluster` is forwarded as `cluster`
#   method             "lms" or "qml"; anything else raises an error
#   calc.se            if FALSE nothing is computed (see Returns)
#   FIM                "observed" or "expected"; anything else raises an error
#   robust.se          if TRUE the sandwich invH %*% I %*% invH is returned
#   NA__               sentinel that solveFIM() fills a matrix with when the
#                      inversion fails
#
# Returns a list with
#   FIM          the information matrix I
#   vcov.all     output of getVCOV_LabelledParams(), relabelled with
#                `model$lavLabels`
#   vcov.free    covariance matrix of the parameters in `theta`
#   type         the value of `FIM`
#   raw.labels   column names of vcov.all before relabelling
#   n.additions  number of columns getVCOV_LabelledParams() added
# When `calc.se = FALSE` all matrix elements are NULL, `type` is "none",
# `raw.labels` is `names(theta)` and `n.additions` is 0. The legacy element
# names `vcov` and `vcov.sub` are still included in that case.
calcFIM_da <- function(model,
                       finalModel,
                       theta,
                       data = NULL,
                       method = "lms",
                       calc.se = TRUE,
                       FIM = "observed",
                       robust.se = FALSE,
                       P = NULL,
                       hessian = FALSE,
                       EFIM.parametric = TRUE,
                       NA__ = -999,
                       EFIM.S = 3e4,
                       epsilon = 1e-8,
                       R.max = 1e6,
                       verbose = FALSE,
                       cr1s = TRUE) {
  if (!calc.se) {
    # Use the same element names as the regular return value (vcov.all,
    # vcov.free); `vcov` and `vcov.sub` are kept for backward compatibility.
    return(list(FIM = NULL, vcov.all = NULL, vcov.free = NULL,
                vcov = NULL, vcov.sub = NULL, type = "none",
                raw.labels = names(theta), n.additions = 0))
  }
  if (verbose) printf("Calculating standard errors (%s)\n", FIM)

  I <- switch(method,
     lms =
       switch(FIM,
          observed = calcOFIM_LMS(model, theta = theta, data = data,
                                  epsilon = epsilon, hessian = hessian, P = P,
                                  robust.se = robust.se, cluster = data$cluster,
                                  cr1s = cr1s),
          expected = calcEFIM_LMS(model, finalModel = finalModel, theta = theta,
                                  data = data, epsilon = epsilon, S = EFIM.S,
                                  parametric = EFIM.parametric, verbose = verbose,
                                  R.max = R.max, P = P),
          stop2("FIM must be either expected or observed")),
     qml =
       switch(FIM,
          observed = calcOFIM_QML(model, theta = theta, data = data,
                                  hessian = hessian, epsilon = epsilon,
                                  robust.se = robust.se, cluster = data$cluster,
                                  cr1s = cr1s),
          expected = calcEFIM_QML(model, finalModel = finalModel, theta = theta,
                                  data = data, epsilon = epsilon, S = EFIM.S,
                                  parametric = EFIM.parametric, verbose = verbose,
                                  R.max = R.max),
          stop2("FIM must be either expected or observed")),
     stop2("Unrecognized method: ", method)
  )


  if (robust.se) {
    warnif(hessian && FIM == "observed",
           "`robust.se = TRUE` should not be paired with ",
           "`OFIM.hessian = TRUE` and `FIM = \"observed\"`")
    H <- calcHessian(model, theta = theta, data = data, method = method,
                     epsilon = epsilon, P = P)
    invH <- solveFIM(H, NA__ = NA__)

    if (isTRUE(all(invH == NA__))) {
      # solveFIM() signals a failed inversion with a matrix filled with NA__.
      # Multiplying that sentinel into the sandwich would produce finite but
      # meaningless variances, so the failure marker is passed on unchanged.
      vcov <- invH
    } else {
      vcov <- invH %*% I %*% invH
    }

  } else {
    vcov <- solveFIM(I, NA__ = NA__)
  }

  vcov.all <- getVCOV_LabelledParams(vcov = vcov, model = model, theta = theta,
                                     method = method)

  # columns present in vcov.all but not in the covariance matrix of theta
  nAdditions   <- ncol(vcov.all) - ncol(vcov)
  lavLabels    <- model$lavLabels
  # labels of those columns of vcov.all that correspond to entries of theta
  subLavLabels <- lavLabels[colnames(vcov.all) %in% names(theta)]
  rawLabels    <- colnames(vcov.all)
  dimnames(vcov.all) <- list(lavLabels, lavLabels)
  dimnames(I) <- dimnames(vcov) <- list(subLavLabels, subLavLabels)

  list(FIM = I, vcov.all = vcov.all, vcov.free = vcov, type = FIM,
       raw.labels = rawLabels, n.additions = nAdditions)
}


# Failure-tolerant front end to nlme::fdHess().
#
# `pars` and everything in `...` are handed to nlme::fdHess(); only the
# `Hessian` element of its result is returned. If the call fails, a warning
# carrying the error message is issued and a length(pars) x length(pars)
# matrix of NA is returned instead.
fdHESS <- function(pars, ...) {
  onFailure <- function(e) {
    warning2("Calculation of Hessian matrix failed...\n  ", e$message)
    nPars <- length(pars)
    matrix(NA, nrow = nPars, ncol = nPars)
  }

  tryCatch({
    fit <- nlme::fdHess(pars = pars, ...)
    fit$Hessian
  }, error = onFailure)
}


# Compute the negative Hessian of the log-likelihood at `theta`.
#
#   method = "lms": takes `$I.obs` from observedInfoFromLouisLms(). `P` is
#     computed with estepLms() when not supplied. If that call fails, a
#     warning is issued and the computation is retried once on a copy of the
#     model with `gradientStruct$hasCovModel` set to TRUE.
#   method = "qml": finite-difference Hessian of logLikQml (called with
#     sign = -1) via fdHESS().
#   any other method: error.
#
# `epsilon` is accepted for interface compatibility but is not used here.
# Warnings raised while computing the Hessian are suppressed.
calcHessian <- function(model, theta, data, method = "lms",
                        epsilon = 1e-8, P = NULL) {
  if (method == "lms") {
    if (is.null(P)) P <- estepLms(model, theta = theta, data = data)
    # negative hessian (sign = -1)
    fH <- \(model) observedInfoFromLouisLms(model = model, theta = theta,
                                            data = data, P = P)$I.obs

    H <- tryCatch(suppressWarnings(fH(model)), error = function(e) {
      # Report the error text (not the condition object), as fdHESS() does.
      warning2("Optimized calculation of Hessian failed, attempting to switch!\n",
               conditionMessage(e))
      # This modifies a handler-local copy of `model` only.
      model$gradientStruct$hasCovModel <- TRUE

      suppressWarnings(fH(model))
    })

  } else if (method == "qml") {
    # negative hessian (sign = -1)
    H <- suppressWarnings(
      fdHESS(pars = theta, fun = logLikQml, model = model,
             sign = -1, .relStep = .Machine$double.eps^(1/5))
    )

  } else {
    # Previously an unknown method fell through to "object 'H' not found".
    stop2("Unrecognized method: ", method)
  }

  H
}


# Invert an information / Hessian matrix with graceful degradation.
#
#   1. Try solve(H) (or GINV(H) when `use.ginv = TRUE`).
#   2. If solve(H) raises an error, retry once with GINV(H).
#   3. If GINV(H) raises an error too, return `H` with every entry replaced by
#      the sentinel `NA__`.
#
# Warnings whose message contains "NaN" are muffled; all other warnings are
# passed on to the caller. Warnings are handled with a calling handler so that
# the inversion is not abandoned and an error following a warning is still
# covered by the fallbacks above. (Previously the warning handler re-ran
# solve(H) unprotected, even when `use.ginv = TRUE`.)
solveFIM <- function(H, NA__ = -999, use.ginv = FALSE) {
  invert <- if (use.ginv) GINV else solve

  tryCatch(
    withCallingHandlers(
      invert(H),
      warning = function(w) {
        if (grepl("NaN", conditionMessage(w))) invokeRestart("muffleWarning")
      }
    ),
    error = function(e) {
      if (!use.ginv) return(solveFIM(H, NA__ = NA__, use.ginv = TRUE))

      # Both inversions failed: flag every entry with the sentinel.
      H[TRUE] <- NA__
      H
    }
  )
}


# Turn a covariance matrix into a vector of standard errors.
#
# Returns sqrt(diag(vcov)), with every NA/NaN entry replaced by `NA__`. If the
# result already carries names, they are overwritten with `rawLabels`.
# When `calc.se` is FALSE or `vcov` is NULL, a vector of `NA__` with one entry
# per element of `rawLabels` is returned (with a warning in the NULL case).
# A warning is also issued when all, or some, standard errors are undefined.
calcSE_da <- function(calc.se = TRUE, vcov, rawLabels, NA__ = -999) {
  placeholder <- function() rep(NA__, length(rawLabels))

  if (!calc.se) return(placeholder())

  if (is.null(vcov)) {
    warning2("Fisher Information Matrix (FIM) was not calculated, ",
             "unable to compute standard errors", immediate. = FALSE)
    return(placeholder())
  }

  # negative variances yield NaN; the accompanying sqrt() warning is silenced
  se <- suppressWarnings({
    variances <- diag(vcov)
    sqrt(variances)
  })
  undefined <- is.na(se)

  if (all(undefined)) {
    warning2("Standard errors could not be computed, negative Hessian is singular.",
             immediate. = FALSE)
  } else if (any(is.nan(se))) {
    warning2("Standard errors for some coefficients could not be computed.",
             immediate. = FALSE)
  }

  hasNames <- !is.null(names(se))
  if (hasNames) names(se) <- rawLabels

  se[undefined] <- NA__
  se
}


# Observed Fisher information for the LMS approach.
#
#   hessian = TRUE : returns the negative Hessian from calcHessian().
#   otherwise      : outer product of the individual score contributions
#                    (BHHH / OPG), i.e. crossprod() of the N x k score matrix.
#   robust.se = TRUE with a non-NULL `cluster`: scores are first summed within
#                    each cluster and the cross-product of those sums is
#                    returned, optionally scaled by the CR1S factor
#                    (G / (G - 1)) * ((N - 1) / (N - k)) when `cr1s` is TRUE,
#                    G > 1 and N > k.
#
# `P` is computed with estepLms() when it is not supplied.
calcOFIM_LMS <- function(model, theta, data, hessian = FALSE,
                         epsilon = 1e-6, P = NULL,
                         robust.se = FALSE,
                         cluster   = NULL,
                         cr1s      = TRUE) {
  if (is.null(P)) P <- estepLms(model, theta = theta, data = data)

  if (hessian) {
    # negative hessian (sign = -1)
    return(calcHessian(model, theta = theta, data = data,
                       method = "lms", epsilon = epsilon, P = P))
  }

  # one row of scores per observation, one column per parameter
  scores <- suppressWarnings(
    gradientObsLogLikLms_i(theta, model = model, data = data,
                           P = P, sign = +1, epsilon = epsilon)
  )

  clustered <- robust.se && !is.null(cluster)
  if (!clustered) return(crossprod(scores))

  nObs  <- nrow(scores)
  nPars <- ncol(scores)

  stopif(length(cluster) != nObs,
         "Length of 'cluster' must equal the number of rows in the data / scores.")

  groups  <- as.factor(cluster)
  nGroups <- nlevels(groups)
  # row indices of the observations belonging to each factor level
  members <- split(seq_along(groups), groups)

  # s_g = sum of the scores of all observations in cluster g
  clusterScores <- matrix(0, nrow = nGroups, ncol = nPars)
  for (g in seq_len(nGroups)) {
    clusterScores[g, ] <- colSums(scores[members[[g]], , drop = FALSE])
  }

  meat <- crossprod(clusterScores)  # sum_g s_g s_g'

  # optional CR1S small-sample correction
  if (isTRUE(cr1s) && nGroups > 1 && nObs > nPars) {
    meat <- meat * (nGroups / (nGroups - 1)) * ((nObs - 1) / (nObs - nPars))
  }

  meat
}


# Monte-Carlo approximation of the expected Fisher information (LMS).
#
# A "population" of R = min(R.max, N * S) rows is generated, either by
# simulating from the parameter table of `finalModel` (parametric = TRUE) or
# by resampling rows of `data$data.full` with replacement. The per-row scores
# of that population are computed once; then S sub-samples of N rows each
# contribute the cross-product of their scores, and the average of the S
# cross-products is returned (a k x k matrix, k = length(theta)).
#
# When R equals N * S the sub-samples are consecutive, non-overlapping blocks;
# otherwise each sub-sample is drawn at random from the R rows.
calcEFIM_LMS <- function(model, finalModel = NULL, theta, data,
                         S         = 100,
                         parametric = TRUE,
                         epsilon    = 1e-6,
                         verbose    = FALSE,
                         R.max      = 1e6,
                         P          = NULL) {
  nPars   <- length(theta)           # number of free parameters
  nObs    <- data$n
  popSize <- min(R.max, nObs * S)
  warnif(R.max <= nObs, "R.max is less than N!")

  ovNames <- colnames(data$data.full)

  if (parametric) {
    stopif(is.null(finalModel), "finalModel must be included in calcEFIM_LMS")

    parTable   <- modelToParTable(finalModel, method = "lms")
    simulated  <- simulateDataParTable(parTable, N = popSize, colsOVs = ovNames)
    population <- simulated$oV

  } else {
    drawn      <- sample(data$n, popSize, replace = TRUE)
    population <- data$data.full[drawn, , drop = FALSE]
  }

  population <- patternizeMissingDataFIML(population)

  previousQuad <- if (is.null(P)) NULL else P$quad
  popEstep <- estepLms(model      = model,
                       theta      = theta,
                       data       = population,
                       recalcQuad = TRUE,
                       lastQuad   = previousQuad)

  # popSize x nPars matrix of individual scores
  scores <- suppressWarnings(
    gradientObsLogLikLms_i(theta   = theta,
                           model   = model,
                           data    = population,
                           P       = popEstep,
                           sign    = +1,
                           epsilon = epsilon)
  )

  info <- matrix(0, nrow = nPars, ncol = nPars)
  for (s in seq_len(S)) {
    rows <- if (popSize == nObs * S) {
      # non-overlapping split
      first <- (s - 1) * nObs + 1
      first:(first + nObs - 1)
    } else {
      sample(popSize, nObs)
    }

    info <- info + crossprod(scores[rows, , drop = FALSE])
  }

  if (verbose) cat("\n")

  info / S
}


# Monte-Carlo approximation of the expected Fisher information (QML).
#
# A "population" of R = min(R.max, N * S) rows is generated, either by
# simulating from the parameter table of `finalModel` (parametric = TRUE) or
# by resampling rows of `data$data.full` with replacement. The per-row scores
# of that population are computed once; then S sub-samples of N rows each
# contribute the cross-product of their scores, and the average of the S
# cross-products is returned (a k x k matrix, k = length(theta)).
#
# If the parametric simulation fails (or yields no data), a warning is issued
# and the population is obtained by resampling instead. Previously the
# fallback assigned a recursively computed information matrix to `population`,
# which was then treated as data.
calcEFIM_QML <- function(model, finalModel = NULL, theta, data, S = 100,
                         parametric = TRUE, epsilon = 1e-8, verbose = FALSE,
                         R.max = 1e6) {
  k <- length(theta)                       # number of free parameters
  N <- data$n
  R <- min(R.max, N * S)
  warnif(R.max <= N, "R.max is less than N!")

  ovs <- colnames(data$data.full)

  population <- NULL

  if (parametric) {
    stopif(is.null(finalModel), "finalModel must be included in calcEFIM_QML")

    parTable <- modelToParTable(finalModel, method = "qml")
    population <- tryCatch(
      simulateDataParTable(parTable, N = R, colsOVs = ovs)$oV,

      error = function(e) {
        warning2("Unable to simulate data for EFIM, using stochastic sampling instead")
        NULL  # triggers the resampling fallback below
      }
    )
  }

  if (is.null(population)) {
    # non-parametric: resample observed rows; drop = FALSE keeps a matrix /
    # data.frame even when there is a single observed variable
    population <- data$data.full[sample(N, R, replace = TRUE), , drop = FALSE]
  }

  model$data <- patternizeMissingDataFIML(population)

  if (!is.null(model$matrices$fullU)) {
    # recycle the N rows of fullU so that it has R rows, like the population
    fullU <- model$matrices$fullU
    model$matrices$fullU <- fullU[rep(seq_len(N), length.out = R), , drop = FALSE]
  }

  suppressWarnings({
    J <- gradientLogLikQml_i(theta = theta, model = model, sign = +1,
                             epsilon = epsilon)
  })

  I <- matrix(0, nrow = k, ncol = k)
  for (i in seq_len(S)) {
    if (R == N * S) {
      # non-overlapping split
      idx1 <- (i - 1) * N + 1
      sub  <- idx1:(idx1 + N - 1)
    } else {
      sub <- sample(R, N)
    }

    I <- I + crossprod(J[sub, , drop = FALSE])
  }

  if (verbose) cat("\n")

  I / S
}


# Observed Fisher information for the QML approach.
#
#   hessian = TRUE : returns the negative Hessian from calcHessian().
#   otherwise      : outer product of the individual score contributions
#                    (BHHH / OPG), i.e. crossprod() of the N x k score matrix.
#   robust.se = TRUE with a non-NULL `cluster`: scores are first summed within
#                    each cluster and the cross-product of those sums is
#                    returned, optionally scaled by the CR1S factor
#                    (G / (G - 1)) * ((N - 1) / (N - k)) when `cr1s` is TRUE,
#                    G > 1 and N > k.
#
# N is the number of rows of the score matrix (as in calcOFIM_LMS), so the
# correction does not depend on how `model$data` is stored.
calcOFIM_QML <- function(model, theta, data, hessian = FALSE,
                         epsilon = 1e-8,
                         robust.se = FALSE,
                         cluster   = NULL,
                         cr1s      = TRUE) {
  if (hessian) {
    # negative hessian (sign = -1)
    I <- calcHessian(model = model, theta = theta, data = data,
                     method = "qml", epsilon = epsilon)
    return(I)
  }

  # S: N x k matrix of individual score contributions (sign = +1 => score)
  S <- suppressWarnings(
    gradientLogLikQml_i(theta, model = model, sign = +1, epsilon = epsilon)
  )

  if (!robust.se || is.null(cluster)) {
    # classic OFIM (BHHH / OPG)
    return(crossprod(S))
  }

  stopif(length(cluster) != nrow(S),
         "Length of 'cluster' must equal the number of rows in the data / scores.")

  f <- as.factor(cluster)
  G <- nlevels(f)
  N <- nrow(S)
  k <- ncol(S)

  # s_g = sum_{i in g} s_i
  Sg <- matrix(0, nrow = G, ncol = k)
  lev <- levels(f)
  for (g in seq_len(G)) {
    idx <- which(f == lev[g])
    Sg[g, ] <- colSums(S[idx, , drop = FALSE])
  }

  B <- crossprod(Sg)  # meat = sum_g s_g s_g'

  # Optional CR1S small-sample correction
  if (isTRUE(cr1s) && G > 1 && N > k) {
    B <- B * (G / (G - 1)) * ((N - 1) / (N - k))
  }

  B
}


# Build a copy of `model` that carries standard errors instead of estimates.
#
# `lenThetaLabel` is increased by `n.additions`, the result is passed through
# replaceNonNaModelMatrices() with value -999, and fillModel() is then called
# on it with `se` as the parameter vector.
getSE_Model <- function(model, se, method, n.additions) {
  model$lenThetaLabel <- model$lenThetaLabel + n.additions

  replaceNonNaModelMatrices(model, value = -999) |>
    fillModel(theta = se, method = method)
}

Try the modsem package in your browser

Any scripts or data that you put into this service are public.

modsem documentation built on Aug. 27, 2025, 9:08 a.m.