R/GEESAR.R
In spatemR: Generalized Spatial Autoregresive Models for Mean and Variance

#' Generalized Estimating Equations with Spatial Autoregressive Components
#'
#' @description
#' `GEESAR` estimates generalized estimating equations (GEE) incorporating spatial autoregressive (SAR) components.  
#' It extends GEE models to account for spatial dependence in the response variable.
#'
#' @param formula A formula specifying the model structure (response ~ predictors).
#' @param family A description of the error distribution and link function. Default is `gaussian()`.
#' @param weights Optional vector of prior weights. Must be positive.
#' @param data A data frame containing the variables in the model.
#' @param W A spatial weights matrix defining the spatial dependence structure.
#' @param start Optional starting values for parameter estimation.
#' @param toler Convergence tolerance for iterative optimization. Default is `1e-05`.
#' @param maxit Maximum number of iterations for model fitting. Default is `50`.
#' @param trace Logical; if `TRUE`, prints iteration details. Default is `FALSE`.
#'
#' @details
#' The function estimates a spatially autoregressive GEE model by iteratively updating the spatial dependence 
#' parameter (`rho`) and regression coefficients (`beta`). The estimation follows a quasi-likelihood approach 
#' using iterative weighted least squares (IWLS).  
#'
#' The function supports common GLM families (`gaussian`, `binomial`, `poisson`, `Gamma`, `inverse.gaussian`) and 
#' their quasi-likelihood equivalents.
#'
#' @return A list of class `"GEESAR"` containing:
#' \item{coefficients}{Estimated regression coefficients.}
#' \item{rho}{Estimated spatial autoregressive parameter.}
#' \item{fitted.values}{Predicted values from the model.}
#' \item{linear.predictors}{Linear predictor values (`X * beta`).}
#' \item{prior.weights}{Weights used in estimation.}
#' \item{y}{Observed response values.}
#' \item{formula}{Model formula.}
#' \item{call}{Function call used to fit the model.}
#' \item{data}{Data used in the model.}
#' \item{converged}{Logical indicating whether the algorithm converged.}
#' \item{logLik}{Quasi-log-likelihood of the fitted model.}
#' \item{deviance}{Residual deviance.}
#' \item{df.residual}{Residual degrees of freedom.}
#' \item{phi}{Dispersion parameter estimate.}
#' \item{CIC}{Corrected Information Criterion.}
#' \item{RJC}{Robust Jackknife Correction.}
#'
#' @seealso
#' \code{\link{glm}}, \code{\link[gee]{gee}}, \code{\link[spdep]{spdep}}
#'
#' @references Cruz, N. A., Toloza-Delgado, J. D., & Melo, O. O. (2024). 
#' Generalized spatial autoregressive model. arXiv preprint arXiv:2412.00945.
#' @source https://doi.org/10.48550/arXiv.2412.00945
#'
#' @examples
#' \donttest{
#' library(spdep)
#' library(sp)
#' data(meuse)
#' sp::coordinates(meuse) <- ~x+y
#' W <- spdep::nb2mat(knn2nb(knearneigh(meuse, k=5)), style="W")
#' fit <- GEESAR(cadmium ~ dist + elev, family=poisson(), data=meuse, W=W)
#' summary_SAR(fit)
#'}
#' @export

GEESAR <- function (formula, family = gaussian(), weights=NULL, data, W,
                    start = NULL, 
          toler = 1e-04, maxit = 200, trace = FALSE) {
  mf <- stats::model.frame(formula, data=data)
  y <- base::as.matrix(model.response(mf))
  if (is(family, "function")) 
    family <- stats::family()
  if (family$family %in% c("quasi", "quasibinomial", "quasipoisson")) {
    if (family$family == "quasi") {
      family$family <- switch(family$varfun, constant = "gaussian", 
                              `mu(1-mu)` = "binomial", mu = "poisson", `mu^2` = "Gamma", 
                              `mu^3` = "inverse.gaussian")
    }
    else {
      family$family <- switch(family$family, quasibinomial = "binomial", 
                              quasipoisson = "poisson")
    }
    family <- do.call(family$family, list(link = family$link))
    family2 <- family
  }else {
    if (family$family %in% c("gaussian", "binomial", "poisson", 
                             "Gamma", "inverse.gaussian")) {
      varfun <- switch(family$family, gaussian = "constant", 
                       binomial = "mu(1-mu)", poisson = "mu", Gamma = "mu^2", 
                       inverse.gaussian = "mu^3")
      family2 <- do.call("quasi", list(variance = varfun, 
                                       link = family$link))
      if (family$family == "binomial") 
        family2 <- do.call("quasibinomial", list(link = family$link))
      if (family$family == "poisson") 
        family2 <- do.call("quasipoisson", list(link = family$link))
    }else family2 <- family
  }
  if (ncol(y) == 2 & family$family == "binomial") {
    weights <- as.matrix(y[, 1] + y[, 2])
    y <- as.matrix(y[, 1]/weights)
  }
  y <- as.matrix(as.numeric(y))
  offset <- as.vector(model.offset(mf))
  X <- model.matrix(formula, data)
  p <- ncol(X)
  n <- nrow(X)
  if (is.null(offset)){ 
    offs <- matrix(0, n, 1)}else{ offs <- as.matrix(offset)}
  if (is.null(weights)) {
    weights <- matrix(1, n, 1)} else {
      weights <- as.matrix(weights)}
  if (any(weights <= 0)) 
    stop("Only positive weights are allowed!!", call. = FALSE)
  datas <- cbind(X,y, offs, weights)
  
  
  qll <- NULL
  if (family$family == "gaussian") 
    qll <- function(weights, y, mu) -weights * (y - mu)^2/2
  if (family$family == "binomial") 
    qll <- function(weights, y, mu) weights * (y * log(mu) + (1 - y) * log(1 - mu))
  if (family$family == "poisson") 
    qll <- function(weights, y, mu) weights * (y * log(mu) - mu)
  if (family$family == "Gamma") 
    qll <- function(weights, y, mu) -weights * (y/mu + log(mu))
  if (family$family == "inverse.gaussian") 
    qll <- function(weights, y, mu) weights * (mu - y/2)/mu^2
  if (family$family == "ptfamily") 
    qll <- function(weights, y, mu) weights * (y*log(mu)-mu-log(1-exp(-mu)))
  if (family$family == "Negative Binomial") {
    .Theta <- function() return(.Theta)
    environment(.Theta) <- environment(family$variance)
    .Theta <- .Theta()
    qll <- function(weights, y, mu) weights * (y * log(mu/(mu + .Theta)) + .Theta * 
                                                 log(.Theta/(.Theta + mu)))
  }
  if (family$family == "Tweedie") {
    .Theta <- function() return(p)
    environment(.Theta) <- environment(family$variance)
    .Theta <- .Theta()
    if (.Theta %in% c(0, 1, 2)) {
      if (.Theta == 0) 
        qll <- function(weights, y, mu) -weights * (y - mu)^2/2
      if (.Theta == 1) 
        qll <- function(weights, y, mu) weights * (y * log(mu) - mu)
      if (.Theta == 2) 
        qll <- function(weights, y, mu) -weights * (y/mu + log(mu))
    }
    else qll <- function(weights, y, mu) mu^(-.Theta) * (mu * y/(1 - .Theta) - mu^2/(2 - 
                                                                                       .Theta))
  }
  if (is.null(qll)) 
    qll <- function(weights, y, mu) family$dev.resids(y, mu, weights)
  
  qllp <- function(rho, beta, W, D,n){
    A <- diag(n) - rho*W
    Xi <-solve(A)%*%matrix(D[, 1:p], ncol = p)
    yi <- D[, p + 1]
    ni <- nrow(Xi)
    etai <- Xi %*% beta + D[, p + 2]
    mui <- family$linkinv(etai[,1])
    QL <- sum(qll(weights=D[,p+3], y=D[,p+1], mu=mui))
    return(-QL)
  }
  score <- function(D, W, rho, beta, ni) {
    A <- diag(ni) - rho*W
    Xi <- solve(A)%*%matrix(D[, 1:p], ncol = p)
    yi <- D[, p + 1]
    ni <- nrow(Xi)
    etai <- Xi %*% beta + D[, p + 2]
    mui <- family$linkinv(etai[,1])
    wi <- sqrt(family$variance(mui)/D[, p + 3])
    Xiw <- diag(family$mu.eta(etai[,1]))%*%Xi 
    Vi2 <- diag(1/wi)%*%tcrossprod(A) %*% diag(1/wi)
    Xiw2 <- crossprod(Vi2, Xiw)
    cbind(crossprod(Xiw2, (yi - mui)), crossprod(Xiw2,Xiw), 
               crossprod(Xiw2, (tcrossprod(yi - mui)) %*% Xiw2))
  }
  
  if(family2$family=="ptfamily" &  !all(y>0)){
    stop("Only y>1 are allowed in ptfamily!!", call. = FALSE)
  }
  rho <- 0
  A <- diag(n) - rho*W
  
  beta_new <- try(glm.fit(y = y, x = solve(A)%*%X, family = family2, 
                            weights = weights, offset = offs), silent = TRUE)
  beta_new <- matrix(beta_new$coefficients, nrow=p)
  rho_f <- optim(par=rho,qllp, beta=beta_new, W=W, D=datas,n=n, method="L-BFGS-B", lower=-0.99,upper=0.99)
  rho_new <- rho_f$par
  tolrho <- 1
  niterrho <- 1
  
  # rrr = c()
  # for(rho in seq(-0.9,0.9,0.01)) rrr = c(rrr, qllp(rho=rho, W=W, D=datas, beta=beta_new, n=n))
  # plot(seq(-0.9,0.9, 0.01),-rrr, type="l")
  # 
  Result <- rho_new
  while(tolrho>toler & niterrho < maxit){
  rho <- rho_new
  A = diag(n) - rho*W
  beta_new <- try(glm.fit(y = y, x = solve(A)%*%X, family = family2, 
                            weights = weights, offset = offs), silent = TRUE)
  beta_new <- matrix(beta_new$coefficients, nrow=p)
  
  niter <- 1
  tol <- 1
  while (tol > toler & niter < maxit) {
    beta_old <- beta_new
    resume2 <- score(D=datas,W=W, rho=rho, beta = beta_old, ni=n)
    kchol <- base::chol2inv(chol(resume2[1:p, 2:(p + 1)]))
    beta_new <- beta_old + crossprod(kchol, resume2[, 1])
    tol <- max(abs((beta_new - beta_old)/beta_old))
    niter <- niter + 1
  }
  
  rho_f <- optim(par=rho,qllp, beta=beta_new, W=W, D=datas, n=n,
                 method="L-BFGS-B", lower=-0.99,upper=0.99, hessian = TRUE)
  rho_new <- rho_f$par
  varrho <- -rho_f$hessian[1,1]
  tolrho <- abs(rho_new-rho)
  niterrho <- niterrho + 1
  Result <- c(Result, rho_new)
  }
  ## plot(as.ts(Result))
  ##print(rho_f)
  rho <- rho_new
  resume2 <- score(D=datas,W=W, rho=rho, beta = beta_new, ni=n)
  A = diag(n) - rho*W
  rownames(beta_new) <- colnames(X)
  colnames(beta_new) <- ""
  if (niterrho == maxit) 
    warning("Iteration limit exceeded!!\n", call. = FALSE)
  eta <- solve(A)%*% X %*% beta_new + offs
  mu <- family$linkinv(eta[,1])
  phiis <- diag(solve(tcrossprod(A)))
  vari <- sqrt(family$variance(mu)/(datas[, p + 3]*phiis))
  phi <- sum(((y-mu)/sqrt(vari))^2)/(n-p)
  I0 <- solve(resume2[1:p, 2:(p + 1)])
  I1 <- resume2[1:p, (p + 2):(2 * p + 1)]
  RJC <- crossprod(I0, I1)
  vcovs <- RJC %*% I0
  if(family$family =="binomial" & all(weights==1)){
    vcovs <- I0
  }
  rownames(vcovs) <- colnames(X)
  colnames(vcovs) <- colnames(X)
  
  w <- sqrt(weights * family2$mu.eta(eta[,1])^2/family2$variance(mu))
  Xw <- matrix(w, nrow(X), ncol(X)) * (A%*%X)
  CIC <- sum(diag((crossprod(Xw)/phi) %*% I0))
  RJC <- sqrt((1 - sum(diag(RJC))/(p * phi))^2 + (1 - sum(diag(RJC %*% 
                                                                 RJC))/(p * phi^2))^2)
  logLik <- -qllp(rho=rho, W=W, D=datas, beta=beta_new, n=n)
  estfun <- as.matrix(resume2[, 1])
  rownames(estfun) <- colnames(X)
  colnames(estfun) <- ""
  out_ <- list(coefficients = beta_new, rho=rho, varrho=-1/varrho ,fitted.values = mu, 
               linear.predictors = eta,  arrangedata = datas, vcovs=vcovs,
               prior.weights = weights, y = y, formula = formula, call = match.call(), 
               offset = offs, model = mf, data = data, 
               score = score, converged = ifelse(niter < maxit, TRUE, FALSE), estfun = estfun, 
               naive = I0, family = family,  phi = phi, phiis = phiis, CIC = CIC, RJC = RJC, 
               logLik = logLik, deviance = sum(family$dev.resids(y, mu, weights)), 
               df.residual = length(y) - length(beta_new), 
               levels = .getXlevels(attr(mf, "terms"), mf),
               contrasts = attr(X, "contrasts"), 
               start = start, iter = niter, linear = TRUE)
  class(out_) <- "GEESAR"
  return(out_)
}
Any scripts or data that you put into this service are public.
spatemR documentation built on June 8, 2025, 1:16 p.m.
rdrr.io home R language documentation Run R code online
CRAN packages Bioconductor packages R-Forge packages GitHub packages
Note that we can't provide technical support on individual packages. You should contact the package authors for that.
spatemR
Generalized Spatial Autoregresive Models for Mean and Variance

R/GEESAR.R
In spatemR: Generalized Spatial Autoregresive Models for Mean and Variance

Try the spatemR package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

spatemR Generalized Spatial Autoregresive Models for Mean and Variance

R/GEESAR.R In spatemR: Generalized Spatial Autoregresive Models for Mean and Variance

Try the spatemR package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

spatemR
Generalized Spatial Autoregresive Models for Mean and Variance

R/GEESAR.R
In spatemR: Generalized Spatial Autoregresive Models for Mean and Variance