R/RRglmGOF.R
In GLMMRR: Generalized Linear Mixed Model (GLMM) for Binary Randomized Response Data

Documented in RRglmGOF

#' Goodness-of-fit statistics for binary Randomized Response data
#'
#' Compute goodness-of-fit statistics for binary Randomized Response data.
#' Pearson, Deviance and Hosmer-Lemeshow statistics are available.
#'
#' @param RRglmOutput
#' a model fitted with the \code{\link{RRglm}} function.
#' @param doPearson
#' compute Pearson statistic.
#' @param doDeviance
#' compute Deviance statistic.
#' @param doHlemeshow
#' compute Hosmer-Lemeshow statistic.
#' @param hlemeshowGroups
#' number of groups to split the data into for the Hosmer-Lemeshow statistic (default: 10).
#' @param rm.na
#' remove cases with missing data.
#'
#' @return
#' an option of class RRglmGOF.
#' @export
#'
#' @examples
#'out <- RRglm(response ~ Gender + RR + pp + age, link="RRlink.logit", RRmodel=RRmodel,
#'          p1=RRp1, p2=RRp2, data=Plagiarism, etastart=rep(0.01, nrow(Plagiarism)))
#'RRglmGOF(RRglmOutput = out, doPearson = TRUE, doDeviance = TRUE, doHlemeshow = TRUE)
RRglmGOF <- function(RRglmOutput, doPearson = TRUE, doDeviance = TRUE, doHlemeshow = TRUE, hlemeshowGroups = 10, rm.na = TRUE)
{
  # Initialize
  pearson <- list(do = doPearson, obs = NULL, exp = NULL, res = NULL, stat = NA, pvalue = NA, df = NA, nGroup = NA)
  deviance <- list(do = doDeviance, obs = NULL, exp = NULL, res = NULL, stat = NA, pvalue = NA, df = NA, nGroup = NA)
  hlemeshow <- list(do = doHlemeshow, stat = NA, pvalue = NA, df = NA, overview = NULL, nGroup = hlemeshowGroups)

  # Get variables in formula
  vars <- all.vars(RRglmOutput$formula)

  # Create a data frame to work with
  df.work <- data.frame(y.obs = RRglmOutput$model[, vars[1]], RRglmOutput$model[, vars[2:length(vars)]])

  # Get fitted values
  y.fitted.tmp <- RRglmOutput$fitted.values

  # Handle NA's
  if (rm.na)
  {
    df.work <- na.omit(df.work)
    y.fitted.tmp <- na.omit(y.fitted.tmp)
  }

  # Attach fitted values at the beginning
  df.work <- data.frame(y.fitted = y.fitted.tmp, df.work)

  if (doPearson || doDeviance)
  {
    # Determine possible unique groups, or clusters in the data
    df.x <- data.frame(df.work[, 3:ncol(df.work)])
    factor.groups <- getUniqueGroups(df.x)

    # Number of unique groups
    nGroup <- length(levels(factor.groups))

    # Saved for print function
    pearson$nGroup <- nGroup
    deviance$nGroup <- nGroup

    # Number of estimated parameters
    nParam <- length(RRglmOutput$coeff)

    if(nGroup <= nParam)
    {
      cat("The Pearson Fit statistic is defined for an unsaturated model","\n")
      cat("The number of unique groups should be higher than the number of estimated parameters","\n")
    }
    else
    {
      # Expected cell proportions
      vec.pihat <- getCellMeans(y = df.work$y.fitted, factor.groups = factor.groups)

      # Observed cell proportions
      vec.prophat <- getCellMeans(y = df.work$y.obs, factor.groups = factor.groups)

      # Number of observations per group
      vec.groupN <- getCellSizes(n = length(df.work$y.obs), factor.groups = factor.groups)

      # Pearson statistic
      if (doPearson)
      {
        pearson$obs <- vec.prophat
        pearson$exp <- vec.pihat
        pearson$res <- (vec.prophat - vec.pihat) / sqrt(vec.pihat * (1 - vec.pihat) / vec.groupN)
        pearson$stat <- sum(vec.groupN * (((vec.prophat - vec.pihat)^2) / (vec.pihat * (1 - vec.pihat))))
        pearson$df <- nGroup - nParam
        pearson$pvalue = 1 - pchisq(pearson$stat, df = pearson$df)
      }

      # Deviance statistic
      if (doDeviance)
      {
        # 0 and 1 not possible
        vec.prophat[which(vec.prophat == 0)] <- 1e-15
        vec.prophat[which(vec.prophat == 1)] <- 1-1e-15
        deviance$obs <- vec.prophat
        deviance$exp <- vec.pihat

        sign <- rep(1, length(vec.prophat))
        sign[which(vec.prophat < vec.pihat)] <- -1
        deviance$res <- sign * sqrt(2 * vec.groupN * (vec.prophat * log(vec.prophat / vec.pihat) + (1 - vec.prophat) * log((1 - vec.prophat) / (1 - vec.pihat))))
        deviance$stat <- 2 * sum(vec.groupN * (vec.prophat * log(vec.prophat / vec.pihat) + (1 - vec.prophat) * log((1 - vec.prophat) / (1 - vec.pihat))))
        deviance$df <- nGroup - nParam
        deviance$pvalue = 1 - pchisq(deviance$stat, df = deviance$df)
      }
    }
  }

  if(doHlemeshow)
  {
    # 1: Expected probabilities
    # 2: Observed probabilities
    # 3: Index used to set cutting points for equally sized groups
    probmatrix = matrix(nrow = nrow(df.work), ncol = 2)
    probmatrix[, 1] = df.work$y.fitted
    probmatrix[, 2] = df.work$y.obs

    # Order by expected probabilities
    probmatrix <- probmatrix[order(probmatrix[, 1], decreasing=FALSE), ]

    # Set index
    probmatrix <- cbind(probmatrix, seq(1, nrow(df.work)))

    # Split matrix into given number of groups
    breaks <- quantile(probmatrix[, 3], probs = seq(0, 1, by = 1 / hlemeshowGroups))
    probmatrix[, 3] <- cut(probmatrix[, 3], breaks = breaks, include.lowest = TRUE)
    problistSplitted <- split(as.data.frame(probmatrix), probmatrix[, 3])

    # 1: Predicted probability success -> P
    # 2: Predicted probability failure -> 1 - P
    # 3: Observed probability success -> P
    # 4: Observed probability failure -> 1 - P
    # 5/6: Predicted successes/failures
    # 7/8: Observed successes/failures
    # 9: Hosmer Lemeshow Statistic for group n
    # 10: Residual for group n
    hlmatrix = matrix(nrow = hlemeshowGroups, ncol = 10)
    for(ii in 1:hlemeshowGroups)
    {

      predictedProbs <- problistSplitted[[ii]][[1]]
      observedProbs <- problistSplitted[[ii]][[2]]
      nObservations <- length(problistSplitted[[ii]][[2]])
      nObservedSuccesses <- sum(problistSplitted[[ii]][[2]])

      hlmatrix[ii, 1] <- mean(predictedProbs)
      hlmatrix[ii, 3] <- mean(observedProbs)
      hlmatrix[ii, 2] <- mean(1 - predictedProbs)
      hlmatrix[ii, 4] <- mean(1 - observedProbs)

      hlmatrix[ii, 5] <- hlmatrix[ii,1] * nObservations
      hlmatrix[ii, 6] <- nObservations - hlmatrix[ii, 5]
      hlmatrix[ii, 7] <- nObservedSuccesses
      hlmatrix[ii, 8] <- nObservations - hlmatrix[ii, 7]

      ## From book "Regression for Categorical Data" by Tutz
      nObservationsInGroup <- nObservations
      avgPredictedSuccesses <- hlmatrix[ii, 1] #hlmatrix[ii,5]/ nObservationsInGroup
      avgObservedSuccesses <- hlmatrix[ii, 3] #hlmatrix[ii,7]/ nObservationsInGroup

      hlmatrix[ii, 9] <- nObservationsInGroup * (((avgObservedSuccesses - avgPredictedSuccesses)^2) / (avgPredictedSuccesses * (1 - avgPredictedSuccesses)))
      hlmatrix[ii, 10] <- (avgObservedSuccesses - avgPredictedSuccesses)/sqrt(avgPredictedSuccesses * (1 - avgPredictedSuccesses) / nObservationsInGroup)

    }

    df.hlemeshow <- data.frame(obs = hlmatrix[, 7] + hlmatrix[, 8], exp.suc = hlmatrix[, 5], obs.suc = hlmatrix[, 7],
                               stat = hlmatrix[, 9], res = hlmatrix[, 10])
    colnames(df.hlemeshow) <- c("Observations", "Expected successes", "Observed successes", "H-L Statistic", "Pearson Residuals")
    rownames(df.hlemeshow) <- paste("Group", seq(1, hlemeshowGroups))

    hlemeshow$stat <- sum(hlmatrix[, 9])
    hlemeshow$df <- hlemeshowGroups - 2
    hlemeshow$pvalue <- 1 - pchisq(hlemeshow$stat, df = hlemeshow$df)
    hlemeshow$overview <- df.hlemeshow
  }

  ls.return <- list(pearson = pearson, deviance = deviance, hlemeshow = hlemeshow, vars = vars, n = nrow(df.work), family = RRglmOutput$family)
  class(ls.return) <- "RRglmGOF"
  return(ls.return)
}

Any scripts or data that you put into this service are public.

GLMMRR documentation built on Nov. 5, 2025, 6:20 p.m.

rdrr.io home R language documentation Run R code online

CRAN packages Bioconductor packages R-Forge packages GitHub packages

Note that we can't provide technical support on individual packages. You should contact the package authors for that.

GLMMRR
Generalized Linear Mixed Model (GLMM) for Binary Randomized Response Data

R/RRglmGOF.R
In GLMMRR: Generalized Linear Mixed Model (GLMM) for Binary Randomized Response Data

Defines functions RRglmGOF

Documented in RRglmGOF

Try the GLMMRR package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

GLMMRR Generalized Linear Mixed Model (GLMM) for Binary Randomized Response Data

R/RRglmGOF.R In GLMMRR: Generalized Linear Mixed Model (GLMM) for Binary Randomized Response Data

Defines functions RRglmGOF

Documented in RRglmGOF

Try the GLMMRR package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

GLMMRR
Generalized Linear Mixed Model (GLMM) for Binary Randomized Response Data

R/RRglmGOF.R
In GLMMRR: Generalized Linear Mixed Model (GLMM) for Binary Randomized Response Data