R/pulkrob.deviance.R
In generalhoslem: Goodness of Fit Tests for Logistic Regression Models

pulkrob.deviance <- function(model, catvars) {
  epi.cp.pulkrob <- function(dat){
    # adapted from epiR::epi.cp() by Mark Stevenson et al
    ndat <- data.frame(id = 1:nrow(dat), dat)
    if (!is.null(dim(ndat[, ncol(ndat):2]))) {
      ndat$indi <- apply(X = ndat[, ncol(ndat):2], MARGIN = 1, FUN = function(x) as.factor(paste(x, collapse = "")))
      cp.id <- tapply(ndat$id, ndat$indi, function(x) paste(x, collapse = ","))
      cp <- unique(ndat[, 2:ncol(ndat)])
      n <- as.numeric(unlist(lapply(strsplit(cp.id, ","), length)))
      id <- tapply(ndat$id, ndat$indi, function(x) (x)[1])
      lookup <- data.frame(id = 1:length(n), indi = row.names(id))
      cov.pattern <- data.frame(id = 1:length(n), n, cp[,-ncol(cp)])
      rownames(cov.pattern) <- rownames(cp)
      id <- lookup$id[match(ndat$indi, lookup$indi)]
    } else {
      cp.id <- tapply(ndat$id, ndat[2], function(x) paste(x, collapse = ","))
      cp <- unique(ndat[, 2:ncol(ndat)])
      n <- as.numeric(unlist(lapply(strsplit(cp.id, ","), length)))
      cov.pattern <- data.frame(id = 1:length(n), n, cp)
      id <- cov.pattern$id[match(as.vector(as.matrix(ndat[2])), cov.pattern$cp)]
    }
    list(cov.pattern = cov.pattern, id = id)
  }
  if (class(model) == "polr") {
    yhat <- as.data.frame(fitted(model))
  } else if (class(model) == "clm") {
    predprob <- model$model[, 2:ncol(model$model), drop = F]
    yhat <- as.data.frame(predict(model, newdata = predprob, type = "prob")$fit)
  } else warning("Model is not of class polr or clm. Test may fail.")
  formula <- formula(model$terms)
  DNAME <- paste("formula: ", deparse(formula))
  METHOD <- "Pulkstenis-Robinson deviance test"
  covars <- model$model[-1]
  covars <- covars[names(covars) %in% catvars]
  covpat <- epi.cp.pulkrob(covars)
  yhat$score <- apply(sapply(1:ncol(yhat), function(i) { yhat[, i] * i }), 1, sum)
  yhat <- cbind(id=1:nrow(yhat), yhat, covpat=covpat$id)
  medians <- cbind(covpat=covpat$cov.pattern$id,
                   med=sapply(covpat$cov.pattern$id, function(x) median(yhat[yhat$covpat==x, ]$score)))
  yhat <- merge(x = yhat, y = medians, by = "covpat", all.x = TRUE)
  yhat$covpatsplit <- sapply(1:nrow(yhat), function(i) ifelse(yhat[i, ]$score <= yhat[i, ]$med,
                                                              paste0(yhat[i, ]$covpat, "a"),
                                                              paste0(yhat[i, ]$covpat, "b")))
  dfobs <- cbind(id=1:nrow(model$model), model$model[1])
  dfobs <- merge(x = dfobs, y = yhat[, c("id", "covpatsplit")], by = "id", all.x = TRUE)
  dfobsmelt <- melt(dfobs[, -1], id.vars = "covpatsplit")
  observed <- cast(dfobsmelt, covpatsplit ~ value, length)
  observed.cols <- observed[, names(observed[, 2:ncol(observed)])]
  observed.cols <- observed.cols[order(names(observed[, 2:ncol(observed)]))]
  observed <- cbind(covatsplit = observed[, 1], observed.cols)
  dfexp <- yhat[, !colnames(yhat) %in% c("id", "covpat", "med", "score")]
  dfexpmelt <- melt(dfexp, id.vars = ncol(dfexp))
  expected <- cast(dfexpmelt, covpatsplit ~ variable, sum)                             
  expected.cols <- expected[, names(expected[, 2:ncol(expected)])]
  expected.cols <- expected.cols[order(names(expected[, 2:ncol(expected)]))]
  expected <- cbind(covatsplit = expected[, 1], expected.cols)
  stddiffs <- abs(observed[, 2:ncol(observed)] - expected[, 2:ncol(expected)]) / sqrt(expected[, 2:ncol(expected)])
  if (any(expected[, 2:ncol(expected)] < 1))
    warning("At least one cell in the expected frequencies table is < 1. Chi-square approximation may be incorrect.")
  deviancetable <- observed[, 2:ncol(observed)] * log(observed[, 2:ncol(observed)] / expected[, 2:ncol(expected)])
  deviancetable <- replace(deviancetable, is.na(deviancetable), 0)
  deviancesq <- 2*sum(deviancetable)
  I2 <- nrow(observed)
  J <- length(levels(as.factor(model$model[, 1])))
  k <- length(catvars)
  PARAMETER <- ((I2 - 1) * (J - 1)) - k - 1
  PVAL <- 1 - pchisq(deviancesq, PARAMETER)
  names(deviancesq) <- "Deviance-squared"
  names(PARAMETER) <- "df"
  structure(list(statistic = deviancesq, parameter = PARAMETER, 
                 p.value = PVAL, method = METHOD, data.name = DNAME, observed = observed, 
                 expected = expected, stddiffs = stddiffs), class = "htest")
}

Any scripts or data that you put into this service are public.

generalhoslem documentation built on June 3, 2019, 5:03 p.m.

rdrr.io home R language documentation Run R code online

CRAN packages Bioconductor packages R-Forge packages GitHub packages

Note that we can't provide technical support on individual packages. You should contact the package authors for that.

generalhoslem
Goodness of Fit Tests for Logistic Regression Models

R/pulkrob.deviance.R
In generalhoslem: Goodness of Fit Tests for Logistic Regression Models

Try the generalhoslem package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

generalhoslem Goodness of Fit Tests for Logistic Regression Models

R/pulkrob.deviance.R In generalhoslem: Goodness of Fit Tests for Logistic Regression Models

Try the generalhoslem package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

generalhoslem
Goodness of Fit Tests for Logistic Regression Models

R/pulkrob.deviance.R
In generalhoslem: Goodness of Fit Tests for Logistic Regression Models