PseudoR2 <- function(x, which = NULL) {
# this function will not work with weights, neither with cbind lhs!!
# http://stats.stackexchange.com/questions/183699/how-to-calculate-pseudo-r2-when-using-logistic-regression-on-aggregated-data-fil
# test: https://stats.idre.ucla.edu/other/mult-pkg/faq/general/faq-what-are-pseudo-r-squareds/
# library(haven)
# hsb2 <- as.data.frame(read_dta("https://stats.idre.ucla.edu/stat/stata/notes/hsb2.dta"))
# hsb2$honcomp <- hsb2$write >= 60
# r.logit <- glm(honcomp ~ female + read + science, hsb2, family="binomial")
# PseudoR2(r.logit, "a")
# http://digitalcommons.wayne.edu/cgi/viewcontent.cgi?article=2150&context=jmasm
# Walker, Smith (2016) JMASM36: Nine Pseudo R^2 Indices for Binary Logistic Regression Models (SPSS)
# fuer logit Korrektur https://langer.soziologie.uni-halle.de/pdf/papers/rc33langer.pdf
# check with pscl::pR2(x); rcompanion::nagelkerke(x)
# or with library(blorr)
# c(blr_rsq_mcfadden(r.glm),
# blr_rsq_cox_snell(r.glm),
# blr_rsq_nagelkerke(r.glm))
if(inherits(x, what="multinom")) modeltype <- "multinom"
else if(inherits(x, what="glm")) modeltype <- "glm"
else if(inherits(x, what="polr")) modeltype <- "polr"
else if(inherits(x, what="vglm")) modeltype <- "vglm"
else return(NA)
if(inherits(x, what="vglm") && !requireNamespace("VGAM", quietly=TRUE)) {
stop("Could not find package 'VGAM' - please install first") }
if (!(inherits(x, what="vglm")) && !is.null(x$call$summ) && !identical(x$call$summ, 0))
stop("can NOT get Loglik when 'summ' argument is not zero")
L.full <- logLik(x)
D.full <- -2 * L.full # deviance(x)
AIC_score <- AIC(x)
BIC_score <- BIC(x)
#Compute predicted values (do this before converting VGLM object to S3 list)
if(modeltype == "vglm" | modeltype == "glm"){
# remark Daniel Wollschlaeger, vglm would not dispatch correctly 30.11.2019
y.hat <- if(modeltype == "vglm") {
VGAM::predictvglm(x, type="link")
} else {
predict(x, type="link")
}
y.hat.resp <- predict(x, type="response")
}
# For compatibility with other method types, convert vglm S4 object into normal S3 object
# EG, it's easier if we can consistently use x$model to access the "model" data frame
# Note that this needs to be done after running logLik above
if(modeltype == "vglm"){
n_vglm <- nobs(x) #save for later
S4_xnames <- slotNames(x)
x <- lapply(S4_xnames, slot, object = x)
names(x) <- S4_xnames
if(!is.null(x$call$form2)) stop("Cannot compute PseudoR2 values for VGLM models with form2 parameter")
}
orig.formula <- deparse(unlist(list(x$formula, formula(x), x$call$formula))[[1]])
# ---- Get all parameters that we don't explicitly know what to do with ----
if(modeltype == "multinom"){other_params <- x$call[!(names(x$call) %in% c("formula", "data", "weights", "subset", "censored", "", #Parameters whose values are stored in model object
"na.action", "subset", #parameters that we can ignore if model = TRUE
"model", "contrasts"))] ##parameters that don't affect model results and can be dropped from null model call
}else if(modeltype == "glm"){ other_params <- x$call[!(names(x$call) %in% c("formula", "family", "data", "weights", "subset", "offset", "method", "control", "", #Parameters whose values are stored in model object
"na.action", "subset", #parameters that we can ignore if model = TRUE
"model", "x", "y", "contrasts"))] #parameters that don't affect model results and can be dropped from null model call
}else if(modeltype == "polr"){ other_params <- x$call[!(names(x$call) %in% c("formula", "data", "weights", "subset", "method", "", #Parameters whose values are stored in model object
"na.action", "subset", #parameters that we can ignore if model = TRUE
"model", "contrasts"))] #parameters that don't affect model results and can be dropped from null model call
}else if(modeltype == "vglm"){ other_params <- x$call[!(names(x$call) %in% c("formula", "weights", "family", "data", "control", "", #Parameters whose values are stored in model object
"na.action", "subset", #parameters that we can ignore if model = TRUE
"model", "contrasts", "qr.arg", "trace"))] #parameters that don't affect model results and can be dropped from null model call
if(nrow(x$model) == 0) stop("Can only calculate PseudoR2 for VGLM when model = TRUE, try refitting VGLM")
}
orig_call <- x$call
#Check whether the other parameters, when called, will evaluate in the current environment
other_params_exist.yn <- mapply(function(x, x.name){ #for each other_param (and the associated name)
tryCatch({ #return TRUE if the expression evaluates
eval(x)
TRUE
}, error = function(cond){
message("Could not evaluate '", as.character(x), "' for fitting PseudoR2 null model with parameter ", as.character(x.name), " = ", as.character((x)))
message("Will evaluate null model without parameter; results may not be valid if this parameter affects model fit")
return(FALSE)
})
}, x = other_params, x.name = names(other_params))
other_params <- other_params[other_params_exist.yn]
#If model parameter was not specified, add subset and na.action parameters to the other params list (we don't need to rerun model.frame if we already have a valid model object)
#These parameters *do* affect null model fit, and so we separately check for their existence when refitting model.frame (and generate an error, rather than warning, if they don't exist)
if(!(exists("model", x))) other_params <- c(other_params, x$call[names(x$call) %in% c("subset", "na.action")])
# ---- Construct appropriate data/model object for null model call ----
calltype.char <- as.character(orig_call[1])
#Get initial data
if(exists("model", x)){
data <- x$model #If x has a model frame component, use that - the safest bet
}else if(exists("data", x) & !("environment" %in% class(x$data))){ #If x has a data object (but no model), take it
data <- x$data
#may need to check for subset and na.action parameters to be included in model.frame as well
}else if(!is.null(x$call$data) & !("environment" %in% class(x$data))){ #If there is a data frame specified in the call
# if("environment" %in% class(x$data)) warning("Could not find model element of ", modeltype, " object for evaluating PseudoR2 null model. Will fit null model with new evaluation of variables in environment ", environmentName(x$data), ". Ensure variables have not changed since initial call, or try running ", calltype.char, " with 'model = TRUE'")
#This is a very lazy use of tryCatch (as we are effectively evaluating x$call$data once here, then once below); will fix at some point
isValidCallRef <- tryCatch(({
eval(x$call$data)
TRUE
}), error = function(cond){
return(FALSE)
})
if(!isValidCallRef) stop("Could not find model, data, or valid call element of ", modeltype, " object for evaluating PseudoR2 null model (could not find '", as.character(x$call$data), "'). Try running ", calltype.char, " with 'model = TRUE'")
warning("Could not find model or data element of ", modeltype, " object for evaluating PseudoR2 null model. Will fit null model with new evaluation of '", as.character(x$call$data), "'. Ensure object has not changed since initial call, or try running ", calltype.char, " with 'model = TRUE'")
data <- eval(x$call$data)
} else if(!is.null(x$call$formula)){ #if the call only references objects an environment
if("environment" %in% class(x$data)) eval_env <- x$data else eval_env <- parent.frame()
isValidCallRef <- tryCatch(({ #return TRUE if the variable has a valid evaluation, false otherwise
model.frame(x$call$formula, data = eval_env)
TRUE
}), error = function(cond){
return(FALSE)
})
if(!isValidCallRef) stop("Could not find model, data, or valid call element of ", modeltype, " object for evaluating PseudoR2 null model (objects in formula could not be found). Try running ", calltype.char, " with 'model = TRUE'")
warning("Could not find model or data element of ", modeltype, " object for evaluating PseudoR2 null mode. Will fit null model with new evaluation of objects in formula. Ensure object has not changed since initial call, or try running ", calltype.char, " with 'model = TRUE'")
data <- model.frame(x$call$formula, data = eval_env)
}else stop("Could not find model, data, or valid call element of ", modeltype, " object for evaluating PseudoR2 null model. Try running ", calltype.char, " with 'model = TRUE'")
#If data wasn't taken from a "model" object, we will need to re-run model.frame to drop NAs and evaluate subsets
if(!exists("model", x)){
#again, we are being very lazy in the implementation of tryCatch here - but this can be fixed another time
#evaluate "subset" calls to check if it's a valid value (including "NULL" as a valid value)
validSubset.yn <-
tryCatch(({
eval(x$call$subset)
TRUE
}), error = function(cond){
return(FALSE)
})
if(validSubset.yn == FALSE) stop("Could not evaluate '", as.character(x$call$subset), "' for fitting PseudoR2 null model with parameter subset = ", as.character(x$call$subset), ". Try running ", calltype.char, " with 'model = TRUE'")
if(!is.null(x$call$subset)) warning("Re-evaluating ", as.character(x$call$subset), " for fitting PseudoR2 null model with parameter subset = ", as.character(x$call$subset))
#evaluate "na.action" to see if it has a valid non-null value, and add to call only if non-null
validNaAction.yn <-
tryCatch(({
eval(x$call$na.action)
TRUE
}), error = function(cond){
return(FALSE)
})
if(!is.null(x$call$na.action) & validNaAction.yn == FALSE){
stop("Could not evaluate '", as.character(x$call$na.action), "' for fitting PseudoR2 null model with parameter na.action = ", as.character(x$call$na.action), ". Try running ", calltype.char, " with 'model = TRUE'")
} else if(!is.null(x$call$na.action)) warning("Re-evaluating ", as.character(x$call$na.action), " for fitting PseudoR2 null model with parameter na.action = ", as.character(x$call$na.action))
#if we are using an object type that doesn't not contain a prior.weights output, check that the weights call is valid
#for other model types, we extract weights from the model object instead
if(modeltype == "polr"){
validWeights.yn <-
tryCatch(({
eval(x$call$weights)
TRUE
}), error = function(cond){
return(FALSE)
})
if(validWeights.yn == FALSE) stop("Could not evaluate '", as.character(x$call$weights), "' for fitting PseudoR2 null model with parameter weights = ", as.character(x$call$weights), ". Try running ", calltype.char, " with 'model = TRUE'")
if(!is.null(x$call$weights)) warning("Re-evaluating ", as.character(x$call$weights), " for fitting PseudoR2 null model with parameter weights = ", as.character(x$call$weights))
weights.call <- x$call$weights
} else weights.call <- NULL
modelcall <- call('model.frame', formula = orig.formula, data = data, subset = x$call$subset, weights = weights.call)
if("na.action" %in% names(x$call)) modelcall$na.action <- x$call$na.action #check whether a na.action parameter was explicitly called (default value for na.action is NOT null, so is.null(x$call$na.action) does not work)
data <- eval(modelcall)
}
if(!is.null(x$prior.weights) & length(x$prior.weights) > 0) weights <- x$prior.weights
else if(modeltype == "multinom") weights <- x$weights #"weights' in multinom are equivalent to 'prior.weights' in glm
else if(!is.null(data$`(weights)`) & length(data$`(weights)` > 0)) weights <- data$`(weights)`
else weights <- NULL
#vglm saves prior.weights as a matrix, but then requires a vector as input
if(!is.null(weights) & modeltype == "vglm"){
if(ncol(weights) == 1) weights <- as.vector(weights)
}
#Drop other columns from data, to avoid literal names (eg, "factor(y)" as DV not matching any DV columns)
data <- data[,1,drop = FALSE]
names(data) <- "y"
null.formula <- as.formula("y ~ 1")
#Costruct the call, then evaluate
if(modeltype == "multinom")
nullcall <- call(calltype.char, formula = null.formula, data = data, weights = weights,
censored = x$censored, trace = FALSE) #specify elements that come from a known part of the multinom object
else if(modeltype == "glm")
nullcall <- call(calltype.char, formula = null.formula, data = data, weights = weights,
family = x$family, method = x$method, control = x$control,
offset = x$offset) #specify elements that come from a known part of the glm object
else if(modeltype == "polr")
nullcall <- call(calltype.char, formula = null.formula, data = data, weights = weights,
method = x$method) #specify elements that come from a known part of the polr object
else if(modeltype == "vglm"){
nullcall <- call(calltype.char, formula = null.formula, data = data, weights = weights,
control = x$control, family = x$family, trace = FALSE)}
if(length(other_params) > 0){
nullcall[names(other_params)] <- other_params
}
L.base <- logLik(eval(nullcall))
D.base <- -2 * L.base # deviance(update(x, ~1))
G2 <- -2 * (L.base - L.full)
# n <- if(length(weights(x)))
# sum(weights(x))
# else
n <- attr(L.full, "nobs") # alternative: n <- dim(x$residuals)[1]
if(modeltype == "multinom")
edf <- x$edf
else if(modeltype == "vglm"){
edf <- x$rank
n <- n_vglm # logLik does not return nobs for vglm
} else
edf <- x$rank
# McFadden
McFadden <- 1 - (L.full/L.base)
# adjusted to penalize for the number of predictors (k) in the model
McFaddenAdj <- 1 - ((L.full - edf)/L.base)
# Nagelkerke / CraggUhler
Nagelkerke <- (1 - exp((D.full - D.base)/n))/(1 - exp(-D.base/n))
# CoxSnell / Maximum Likelihood R2
CoxSnell <- 1 - exp(-G2/n)
res <- c(McFadden=McFadden, McFaddenAdj=McFaddenAdj,
CoxSnell=CoxSnell, Nagelkerke=Nagelkerke, AldrichNelson=NA,
VeallZimmermann=NA,
Efron=NA, McKelveyZavoina=NA, Tjur=NA,
AIC=AIC_score, BIC=BIC_score, logLik=L.full, logLik0=L.base, G2=G2)
if(modeltype == "glm" || modeltype == "vglm" ) {
if(modeltype == "vglm"){
fam <- x$family@vfamily
link <- if(all(x$extra$link == "logit")){
"logit"
} else if(all(x$extra$link == "probit")){
"probit"
} else {
NA
}
y <- x$y
} else {
fam <- x$family$family
link <- x$family$link
y <- x$y
}
s2 <- switch(link, probit = 1, logit = pi^2/3, NA)
# corrected based on mail by Chiroc Han, 2019-08-01 ******
# Aldrich/Nelson
# from:
# res["AldrichNelson"] <- G2 / (G2 + n * s2)
# to:
res["AldrichNelson"] <- G2 / (G2 + n)
# Veall/Zimmermann
# res["VeallZimmermann"] <- res["AldrichNelson"] * (2*L.base - n * s2)/(2*L.base)
res["VeallZimmermann"] <- res["AldrichNelson"] * (2*L.base - n)/(2*L.base)
# McKelveyZavoina
# y.hat <- predict(x, type="link")
sse <- sum((y.hat - mean(y.hat))^2)
res["McKelveyZavoina"] <- sse/(n * s2 + sse)
# EfronR2
res["Efron"] <- (1 - (sum((y - y.hat.resp)^2)) /
(sum((y - mean(y))^2)))
# Tjur's D
# compare with binomTools::Rsq.glm()
if(identical(fam, "binomial"))
res["Tjur"] <- unname(diff(tapply(y.hat.resp, y, mean, na.rm=TRUE)))
}
if(is.null(which))
which <- "McFadden"
else
which <- match.arg(which, c("McFadden","AldrichNelson","VeallZimmermann","McFaddenAdj", "CoxSnell", "Nagelkerke",
"Efron", "McKelveyZavoina", "Tjur","AIC", "BIC", "logLik", "logLik0","G2","all"),
several.ok = TRUE)
if(any(which=="all"))
return(res)
else
return(res[which])
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.