Nothing
# Miscellaneous functions
#' Sum of squares for Jackknife.
#'
#' \code{ss_jk} is an internal function called by \code{\link{jack_knife}}. It calculates the squared
#' difference of a numerical variable around a given value (for example, the mean).
#'
#' @param obs A numerical vector with no missing values (NA's).
#' @param stat The value of the statistic that is used as a reference.
#' @return The squared difference between a variable and a given value.
#' @examples
#' x <- rnorm(10, 170, 8)
#' x
#' mean(x)
#' ss_jk(x, mean(x))
#' jack_knife(x)
#' @export
ss_jk <- function(obs, stat) {
n <- length(obs)
ss <- numeric(n)
for (i in 1:n) ss[i] <- ((obs[i] - stat)^2)
ss
}
#' Ranks leverage observations from Jackknife method.
#'
#' \code{jack_knife} Ranks the squared differences between mean values from Jackknife analysis
#' (arithmetic mean estimated by removing one observation at a time) and the original mean value.
#'
#' @param x A numeric variable. Missing values are removed by default.
#' @return Data frame with the ranked squared differences.
#' @seealso \code{\link{rank_leverage}}.
#' @examples
#' x <- rnorm(10, 170, 8)
#' x
#' mean(x)
#' jack_knife(x)
#'
#' x <- rnorm(100, 170, 8)
#' mean(x)
#' head(jack_knife(x))
#' @export
jack_knife <- function(x) {
jk.means <- knife_mean(x)
mu <- mean(x, na.rm = TRUE)
jk.ss <- ss_jk(jk.means, mu)
jk.ord <- order(jk.ss, decreasing = TRUE)
Observation <- jk.ord
SS <- jk.ss[jk.ord]
Mean <- jk.means[jk.ord]
res <- data.frame(Observation, Value = x[jk.ord], Mean, SS)
rownames(res) <- NULL
res
}
#' Leverage.
#'
#' \code{leverage} is an internal function called by \code{\link{rank_leverage}}.
#'
#' Estimates the leverage of each observation around the arithmetic mean.
#'
#' @param x A numeric variable. Missing values are removed by default.
#' @return Variable with corresponding leverage estimations
#' @examples
#' x <- rnorm(10, 170, 8)
#' x
#' mean(x)
#' leverage(x)
#' rank_leverage(x)
#' @export
leverage <- function(x) {
inv <- 1 / length(x)
ssx <- (x - mean(x, na.rm = TRUE))^2
ssx2 <- sum(ssx)
lev <- inv + ssx / ssx2
lev
}
#' Ranks observations by leverage.
#'
#' \code{rank_leverage} ranks observations by their leverage (influence) on the arithmetic mean.
#'
#' @param x A numeric variable. Missing values are removed by default.
#' @return A data frame ranking observations by their leverage around the mean.
#' @seealso \code{\link{jack_knife}}.
#' @examples
#' x <- rnorm(10, 170, 8)
#' x
#' mean(x)
#' rank_leverage(x)
#'
#' x <- rnorm(100, 170, 8)
#' mean(x)
#' head(rank_leverage(x))
#' @export
rank_leverage <- function(x) {
lev <- leverage(x)
lev.ord <- order(lev, decreasing = TRUE)
Observation <- lev.ord
Leverage <- lev[lev.ord]
res <- data.frame(Observation, Leverage)
res
}
#' Given y solve for x in a simple linear model.
#'
#' \code{predict_inv} Calculates the value the predictor x that generates value y with a simple linear model.
#'
#' @param model A simple linear model object (class lm).
#' @param y A numerical scalar, the value of the outcome for which we want to calculate the predictor x.
#' @return The estimated value of the predictor.
#' @examples
#' ## Spectrophotometry example. Titration curve for riboflavin (nmol/ml). The sample has an absorbance
#' ## of 1.15. Aim is to estimate the concentration of riboflavin in the sample.
#'
#' Riboflavin <- seq(0, 80, 10)
#' OD <- 0.0125 * Riboflavin + rnorm(9, 0.6, 0.03)
#' titration <- data.frame(Riboflavin, OD)
#'
#' require(sjlabelled, quietly = TRUE)
#' titration <- titration |>
#' var_labels(
#' Riboflavin = "Riboflavin (nmol/ml)",
#' OD = "Optical density"
#' )
#'
#' titration |>
#' gf_point(OD ~ Riboflavin) |>
#' gf_smooth(col = "indianred3", se = TRUE, lwd = 0.5, method = "loess")
#'
#' ## Model with intercept different from zero:
#' model <- lm(OD ~ Riboflavin, data = titration)
#' glm_coef(model)
#'
#' predict_inv(model, 1.15)
#' @export
predict_inv <- function(model, y) {
param <- length(coef(model))
m <- ifelse(param == 1, as.numeric(coef(model)), coef(model)[2])
b <- ifelse(param == 1, 0, coef(model)[1])
x <- (y - b) / m
names(x) <- names(m)
x
}
#' Jackknife for means.
#'
#' \code{knife_mean} is an internal function. Calculates arithmetic means by removing one observation
#' at a time.
#'
#' @param x A numerical variable. Missing values are removed for the mean calculation.
#' @return A vector with the mean calculations.
#' @examples
#' x <- rnorm(10, 170, 8)
#' x
#' mean(x)
#' knife_mean(x)
#' @export
knife_mean <- function(x) {
n <- length(x)
jk <- numeric(n)
for (i in 1:n) jk[i] <- mean(x[-i], na.rm = TRUE)
jk
}
#' Relative and Cumulative Frequency.
#'
#' \code{freq_cont} tabulates a continuous variable by given classes.
#'
#' @param x A numerical (continuous) variable. Ideally, relatively long (greater than 100 observations).
#' @param bks Breaks defining the classes (see example).
#' @param dg Number of digits for rounding (default = 2).
#' @return A data frame with the classes, the mid-point, the frequencies, the relative and cumulative frequencies.
#' @examples
#' data(IgM, package = "ISwR")
#' Ab <- data.frame(IgM)
#' estat(~IgM, data = Ab)
#' freq_cont(IgM, seq(0, 4.5, 0.5))
#' @export
freq_cont <- function(x, bks, dg = 2) {
cx <- cut(x, breaks = bks)
n <- length(x)
x.tab <- as.data.frame(table(cx))
rel <- x.tab$Freq / n
n2 <- length(rel)
mids <- numeric(n2)
cum <- cumsum(rel)
for (i in 1:n2) mids[i] <- mean(c(bks[i], bks[i + 1]))
rel <- round(rel, digits = dg)
cum <- round(cum, digits = dg)
res <- data.frame(x.tab[, 1], mids, x.tab[, 2], rel, cum)
names(res) <- c("Class", "Mids", "Counts", "rel.freq", "cum.freq")
res
}
#' Goodness of fit for Logistic Regression.
#'
#' \code{logistic_gof} performs the Hosmer and Lemeshow test to test the goodness of fit of a logistic
#' regression model. This function is part of \code{residuals.lrm} from package \code{rms}.
#'
#' @author Frank Harell, Vanderbilt University <f.harrell@vanderbilt.edu>
#' @param model A logistic regression model object.
#' @references Hosmer DW, Hosmer T, Lemeshow S, le Cessie S, Lemeshow S. A comparison of goodness-of-fit
#' tests for the logistic regression model. Stat in Med 16:965–980, 1997.
#' @examples
#' data(diet, package = "Epi")
#' model <- glm(chd ~ fibre, data = diet, family = binomial)
#' glm_coef(model, labels = c("Constant", "Fibre intake (g/day)"))
#' logistic_gof(model)
#' @export
logistic_gof <- function(model) {
L <- model$linear.predictors
if (length(L) == 0) {
stop("you did not use linear.predictors=TRUE for the fit")
}
cof <- model$coefficients
P <- 1 / (1 + exp(-L))
Y <- model$y
rnam <- names(Y)
cnam <- names(cof)
if (!is.factor(Y)) {
Y <- factor(Y)
}
lev <- levels(Y)
Y <- unclass(Y) - 1
X <- glm(model$formula, family = binomial, x = TRUE, data = model$data)$x
y <- Y >= 1
p <- P
sse <- sum((y - p)^2)
wt <- p * (1 - p)
d <- 1 - 2 * p
z <- lm.wfit(X, d, wt)
res <- z$residuals * sqrt(z$weights)
sd <- sqrt(sum(res^2))
ev <- sum(wt)
z <- (sse - ev) / sd
P <- 2 * (1 - pnorm(abs(z)))
stats <- data.frame(sse, ev, sd, z, P)
names(stats) <- c(
"Sum of squared errors", "Expected value|H0",
"SD", "Z", "P"
)
return(drop(stats))
}
#' Coefficient of determination.
#'
#' \code{coef_det} estimates the coefficient of determination (r-squared) from fitted (predicted) and
#' observed values. Outcome from the model is assumed to be numerical.
#' @param obs Vector with observed values (numerical outcome).
#' @param fit Vector with fitted (predicted) values.
#' @return A scalar, the coefficient of determination (r-squared).
#' @examples
#' ## Linear regression:
#' Riboflavin <- seq(0, 80, 10)
#' OD <- 0.0125 * Riboflavin + rnorm(9, 0.6, 0.03)
#' titration <- data.frame(Riboflavin, OD)
#' model1 <- lm(OD ~ Riboflavin, data = titration)
#' summary(model1)
#' coef_det(titration$OD, fitted(model1))
#'
#' ## Non-linear regression:
#' library(nlme, quietly = TRUE)
#' data(Puromycin)
#' mm.tx <- gnls(rate ~ SSmicmen(conc, Vm, K),
#' data = Puromycin,
#' subset = state == "treated"
#' )
#' summary(mm.tx)
#' coef_det(Puromycin$rate[1:12], mm.tx$fitted)
#' @export
coef_det <- function(obs, fit) {
obm <- mean(obs, na.rm = TRUE)
SSres <- sum((obs - fit)^2, na.rm = TRUE)
SStot <- sum((obs - obm)^2, na.rm = TRUE)
res <- 1 - (SSres / SStot)
res
}
#' Ranks observations based upon influence measures on models.
#'
#' \code{rank_influence} calculates influence measures of each data observation on models and then ranks them.
#'
#' @param model A generalised linear model object.
#' @details \code{rank_influence} is a wrap function that calls \code{\link[stats]{influence.measures}}, ranks observations on
#' their significance influence on the model and displays the 10 most influential observations
#' (if they are significant).
#' @seealso \code{\link[stats]{influence.measures}}.
#' @examples
#' data(diet, package = "Epi")
#' model <- glm(chd ~ fibre, data = diet, family = binomial)
#' rank_influence(model)
#' @export
rank_influence <- function(model) {
infl <- influence.measures(model)
mat <- (infl$is.inf) * 1
signifa <- sort(apply(mat, 1, sum), decreasing = TRUE)
mat.sum <- signifa[signifa > 0]
mat.names <- as.numeric(names(mat.sum))
pos <- match(mat.names, row.names(infl$infmat), nomatch = 0)
if (length(pos) > 10) {
pos2 <- pos[1:10]
signifb <- as.vector(mat.sum)[1:10]
res <- data.frame(infl$infmat[pos2, ], signif = signifb)
}
if (length(pos) < 10 & length(pos) > 0) {
signifb <- as.vector(mat.sum)
res <- data.frame(infl$infmat[pos, ], signif = signifb)
}
if (length(pos) == 0) {
res <- "No significant influence measures"
}
res
}
#' Inverse of the logit
#'
#' \code{inv_logit} Calculates the inverse of the logit (probability in logistic regression)
#'
#' @param x Numerical value used to compute the inverse of the logit.
#' @export
inv_logit <- function(x) {
if (any(omit <- is.na(x))) {
lv <- x
lv[omit] <- NA
if (any(!omit)) {
lv[!omit] <- Recall(x[!omit])
}
return(lv)
}
exp(x) / (1 + exp(x))
}
#' Pseudo R2 (logistic regression)
#' \code{pseudo_r2} Calculates R2 analogues (pseudo R2) of logistic regression.
#'
#' @param model A logistic regression model.
#' @details \code{pseudo_r2} calculates three pseudo R2 of logistic regression models: 1) Nagelkerke, @0 Cox and Snell, 3) Hosmer and Lemeshow.
#' @return A data frame with the calculated pseudo R2 values.
#' @examples
#' data(Oncho)
#' model_oncho <- glm(mf ~ area, data = Oncho, binomial)
#' glm_coef(model_oncho, labels = c("Constant", "Area (rainforest/savannah)"))
#' pseudo_r2(model_oncho)
#' @export
pseudo_r2 <- function(model) {
if (!(model$family$family == "binomial" && (model$family$link ==
"logit" || model$family$link == "probit"))) {
stop("No logistic regression model, no pseudo R^2 computed.")
}
dev <- model$deviance
null_dev <- model$null.deviance
model_n <- length(model$fitted)
r_hl <- 1 - dev / null_dev
r_cs <- 1 - exp(-(null_dev - dev) / model_n)
r_n <- r_cs / (1 - (exp(-(null_dev / model_n))))
Index <- c("Nagelkerke", "Hosmer and Lemeshow", "Cox and Snell")
Estimate <- round(c(r_n, r_hl, r_cs), 3)
res <- data.frame(Index, Estimate)
res
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.