Nothing
# lsm.R
#' Estimation of the log Likelihood of the Saturated Model
#' @title Estimation of the log Likelihood of the Saturated Model
#' @description When the values of the outcome variable \code{Y} are either 0 or 1, the function \code{lsm()} calculates the estimation of the log likelihood in the saturated model. This model is characterized by Llinas (2006, ISSN:2389-8976) in section 2.3 through the assumptions 1 and 2. If \code{Y} is dichotomous and the data are grouped in \code{J} populations, it is recommended to use the function \code{lsm()} because it works very well for all \code{K}.
#' @param formula An expression of the form y ~ model, where y is the outcome variable (binary or dichotomous: its values are 0 or 1).
#' @param family an optional funtion for example binomial.
#' @param data an optional data frame, list or environment (or object coercible by as.data.frame to a data frame) containing the variables in the model. If not found in data, the variables are taken from environment(formula), typically the environment from which \code{lsm()} is called.
#' @param ... further arguments passed to or from other methods.
#'
#' @return \code{lsm} returns an object of class "\code{lsm}".
#'
#' An object of class "\code{lsm}" is a list containing at least the
#' following components:
#'
#' \item{coefficients}{Vector of coefficients estimations (intercept and slopes).}
#'
#' \item{coef}{Vector of coefficients estimations (intercept and slopes).}
#'
#' \item{Std.Error}{Vector of the coefficients’s standard error (intercept and slopes).}
#'
#' \item{ExpB}{Vector with the exponential of the coefficients (intercept and slopes).}
#'
#' \item{Wald}{Value of the Wald statistic (with chi-squared distribution).}
#'
#' \item{DF}{Degree of freedom for the Chi-squared distribution.}
#'
#' \item{P.value}{P-value calculated with the Chi-squared distribution. }
#'
#' \item{Log_Lik_Complete}{Estimation of the log likelihood in the complete model.}
#'
#' \item{Log_Lik_Null}{Estimation of the log likelihood in the null model.}
#'
#' \item{Log_Lik_Logit}{Estimation of the log likelihood in the logistic model.}
#'
#' \item{Log_Lik_Saturate}{Estimation of the log likelihood in the saturate model.}
#'
#' \item{Populations}{Number of populations in the saturated model.}
#'
#' \item{Dev_Null_vs_Logit }{Value of the test statistic (Hypothesis: null vs logistic models).}
#'
#' \item{Dev_Logit_vs_Complete}{Value of the test statistic (Hypothesis: logistic vs complete models).}
#'
#' \item{Dev_Logit_vs_Saturate}{Value of the test statistic (Hypothesis: logistic vs saturated models).}
#'
#' \item{Df_Null_vs_Logit }{Degree of freedom for the test statistic’s distribution (Hypothesis: null vs logistic models).}
#'
#' \item{Df_Logit_vs_Complete }{Degree of freedom for the test statistic’s distribution (Hypothesis: logistic vs saturated models).}
#'
#' \item{Df_Logit_vs_Saturate}{Degree of freedom for the test statistic’s distribution (Hypothesis: logistic vs saturated models).}
#'
#' \item{P.v_Null_vs_Logit}{P-value for the hypothesis test: null vs logistic models.}
#'
#' \item{P.v_Logit_vs_Complete }{P-value for the hypothesis test: logistic vs complete models.}
#'
#' \item{P.v_Logit_vs_Saturate}{P-value for the hypothesis test: logistic vs saturated models.}
#'
#' \item{Logit}{Vector with the log-odds.}
#'
#' \item{p_hat_complete}{Vector with the probabilities that the outcome variable takes the value 1, given the \code{jth} population (estimated with the complete model and without the logistic model).}
#'
#' \item{p_hat_null}{Vector with the probabilities that the outcome variable takes the value 1, given the \code{jth} population (estimated with the null model and without the logistic model).}
#'
#' \item{p_j}{Vector with the probabilities that the outcome variable takes the value 1, given the \code{jth} population (estimated with the logistic model).}
#'
#' \item{odd}{Vector with the values of the odd in each \code{jth} population.}
#'
#' \item{OR}{Vector with the values of the odd ratio for each coefficient of the variables.}
#'
#' \item{z_j}{Vector with the values of each \code{Zj} (the sum of the observations in the \code{jth} population).}
#'
#' \item{n_j}{Vector with the \code{nj} (the number of the observations in each \code{jth} population).}
#'
#' \item{p_j_tilde}{Vector with the estimation of each \code{pj} (the probability of success in the \code{jth} population) in the saturated model (without estimate the logistic parameters).}
#'
#' \item{v_j}{Vector with the variance of the Bernoulli variables in the \code{jth} population.}
#'
#' \item{m_j}{Vector with the expected values of \code{Zj} in the \code{jth} population.}
#'
#' \item{V_j}{Vector with the variances of \code{Zj} in the \code{jth} population.}
#'
#' \item{V}{Variance and covariance matrix of \code{Z}, the vector that contains all the \code{Zj}.}
#'
#' \item{S_p}{Score vector in the saturated model.}
#'
#' \item{I_p}{Information matrix in the saturated model.}
#'
#' \item{Zast_j}{Vector with the values of the standardized variable of \code{Zj}.}
#'
#' \item{mcov}{Variance and covariance matrix for coefficient estimates.}
#'
#' \item{mcor}{Correlation matrix for coefficient estimates.}
#'
#' \item{Esm}{Data frame with estimates in the saturated model. It contains for each population \code{j}: the value of the explanatory variables, \code{nj}, \code{Zj}, \code{pj} and Log-Likelihood \code{Lj_tilde}.}
#'
#' \item{Elm}{Data frame with estimates in the logistic model. It contains for each population \code{j}: the value of the explanatory variables, \code{nj}, \code{Zj}, \code{pj}, Log-Likelihood \code{Lj}, \code{Logit_pj} and the variance of logit (\code{var.logit}).}
#'
#' \item{call}{It displays the original call that was used to fit the model lsm.}
#'
#' \item{data}{data envarironment.}
#'
#' \item{\dots}{Additional arguments to be passed to methods.}
#'
#' @encoding UTF-8
#' @details An expression of the form \code{y ~ model} is interpreted as a specification that the response \code{y} is modelled by a linear predictor specified symbolically by \code{model} (systematic component). Such a model consists of a series of terms separated by \code{+} operators. The terms themselves consist of variable and factor names separated by \code{:} operators. Such a term is interpreted as the interaction of all the variables and factors appearing in the term. Here, \code{y} is the outcome variable (binary or dichotomous: its values are 0 or 1).
#'
#' @references [1] LLinás, H. J. (2006). Precisiones en la teoría de los modelos logísticos. Revista Colombiana de Estadística, 29(2), 239–265. https://revistas.unal.edu.co/index.php/estad/article/view/29310
#' @references [2] Hosmer, D.W., Lemeshow, S. and Sturdivant, R.X. (2013). Applied Logistic Regression, 3rd ed., New York: Wiley.
#' @references [3] Chambers, J. M. and Hastie, T. J. (1992). Statistical Models in S. Wadsworth & Brooks/Cole.
#'
#' @author Dr. rer. nat. Humberto LLinás Solano [aut] (Universidad del Norte, Barranquilla-Colombia); MSc. Omar Fábregas Cera [aut] (Universidad del Norte, Barranquilla-Colombia); MSc. Jorge Villalba Acevedo [cre, aut] (Universidad Tecnológica de Bolívar, Cartagena-Colombia).
#'
#' @examples
#' #library(lsm)
#'
#' #1. AGE and Coronary Heart Disease (CHD) Status of 20 subjects:
#'
#' #AGE <- c(20,23,24,25,25,26,26,28,28,29,30,30,30,30,30,30,30,32,33,33)
#' #CHD <- c(0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0)
#' #data <- data.frame (CHD, AGE )
#' #lsm(CHD ~ AGE , data)
#'
#' #2.You can use the following notation:
#'
#' #lsm(y~., data)
#'
#' #3. Other example:
#'
#' #y <- c(1, 0, 1, 0, 1, 1, 1, 1, 0, 0, 1, 1)
#' #x1 <- c(2, 2, 2, 5, 5, 5, 5, 8, 8, 11, 11, 11)
#' #data <- data.frame (y, x1)
#' #ELAINYS <-lsm(y ~ x1, data)
#' #summary(ELAINYS)
#'
#' #4. Other example:
#'
#' #y <- as.factor(c(1, 0, 1, 0, 1, 1, 1, 1, 0, 0, 1, 1))
#' #x1 <- as.factor(c(2, 2, 2, 5, 5, 5, 5, 8, 8, 11, 11, 11))
#' #data <- data.frame (y, x1)
#' #ELAINYS1 <-lsm(y ~ x1, family=binomial, data)
#' #summary(ELAINYS1)
#' @seealso
#' \code{\link{lsm}}
#' @export
lsm <- function(formula, family=binomial, data = environment(formula), ...)
{
# Verificar si se han proporcionado argumentos adicionales
if (length(list(...)) > 0) {
stop("This function does not accept additional arguments. Please review the documentation.")
}
ndata <- as.data.frame(data)
if(anyNA(ndata)==TRUE){
ndata = ndata[complete.cases(ndata),]
}
mf <- model.frame(formula = formula, data = ndata)
mf <- eval(mf, parent.frame())
mt <- attr(mf, "terms")
y <- model.response(mf)
X <- model.matrix(mf)
varclass <- (attr(mt,"dataClasses"))[-1]
xnames <- names(attr(mt,"dataClasses"))[-1]
yy <- unique(mf[,1])
u <- NULL
dat <- as.data.frame(mf)
a <- dim(dat)[2]
for(i in 1:length(yy)){
u[[i]] <- ifelse(mf[,1] == yy[i], 1, 0)
dat[,i+a] <- u[[i]]
}
dat[,-1] %>%
group_by(mf[, names(mf)[-1], drop = FALSE]) %>%
count() -> nn
dat[,-1] %>%
group_by(mf[, names(mf)[-1], drop = FALSE]) %>%
summarize(across(everything(),list(z=sum, p=mean)), .groups = 'drop') -> zz
zj <- select(zz, contains("z"))
zj <- as.matrix(zj[2])
colnames(zj) <- " "
pj <- select(zz, contains("p"))
pj <- as.matrix(pj[2])
colnames(pj) <- " "
nj <- as.matrix(nn["n"])
colnames(nj) <- " "
nn <- as.matrix(nn)
Lp = ifelse(zj == 0 | zj == nj, 0, zj*log(pj)+(nj-zj)*log(1-pj))
Sat <- sum(Lp)
J <- dim(nj)[1]
tab1 <- cbind(nn,zj,pj,Lp)
colnames(tab1)[dim(nn)[2]:dim(tab1)[2]] <-c("nj","zj","pj","Lp")
tab1
vj <- pj * (1 - pj)
mj <- nj * pj
Vj <- nj * vj
V <- diag(vj)
sp <- ( zj - nj * pj)/ vj
sp[is.nan(sp)] <- 0
ip <- diag(nj / vj)
Zj <- (zj - nj * pj) / sqrt(nj * vj)
Zj[is.nan(Zj)] <- 0
####################################################
if(! is.factor(y)) y <- as.factor(y)
y <- unclass(y)
ylevels <- levels(y)
y <- as.integer(y) - 1
#names(y) <- names(mf)[1]
Media <- mean(y)
n1 <- length(mf[, 1])
Nul <- n1 * (Media * log(Media) + (1 - Media) * log(1 - Media))
Nul
####################################################
Com <- 0
####################################################
fmula <- as.formula(paste("y ~ ", paste(xnames, collapse= "+")))
coef <- coefficients(glm(fmula, family , mf))
B <- as.matrix(coef)
#X <- as.matrix(cbind(1,mf[,-1]))
if(anyNA(B)==TRUE){
aa <- which(is.na(B))
B <- na.omit(B)
X <- X[,-aa]
}
cod <- function(x){
if (is.factor(x) | is.character(x))
x <- as.factor(x)
x <- unclass(x)
}
x_n <- as.data.frame(sapply(mf, cod))
x_n %>%
group_by(x_n[, names(mf)[-1], drop = FALSE]) %>%
count() -> ncj
if(anyNA(B)==TRUE){
aa <- which(is.na(B))
B <- na.omit(B)
X <- X[,-aa]
}
g_t <- X %*% B
p_ <- function(g_){exp(g_t)/(1+exp(g_t))}
p_t <- p_(g_t)
q_i <- 1 - p_t
Logi <- sum((y * log(p_t) + (1 - y) * log(q_i)))
n <- length(p_t)
####################################################
I <- p_t * q_i
h <- as.vector(I)
V <- diag(h, length(h))
o <-(t(X) %*% V %*% X)
varB <- solve(o)
dimnames(varB)<- list(names(coef),names(coef))
SEBj <- (diag(varB))^(1/2)
names(SEBj) <- names(coef)
out1 <- dim(nj)[2]
if (is.factor(varclass) | is.character(varclass)){
njj = unique(X)
g_gj <- njj %*% B
p2_ <- function(g_){exp(g_gj) / (1 + exp(g_gj))}
p_gj <- p2_(g_gj)
Lgj <- zj * g_gj - ncj[,"n"] * log(1 + exp(g_gj))
cov <- varB[1,2]
V_Logit <- 2*ncj[,1]*cov + ncj[,1]^2*SEBj[2]^2 + SEBj[1]^2
SE_Logit <- sqrt(V_Logit)
tab2 <- cbind(nj, zj, g_gj, p_gj, Lgj, V_Logit)
colnames(tab2)[dim(nj)[2]:dim(tab2)[2]] <-c("nj","zj","gj","pj","Lj","Var.logit")
} else{
njj = as.matrix(cbind(1,nj[,-out1]))
g_gj <- njj %*% B
p2_ <- function(g_){exp(g_gj) / (1 + exp(g_gj))}
p_gj <- p2_(g_gj)
Lgj <- zj * g_gj - nj[,"n"] * log(1 + exp(g_gj))
cov <- varB[1,2]
V_Logit <- 2*nj[,1]*cov + nj[,1]^2*SEBj[2]^2 + SEBj[1]^2
SE_Logit <- sqrt(V_Logit)
tab2 <- cbind(nj, zj, g_gj, p_gj, Lgj, V_Logit)
colnames(tab2)[dim(nj)[2]:dim(tab2)[2]] <-c("nj","zj","gj","pj","Lj","Var.logit")
}
#W <- t(B) %*% o %*% B
z <- (B/SEBj)^2
#p_vz <- 2*pnorm(abs(z), lower.tail=FALSE)
d_f <- (rep(1, length(z)))
P_valor <- 1 - pchisq(z, d_f)
exb <- exp(B)
OR <- exp(coef[-1])
odd <- p_t/(1 - p_t)
#Comparativo de Modelos#
#Logístico vs Completo#
k <-1
Dvu <- 2*(Com - Logi)
gu <- (n-(k + 1))
p_vu <- pchisq(c(Dvu), df=gu, lower.tail=FALSE)
#Decisi1<-ifelse(p_val1<0.05,"Se rechaza H_0","No se rechaza H_0")
#Nulo vs Logístico#
Dvd <- 2*(Logi - Nul)
p_vd <- pchisq(c(Dvd), df=k, lower.tail=FALSE)
#Decisi<-ifelse(p_val<0.05,"Se rechaza H_0","No se rechaza H_0")
#Logítico vs Saturado#
Dvt <- 2*(Sat - Logi)
gt <- (J-(k + 1))
p_vt <- pchisq(c(Dvt), df=gt, lower.tail=FALSE)
#Decisi2<-ifelse(p_val2<0.05,"Se rechaza H_0","No se rechaza H_0")
Ela <- list(coefficients = B,
coef = coef,
Std.Error = SEBj,
ExpB = exb,
Wald = z,
DF = d_f,
P.value = P_valor,
############################################
Log_Lik_Complete = Com,
Log_Lik_Null = Nul,
Log_Lik_Logit = Logi,
Log_Lik_Saturate = Sat,
Populations = J,
########################################
Dev_Null_vs_Logit = Dvd,
Dev_Logit_vs_Complete = Dvu,
Dev_Logit_vs_Saturate = Dvt,
Df_Null_vs_Logit = k,
Df_Logit_vs_Complete = gu,
Df_Logit_vs_Saturate = gt,
P.v_Null_vs_Logit = p_vd,
P.v_Logit_vs_Complete = p_vu,
P.v_Logit_vs_Saturate = p_vt,
########################################
Logit = g_t,
p_j = p_t,
p_hat_complete = y,
p_hat_null = mean(y) ,
odd = odd,
OR = OR,
########################################
n_j = nj,
z_j = zj,
p_j_tilde = pj,
L_p = Lp,
v_j = vj,
m_j = mj,
V_j = Vj,
V = V ,
S_p = sp,
I_p = ip,
Zast_j = Zj,
Esm = tab1,
Elm = tab2,
########################################
mcov = varB,
mcor = cor(varB),
data = as.data.frame(mf),
formula = formula
)
Ela$call <- match.call()
class(Ela) <- "lsm"
return(Ela)
}
#' @export
print.lsm <- function(x, ...)
{
TB <- cbind(x$coefficients, x$Std.Error, x$ExpB)
colnames(TB) <- c("CoefB", "Std.Error", "ExpB")
cat("\nCall:\n")
print(x$call)
cat("\nPopulations in Saturate Model: ", x$Populations, "\n", sep = "")
cat("\nCoefficients: \n", sep = "")
if(anyNA(x$coef)==TRUE){
cat("(", sum(is.na(x$coef)), " not defined because of singularities)\n", sep = "")
}
print(TB, P.values=TRUE, has.Pvalue=TRUE)
cat("\nLog_Likelihood: \n")
LL <- cbind(x$Log_Lik_Complete, x$Log_Lik_Null, x$Log_Lik_Logit, x$Log_Lik_Saturate)
dimnames(LL) <- list("Estimation", c("Complete", "Null", "Logit", "Saturate"))
print(t(LL))
if(anyNA(unlist(x$data))==TRUE){
cat("(",nrow(x$data) - nrow(na.omit(x$data)) , " observations deleted due to missingness)\n", sep = "")
}
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.