#' @export
mlewrap <-
function(y, # n:1-Matrix (DV-Values)
X, # n:k-matrix (includes Int. & IV-Values
Z, # Predictor Variables for Variance
theta, # Parameter-Vector
par = if(ll=="linear"){
if(hetero){
rep(0, (ncol(X) + ncol(Z)))
} else{
rep(0, ncol(X) + 1)
}
} else{rep(0, ncol(X))}, # Creates vector of 0s as start values
par_scobit = c(res_logit$par, 0), # Sets scobit start values per default as parameters estimated by logistic regression
control, # Defines control for optim()
method, # Defines method of optim()
lower=-Inf, # Bounds on the variables for the "L-BFGS-B" method
seed = F, # If T, seed = 1312
digits, # Defines decimal points in output
ci=95, # Define CI Level
bootstrap=F, # If T, bootstrapped SE, p- and t-values included
samplesize = nrow(X), # Defines Size of Bootstrap Samples
bs.no = 1000, # Defines No. of Bootstrap Samples
IV = c(paste("Var", 1:(ncol(X) - 1))), # Character-Vector of IV-Names
ZV = c(paste("Z", 1:(ncol(Z) - 1))), # Character-Vector of Variable-Names predicting sigma2
ll="linear", # Specify log likelihood-function (default = linear)
hetero = F, # Specify whether heteroskedastic regression or not
parallel = F # If specified, test proportional odds assumption
) {
# Making the function smarter
if(ll!="ologit" & ll!="oprobit"){
if (!all(X[, 1] == 1)) {
X <- cbind(1, X)
}
}
if(hetero){
if (!all(Z[, 1] == 1)) {
Z <- cbind(1, Z)
}}
##### Likelihood Function ---
if (ll=="linear"){
if(!hetero){
lik_func <- function(y, X, theta) {# Linear-Regression, Not Heteroskedastic
n <- nrow(X) # No. of observations
k <- ncol(X) # No. of betas
beta <- theta[1:k] # Subset the parameter vector theta
gamma <- theta[k+1]
sigma2 <- exp(gamma) # Ensure that sigma^2 is positive
e <- y - X %*% beta # Calculate the residuals
llik <- - (n/2)* log(sigma2) - # Estimate Log-Likelihood
(t (e) %*% (e) / (2*sigma2) )
return(llik) # Return Result for optim()
}
} else{
lik_func <- function(theta, y, X, Z) {# Linear-Regression, Heteroskedastic
n <- nrow(X) # No. of observations
k <- ncol(X) # No. of betas
l <- ncol(Z) # No. of gammas
beta <- theta[1:k] # Subset the parameter vector theta
gamma <- theta[(k+1):(k+l)]
sigma2 <- exp(Z %*% gamma) # Parameterize sigma squared
e <- y - X %*% beta # Calculate the residuals
ll <- -1/2*log(sigma2) - # Estimate Log-Likelihood
1/2*(e^2/(sigma2))
ll <- sum(ll)
return(ll) # Return Result for optim()
}
}
} else if(ll=="logit" | ll=="scobit"){# Logit
if(length(unique(y)) > 2){stop("Dependent Variable not Binary")}
lik_func <- function(theta, y, X) {
# theta consists merely of beta (dim is ncol(X))
beta <- theta[1:ncol(X)]
# linear predictor; make sure that X is stored as.matrix
mu <- X %*% beta
# link function
p <- 1/(1 + exp(-mu))
# log-likelihood
ll <- y * log(p) + (1 - y) * log(1 - p)
# sum
ll <- sum(ll)
return(ll)
}
} else if(ll=="ologit" | ll=="oprobit"){# Ordered Logit or probit
cats <- sort(unique(y)) # Different categories in dep.var
K <- length(unique(y)) # Number of categories
L <- matrix(NA, nrow = length(y), ncol = K) # Empty indicator matrix
for (j in 1:K) { # Fills indicator matrix
L[, j] <- y == cats[j]
}
lik_func <- function(theta, L, X) {# Log-Likelihood Function
k <- ncol(X)
J <- ncol(L)
beta <- theta[1:k] # Subset theta
tau <- theta[(k + 1):(k + J - 1)] # Subset theta
U <- X %*% beta # Linear Predictor
probs <- matrix(nrow = length(U), ncol = J) # Probabilities in a matrix
# Calculate Probabilities for the different tau values
if(ll=="ologit"){
probs[, 1] <- 1 / (1 + exp(-(tau[1] - U))) # First Category
for (j in 2:(J - 1)) { # Between categories
probs[, j] <- 1 / (1 + exp(-(tau[j] - U))) -
1 / (1 + exp(-(tau[j - 1] - U)))
}
probs[, J] <- 1 - 1 / (1 + exp(-(tau[J - 1] - U))) # Final Category
}
if(ll=="oprobit"){
probs[, 1] <- pnorm(tau[1] - U) # First Category
for (j in 2:(J - 1)) { # Between categories
probs[, j] <- pnorm(tau[j] - U) -
pnorm(tau[j - 1] - U)
}
probs[, J] <- 1 - pnorm(tau[J - 1] - U) # Final Category
}
ll <- sum(log(probs[L])) # sum over probabilities
return(ll)
}
} else{ # Probit
lik_func <- function(theta, y, X) {
# theta consists merely of beta (dim is ncol(X))
beta <- theta[1:ncol(X)]
# linear predictor; make sure that X is stored as.matrix
mu <- X %*% beta
# link function
p <- pnorm(mu)
# log-likelihood
ll <- y * log(p) + (1 - y) * log(1-p)
# sum
ll <- sum(ll)
return(ll)
}
}
##### Optimization ---
if(hetero){
res <- stats::optim(
par = par, # Define Startvalues for Parameters
fn = lik_func, # Input function from above
y = y, # Input DV
X = X,
Z = Z, # Input Intercept and IVs
control = control, # Define Control-Option
method = method, # Define Climbing-Option
hessian = T, # Include Hessian Matrix in Return
lower = lower)
} else if (ll=="ologit" | ll=="oprobit"){
if(missing(par)){# Sets 0s for betas and increasing values for taus
par = c(rep(0,ncol(X)), seq(-1,0,length.out = K-1))
}
suppressWarnings(
res <- stats::optim(
par = par, # Define Startvalues for Parameters
fn = lik_func, # Input function from above
L = L, # Input DV
X = X, # Input Intercept and IVs
control = control, # Define Control-Option
method = method, # Define Climbing-Option
hessian = T, # Include Hessian Matrix in Return
lower = lower)
)
} else {
res <- stats::optim(
par = par, # Define Startvalues for Parameters
fn = lik_func, # Input function from above
y = y, # Input DV
X = X, # Input Intercept and IVs
control = control, # Define Control-Option
method = method, # Define Climbing-Option
hessian = T, # Include Hessian Matrix in Return
lower = lower)
}
if(ll=="scobit"){ # Scobit
res_logit <- res
lik_func <- function(theta, y, X) {
beta <- theta[1:ncol(X)]
gamma <- theta[ncol(X)+1]
mu <- X %*% beta
alpha <- exp(gamma)
p <- 1/((1+exp(-mu))^alpha)
ll <- y * log(p) + (1 - y) * log(1 - p)
ll <- sum(ll)
return(ll)
}
res <- stats::optim(
par = par_scobit, # Define Startvalues for Parameters (Defualt: logit parameters)
fn = lik_func, # Input function from above
y = y, # Input DV
X = X, # Input Intercept and IVs
control = control, # Define Control-Option
method = method, # Define Climbing-Option
hessian = T, # Include Hessian Matrix in Return
lower = lower)
}
##### Adding information ---
if(ll=="linear" & !hetero | ll=="scobit"){
res$par[length(res$par)] <- # Transform gamma to sigma2/alpha
exp(res$par[length(res$par)])}
res$se <- sqrt(diag(solve(-(res$hessian)))) # Standard errors
t <- res$par / res$se # t-values
n <- nrow(X) # No. of Observations
df <- n - length(res$par) # Degrees of freedom
p <- 2 * pt(-abs(t), df) # p-values
low_ci <- res$par - qnorm((100-ci)/2/100, # lower bound ci
lower.tail=FALSE)*res$se
up_ci <- res$par + qnorm((100-ci)/2/100, # upper bound ci
lower.tail=FALSE)*res$se
res$table <- cbind(Est. = res$par, # Combine results in table format
St.Err. =res$se,
lowci = low_ci,
upci = up_ci,
T.Val = t,
P.Val = p)
res$betas <- res$table[1:ncol(X),] # Take only Beta-Coefficients
if(ll=="ologit" | ll=="oprobit"){
if(missing(IV)){IV = c(paste("Var", 1:(ncol(X))))}
rownames(res$table) <- c(IV, paste("Tau", 1:(ncol(L)-1)))
rownames(res$betas) <- c(IV)
}
else{
rownames(res$betas) <- c("Int.", IV) # Give Table Row-Names
}
colnames(res$betas)[3] <- colnames(res$table)[3] <- # Label CIs correctly
paste("low", paste0(ci,"%"), "CI")
colnames(res$betas)[4] <- colnames(res$table)[4] <-
paste("up", paste0(ci,"%"), "CI")
if(ll=="logit" | ll=="probit"){
res$aic = -2 * res$value + 2 * (ncol(X)+1)
res$bic = -2 * res$value + log(n) * (ncol(X)+1)
}
if(ll=="ologit" | ll=="oprobit"){
res$aic = -2 * res$value + 2 * (ncol(X)+length(unique(y))-1)
res$bic = -2 * res$value + log(n) * (ncol(X)+length(unique(y))-1)
}
if(hetero){
res$gammas <- res$table[(ncol(X)+1):(ncol(X)+ncol(Z)),] # Take only Gamma-Coefficients
rownames(res$gammas) <- c("Int.", ZV) # Give Table Row-Names
colnames(res$gammas)[3:4] <- colnames(res$betas)[3:4] # Label CIs correctly
res$meanvar <- apply(exp(Z %*% res$par[(ncol(X)+1):(ncol(X)+ncol(Z))]),2,mean)
}
if(ll=="scobit"){
# Compute log-likelihood ratio between scobit and logit models
R <- 2 * (res$value - res_logit$value)
llratio <- pchisq(R,
df = 1,
lower.tail = FALSE)
# --> Significant Result indicates that the scobit model is better
}
##### Optional: Add bootstrapped SE, p- and t-values ---
if(bootstrap){
if (seed==T){set.seed(1312)} # Set Seed for replicability
if(hetero){
mle_bs <- function(){
data <- cbind(y,X,Z) # Prepare Data Set
bs_samp <- # Draw Bootstrap Sample
data[sample(1:samplesize, replace = TRUE),]
y_bs <- bs_samp[,1] # Index Bootstrap DV
X_bs <- bs_samp[,2:(ncol(X)+1)] # Index Int. and Bootstrap IVs
Z_bs <- bs_samp[,-c(1:(ncol(X)+1))] # Index Sigma2 IVs
res_bs <- stats::optim( # Running similar optimization as above:
par = par, # Define Startvalues for Parameters
fn = lik_func, # Input function from above
y = y_bs, # Input Bootstrap DV
X = X_bs, # Input Bootstrap Intercept and IVs
Z = Z_bs, # Input Bootstrap Sigma2 IVs
control = control, # Define Control-Option
method = method, # Define Climbing-Option
lower = lower)
return(res_bs$par)}
} else if(ll=="ologit" | ll=="oprobit"){
mle_bs <- function(){
data <- cbind(y,X) # Prepare Data Set
bs_samp <- # Draw Bootstrap Sample
data[sample(1:samplesize, replace = TRUE),]
if(length(unique(bs_samp[,1])) != length(unique(y))){
stop("Bootstrapped samples do not include all DV categories. \n
I suggest using a different seed or specify higher samplesize.")
}
y_bs <- bs_samp[,1] # Index Bootstrap DV
X_bs <- bs_samp[,2:(ncol(X)+1)] # Index Bootstrap IVs
cats_bs <- sort(unique(y_bs)) # Different categories in dep.var
K_bs <- length(unique(y_bs)) # Number of categories
L_bs <- matrix(NA, nrow = length(y_bs), ncol = K_bs) # Empty indicator matrix
for (j in 1:K_bs) { # Fills indicator matrix
L_bs[, j] <- y_bs == cats[j]
}
res_bs <- stats::optim( # Running similar optimization as above:
par = par, # Define Startvalues for Parameters
fn = lik_func, # Input function from above
L = L_bs, # Input Bootstrap DV
X = X_bs, # Input Bootstrap Intercept and IVs
control = control, # Define Control-Option
method = method, # Define Climbing-Option
lower = lower)
return(res_bs$par)}
} else {
mle_bs <- function(){
data <- cbind(y, X) # Prepare Data Set
bs_samp <- # Draw Bootstrap Sample
data[sample(1:samplesize, replace = TRUE),]
y_bs <- bs_samp[,1] # Index Bootstrap DV
X_bs <- bs_samp[,2:ncol(bs_samp)] # Index Int. and Bootstrap IVs
suppressWarnings(
res_bs <- stats::optim( # Running similar optimization as above:
if(ll=="scobit"){par = par_scobit}
else{par = par}, # Define Startvalues for Parameters
fn = lik_func, # Input function from above
y = y_bs, # Input Bootstrap DV
X = X_bs, # Input Bootstrap Intercept and IVs
control = control, # Define Control-Option
method = method, # Define Climbing-Option
lower = lower) )
return(res_bs$par)
}
}
suppressWarnings(
bs_res <- t(replicate(bs.no, mle_bs())) # Repeat this procedure bs.no times
)
res$bs_se <- sqrt(apply(bs_res, 2, var)) # Extract the Bootstrap standard errors
t_bs <- res$par / res$bs_se # Bootstrapped t-values
p_bs <- 2 * pt(-abs(t_bs), df) # Bootstrapped p-values
low_ci_bs <- res$par - qnorm((100-ci)/2/100, # lower bound ci
lower.tail=FALSE)*res$bs_se
up_ci_bs <- res$par + qnorm((100-ci)/2/100, # upper bound ci
lower.tail=FALSE)*res$bs_se
res$table_bs <- cbind(Est. = res$par, # Combine results in table format
Boot.SE =res$bs_se,
lowci = low_ci_bs,
upci = up_ci_bs,
Boot.T.Val = t_bs,
Boot.P.Val = p_bs)
if(ll=="ologit" | ll=="oprobit"){
res$betas_bs <- res$table_bs
rownames(res$table_bs) <- rownames(res$table)
} else {res$betas_bs <- res$table_bs[1:ncol(X),] # Exclude Sigma^2-Row
rownames(res$betas_bs) <- rownames(res$betas) # Give Table Row-Names
}
colnames(res$betas_bs)[3:4] <- colnames(res$table_bs)[3:4] <- colnames(res$betas)[3:4] # Label CIs correctly
if(hetero){
res$gammas_bs <- res$table_bs[(ncol(X)+1):(ncol(X)+ncol(Z)),] # Take only Gamma-Coefficients
rownames(res$gammas_bs) <- c("Int.", ZV) # Give Table Row-Names
colnames(res$gammas_bs)[3:4] <- colnames(res$betas)[3:4] # Label CIs correctly
res$meanvar <- apply(exp(Z %*% res$par[(ncol(X)+1):(ncol(X)+ncol(Z))]),2,mean)
}
}
# Add Test for proportional odds assumption (logit)
if(parallel){
M <- sort(unique(y)) # Creates Ordered Vector with all values in y
M <- M[c(-1)] # Excludes first value
Y <- matrix(NA, nrow = length(y), ncol = length(M))
X_test <- cbind(1, X) # Add intercept
for(i in M){
Y[,i] <- ifelse(as.numeric(y) < i, 0, 1)
}
test_lik <- function(theta, y, X) {
# theta consists merely of beta (dim is ncol(X))
beta <- theta[1:ncol(X)]
# linear predictor; make sure that X is stored as.matrix
mu <- X %*% beta
# link function
if(ll=="ologit"){p <- 1/(1 + exp(-mu))}
if(ll=="oprobit"){p <- pnorm(mu)}
# log-likelihood
ll <- y * log(p) + (1 - y) * log(1 - p)
# sum
ll <- sum(ll)
return(ll)
}
prop <- list()
res$prop_beta <- matrix(NA, nrow = (1+ncol(X)+length(M)), ncol = (1 + length(M)))
res$prop_se <- matrix(NA, nrow = (1+ncol(X)+length(M)), ncol = (1 + length(M)))
if(missing(IV)){IV = c(paste("Var", 1:(ncol(X))))}
rownames(res$prop_se) <- rownames(res$prop_beta) <- c("Int.", IV, paste("Tau", 1:(length(M))))
res$prop_beta[,1] <- c(NA, res$par)
res$prop_se[,1] <- c(NA, res$se)
for(i in 1:ncol(Y)){
prop[[i]] <- stats::optim(
par = c(0, par[1:ncol(X)]), # Define Startvalues for Parameters
fn = test_lik, # Input function from above
y = Y[,i], # Input DV
X = X_test, # Input Intercept and IVs
control = control, # Define Control-Option
method = method, # Define Climbing-Option
hessian = T,
lower = lower)
prop[[i]]$se <- sqrt(diag(solve(-(prop[[i]]$hessian)))) # Standard error
res$prop_beta[,1+i] <- c(prop[[i]]$par, rep(NA, length(M)))
res$prop_se[,1+i] <- c(prop[[i]]$se, rep(NA, length(M)))
}
colnames(res$prop_beta) <- colnames(res$prop_se) <-
c("Ordered", paste(paste0("<", min(M):max(M)), "= 0"))
}
##### Format Output ---
if (missing(digits)) { # Prints Coefficient-Table,
cat("Beta-Coefficients \n") # Log.Likelihood and Variance &
if(ll=="ologit" | ll=="oprobit"){print(res$table)} else {print(res$betas)} # saves output
cat("\n")
if(hetero){
cat("Gamma-Coefficients \n")
print(res$gammas)
cat("\n")}
if(bootstrap){
cat("Bootstrapping \n")
if(ll=="ologit" | ll=="oprobit"){print(res$table_bs)} else {print(res$betas_bs)}
cat("\n")
if(hetero){
print(res$gammas_bs)
cat("\n")}}
cat("Log. Likelihood:", res$value, "\n")
if(ll=="linear"){
if(hetero){
cat("Mean Var.", res$meanvar)
} else{ cat("Variance:", res$par[length(res$par)])}
} else if(ll=="scobit"){
cat("Alpha:", res$par[length(res$par)])
cat("\n")
cat("Likelihood-ratio test of alpha=1: Prob > chi2 =", llratio)
if(llratio <= 0.05){
cat("\n")
cat(" --> Scobit-Model should be prefered")
} else{
cat("\n")
cat(" --> Logit-Model should be prefered")
}
} else {
cat("AIC:", res$aic)
cat("\n")
cat("BIC:", res$bic)
}
if(parallel == T){
cat("\n")
cat("\n", "Coefficients Proportional?", "\n")
print(res$prop_beta, na.print = "")
cat("Standard Errors Proportional?", "\n")
print(res$prop_se, na.print = "")
}
invisible(res)
}
else{
cat("Beta-Coefficients \n") # Prints Coefficient-Table,
if(ll=="ologit" | ll=="oprobit"){print(apply(res$table, c(1,2),round,digits)) # Log.Likelihood and Variance
} else {print(apply(res$betas, c(1,2),round,digits))} # rounded to digits-argument &
cat("\n") # saves output
if(hetero){
cat("Gamma-Coefficients \n")
res$gammas <- apply(res$gammas, c(1,2),round,digits)
print(res$gammas)
cat("\n")}
if(bootstrap){
cat("Bootstrapping \n")
if(ll=="ologit" | ll=="oprobit"){print(apply(res$table_bs, c(1,2),round,digits))
} else {print(apply(res$betas_bs, c(1,2),round,digits))}
cat("\n")
if(hetero){
print(apply(res$gammas_bs, c(1,2),round,digits))
cat("\n")}}
cat("Log. Likelihood:", round(res$value,digits), "\n")
if(ll=="linear"){
if(hetero){
cat("Mean Var.", round(res$meanvar, digits))
} else{cat("Variance:", round(res$par[length(res$par)], digits))}
} else if(ll=="scobit"){
cat("Alpha:", round(res$par[length(res$par)], digits))
cat("\n")
cat("Likelihood-ratio test of alpha=1: Prob > chi2 =", round(llratio, digits))
if(llratio <= 0.05){
cat("\n")
cat(" --> Scobit-Model should be prefered")
} else{
cat("\n")
cat(" --> Logit-Model should be prefered")
}
} else {
cat("AIC:", round(res$aic, digits))
cat("\n")
cat("BIC:", round(res$bic, digits))
}
if(parallel == T){
cat("\n")
cat("\n", "Coefficients Proportional?", "\n")
print(apply(res$prop_beta, c(1,2),round,digits), na.print = "--")
cat("Standard Errors Proportional?", "\n")
print(apply(res$prop_se, c(1,2),round,digits), na.print = "--")
}
invisible(res)
}
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.