Nothing
#' Master Estimation Function
#'
#' The primary estimation function for conducting the optimization. The function
#' is typically called through the \link{mars} function, but can be called here
#' directly.
#'
#' @param formula The formula used for specifying the fixed and random structure.
#' Used for univariate and multilevel structures.
#' @param effect_name Character string representing the name of the effect size
#' column in the data.
#' @param studyID Character string representing the study ID
#' @param estimation_method Type of estimation used, either "REML" or "MLE", REML is the default
#' @param optim_method Optimization method that is passed to the optim function.
#' Default is 'L-BFGS-B'.
#' @param structure Between studies covariance structure, default is "UN" or unstructured. See details for more specifics.
#' @param tol Tolerance of the optimization, default is 1E-10.
#' @param data Data used for analysis
#' @param effectID Character string representing the effect size ID
#' @param variance Character string representing the name of the variance of the effect size in the data.
#' @param varcov_type Type of variance covariance matrix computed. Default is
#' 'cor_weighted' for correlations or 'smd_outcome' for standardized mean differences.
#' @param weights User specified matrix of weights.
#' @param intercept Whether a model intercept should be specified, default is FALSE
#' meaning no intercept. See details for more information.
#' @param N Character string representing the sample size of the studies.
#' @param missing What to do with missing data, default is 'remove'
#' @param robustID A character vector specifying the cluster group to use for computing the robust standard errors.
#' @param multivariate_covs A one-sided formula to specify the covariates used in a multivariate analysis.
#' @param tol Tolerance for estimating, passed to `optim`
#' @param ... Additional arguments to pass to `optim`.
#'
#' @importFrom stats na.omit optim
#'
#' @returns Output is a named list; The output returns the estimated parameters,
#' fit statistics, estimation inputs.
#'
#' @export
estimation <- function(formula = NULL,
effect_name = NULL, studyID = NULL,
effectID = NULL,
variance = NULL,
data,
estimation_method = 'REML',
optim_method = "L-BFGS-B",
structure = 'UN',
varcov_type,
weights = NULL,
intercept = FALSE,
N = NULL,
missing = 'remove',
robustID = NULL,
multivariate_covs = NULL,
tol = 1E-10, ...) {
model_formula_chunks <- parse_formula(formula)
# effects <- model_formula_chunks[['outcome']]
if(length(model_formula_chunks[['randomeffect']]) > 0) {
random_components <- parse_randomeffect(model_formula_chunks[['randomeffect']])
random_names <- parse_randomname(model_formula_chunks[['randomeffect']])
Z_list <- Z_matrix(random_components, data = data)
dim_random <- lapply(seq_along(random_names), function(xx)
length(unique(data[, random_names][,xx])))
names(dim_random) <- random_names
random_data <- unique(data[, random_names])
} else {
Z_list <- NULL
random_names <- NULL
dim_random <- NULL
random_data <- NULL
}
if(!varcov_type %in% c('outcome', 'multilevel', 'univariate')) {
unique_n <- unique(data[[N]])
} else {
unique_n <- NULL
}
Ss <- within_varcov(data, effect_name = effect_name, study = studyID,
N = unique_n,
type = varcov_type,
variance = variance)
if(varcov_type != 'univariate') {
data <- cbind(data, var = do.call("c", lapply(seq_along(Ss),
function(xx) diag(Ss[[xx]]))))
}
if(missing == 'remove') {
data <- na.omit(data)
}
nStudy <- length(unique(data[[studyID]]))
if(!is.null(effectID)) {
yID <- split(as.numeric(data[[effectID]]), data[[studyID]])
q <- length(unique(data[[effectID]]))
X_full <- design_matrix(data = data, effectID = effectID,
studyID = studyID, intercept = intercept,
unlist = TRUE,
multivariate_covs = NULL)
X_full_int <- design_matrix(data = data, effectID = effectID,
studyID = studyID, intercept = TRUE,
unlist = TRUE)
X_design <- design_matrix(data = data, effectID = effectID,
studyID = studyID, intercept = intercept,
unlist = FALSE,
multivariate_covs = multivariate_covs)
if(!is.null(multivariate_covs)) {
q_f <- ncol(X_design[[1]])
} else {
q_f <- q
}
# q <- ncol(X_full)
QE_w <- QE_within(data, effectID = effectID,
effectsize_name = effect_name,
S_name = 'var')
}
if(!is.null(formula)) {
if(structure == 'multilevel') {
yID <- lapply(Ss, seq_length_ncol)
} else {
yID <- lapply(Ss, function(xx) !is.na(xx))
}
X_full <- X_full_int <- design_matrix(data = data, formula = model_formula_chunks[['fixed']],
unlist = TRUE, studyID = studyID)
X_design <- design_matrix(data = data, formula = model_formula_chunks[['fixed']],
unlist = FALSE, studyID = studyID)
effect_name <- model_formula_chunks[['outcome']]
QE_w <- NULL
}
invS <- inverse_psi(varcov = Ss, Tau_est = NULL,
num_studies = nStudy,
yID = yID,
structure = structure)
beta_fe <- b_fe(X = X_full, invS = invS, Y = data[[effect_name]])
QE <- QE_compute(X = X_full_int, b = beta_fe,
Y = data[[effect_name]], invS = invS)
if(structure == 'multilevel') {
q_f <- ncol(X_full)
q_r <- Z_mat_helper(random_components, data = data)
q <- q_f + q_r
params <- params_helper(q_f = q_f,
q_r = q_r,
structure = structure)
} else if(structure == 'univariate') {
q_f <- ncol(X_full)
q_r <- 1
q <- q_f + q_r
params <- params_helper(q_f = q_f,
q_r = q_r,
structure = structure)
} else {
q_r <- length(unique(data[[effectID]]))
params <- params_helper(q_f = q_f, q_r = q_r, structure = structure)
}
if(estimation_method == "REML") {
if(!is.null(formula)) {
X <- design_matrix(data = data, formula = model_formula_chunks[['fixed']],
unlist = FALSE, studyID = studyID)
} else {
X <- design_matrix(data, effectID, studyID, intercept = intercept)
}
} else {
X <- NULL
}
if(!estimation_method == 'FE') {
if(is.null(Z_list)) {
optim_values <- optim(par = params[['params']], q_f = q_f, q_r = q_r,
nStudy = nStudy, yID = yID,
effect_name = data[[effect_name]], Ss = Ss, StudyID = data[[studyID]], X = X,
X_design = X_design,
estimation_method = estimation_method, structure = structure,
fn = loglikef,
lower = params[['lower']], upper = params[['upper']],
method = optim_method, control = list(factr = tol),
...)
if(structure == 'univariate') {
est_values <- return_estimates(q_f = q_f, q_r = q_r,
estimated_pars = optim_values[['par']],
structure = structure)
} else {
est_values <- return_estimates(q_f = q_f, q_r = q_r,
estimated_pars = optim_values[['par']],
structure = structure)
}
} else {
optim_values <- optim(par = params[['params']], nStudy = nStudy, yID = yID,
effect_name = data[[effect_name]], Ss = Ss, StudyID = data[[studyID]], X = X,
X_design = X_design,
estimation_method = estimation_method, structure = structure,
q_f = q_f, q_r = q_r, Z = Z_list,
fn = loglikef_mixed,
lower = params[['lower']], upper = params[['upper']],
method = optim_method, control = list(factr = tol),
hessian = FALSE,
...)
est_values <- return_estimates(q_f = q_f, q_r = q_r,
estimated_pars = optim_values[['par']],
structure = structure)
}
invPsi <- inverse_psi(varcov = Ss, Tau_est = est_values[['Tau']],
num_studies = nStudy,
yID = yID,
Z_list = Z_list,
structure = structure)
beta_r <- b_mix(X = do.call("rbind", X_design), invPsi = invPsi,
Y = data[[effect_name]])
varcov_beta <- covb_mix(do.call("rbind", X_design), invPsi)
beta_r_int <- b_mix(X = X_full_int, invPsi = invPsi,
Y = data[[effect_name]])
varcov_beta_int <- covb_mix(X_full_int, invPsi)
QM <- QM(mu_est = beta_r_int,
mu_cov = varcov_beta_int)
F_test <- f_test(QMmix = QM,
QEmix = QE_mix(X = X_full_int,
bmix = beta_r_int,
Y = data[[effect_name]],
invPsi = invPsi),
num_obs = nrow(data))
I2_within <- I2_within(X = X_full_int, invS = invS,
Tau_est = est_values[['Tau']],
q = q)
I2_jackson <- I2_jackson(X = X_full, invS = invS,
mu_cov = varcov_beta)
I2_between <- I2_between(X = X_full_int, invS = invS,
Tau_est = est_values[['Tau']],
q = q)
fit_stats <- fit_statistics(loglik = optim_values[['value']], estimation_method = estimation_method,
design_matrix = X_full, q = q,
num_params = length(optim_values[['par']]))
fitted_values <- predicted_values(beta_r, do.call("rbind", X_design))
residuals <- residual_values(fitted_values, data[[effect_name]])
if(!is.null(robustID)) {
Psi <- inverse_psi(varcov = Ss, Tau_est = est_values[['Tau']],
num_studies = nStudy,
yID = yID,
Z_list = Z_list,
structure = structure,
inverse = FALSE)
robust <- robust_se(data = data,
clusterID = robustID,
Psi = Psi,
X = X_full,
cov_b = varcov_beta,
residuals = residuals)
} else {
robust <- NULL
}
} else {
est_values <- NULL
formula <- NULL
beta_r <- NULL
varcov_beta <- covb_mix(X_full, invS)
varcov_beta_int <- NULL
random_names <- NULL
dim_random <- NULL
random_data <- NULL
QM <- NULL
F_test = NULL
I2_within = NULL
I2_between = NULL
I2_jackson = NULL
fit_stats = NULL
fitted_values <- predicted_values(beta_fe, X_full)
residuals <- residual_values(fitted_values, data[[effect_name]])
robust = NULL
structure = structure
}
list(formula = formula,
estimator = estimation_method,
est_values = est_values,
beta_r = beta_r,
beta_fe = beta_fe,
varcov_beta = varcov_beta,
varcov_beta_int = varcov_beta_int,
random_names = random_names,
dim_random = dim_random,
random_data = random_data,
QE = QE,
QE_w = QE_w,
QM = QM,
F_test = F_test,
I2_within = I2_within,
I2_jackson = I2_jackson,
I2_between = I2_between,
fit_stats = fit_stats,
effect_size = data[[effect_name]],
effect_size_name = effect_name,
fitted = fitted_values,
residuals = residuals,
robust_se = robust,
structure = structure,
q_f = q_f,
q_r = q_r,
design_matrix =
if(intercept) {
X_full_int
} else {
do.call("rbind", X_design)
},
weight_matrix =
if(estimation_method != 'FE') {
invPsi
} else {
invS
},
Ss = Ss
)
}
loglikef <- function(params, X, effect_name, q_f, q_r,
yID, Ss, nStudy, StudyID,
X_design,
estimation_method = 'MLE',
structure = "UN") {
if(structure == 'univariate') {
Tau <- params[(q_f+1):(q_f + q_r)]
mu <- params[1:q_f]
#mu <- 0
initial_matrix <- 0
} else {
Tau <- correlation_structures(params, q_r = q_r, q_f = q_f,
structure = structure)
mu <- params[1:q_f]
initial_matrix <- matrix(0, q_f, q_f)
}
initial_value <- 0
effect_size <- split(effect_name, StudyID)
for(i in 1:nStudy) {
if(structure == 'univariate') {
cov <- Ss[[i]][yID[[i]]] + Tau[yID[[i]]]
} else {
cov <- Ss[[i]][yID[[i]], yID[[i]]] + Tau[yID[[i]], yID[[i]]]
}
# es_mean <- effect_size[[i]] - X_design[[i]] %*% as.matrix(mu[yID[[i]]])
es_mean <- effect_size[[i]] - X_design[[i]] %*% as.matrix(mu)
es_mean <- as.matrix(es_mean)
logdet <- determinant(as.matrix(cov), logarithm = TRUE)
if(logdet$sign <= 0) logdet <- -Inf else logdet <- logdet$modulus
temp <- logdet + as.numeric(t(es_mean) %*% chol2inv(chol(cov)) %*% es_mean)
initial_value <- initial_value + temp
if(estimation_method == 'REML') {
if(length(yID[[i]]) == 1) {
cov <- as.numeric(cov)
# initial_matrix <- initial_matrix + tcrossprod(X_design[[i]]) / cov
initial_matrix <- initial_matrix + crossprod(X_design[[i]], (X_design[[i]] / cov))
} else {
initial_matrix <- initial_matrix + t(X_design[[i]]) %*% chol2inv(chol(cov)) %*% X_design[[i]]
}
logdet_reml <- determinant(as.matrix(initial_matrix), logarithm = TRUE)
if(logdet_reml$sign <= 0) logdet_reml <- -Inf else logdet_reml <- logdet_reml$modulus
initial_value_reml <- initial_value + logdet_reml
}
}
if(estimation_method == 'MLE') {
as.numeric(initial_value)
} else {
as.numeric(initial_value_reml)
}
}
loglikef_mixed <- function(params, X, Z, effect_name, Ss, nStudy,
StudyID, X_design, q_f, q_r, yID,
estimation_method = 'MLE',
structure) {
mu <- params[1:q_f]
#mu <- 0
Tau <- params[(q_f+1):(q_f + q_r)]
initial_value <- 0
initial_matrix <- matrix(0, q_f, q_f)
effect_size <- split(effect_name, StudyID)
if(is.null(Z)) {
# to come...
} else {
Tau <- lapply(seq_along(Z), function(i)
sum_list(lapply(seq_along(Tau),function(j)
as.matrix(Matrix::bdiag(lapply(Z[[i]][[j]],function(x)
x %*% Tau[[j]] %*% t(x)))))))
}
for(i in 1:nStudy) {
cov <- Ss[[i]] + Tau[[i]]
es_mean <- effect_size[[i]] - X_design[[i]] %*% as.matrix(mu)
es_mean <- as.matrix(es_mean)
logdet <- determinant(as.matrix(cov), logarithm = TRUE)
if(logdet$sign <= 0) logdet <- -Inf else logdet <- logdet$modulus
temp <- logdet + as.numeric(t(es_mean) %*% chol2inv(chol(cov)) %*% es_mean)
initial_value <- initial_value + temp
if(estimation_method == 'REML') {
if(length(yID[[i]]) == 1) {
cov <- as.numeric(cov)
initial_matrix <- initial_matrix + tcrossprod(X[[i]]) / cov
} else {
initial_matrix <- initial_matrix + t(X[[i]]) %*% chol2inv(chol(cov)) %*% X[[i]]
}
logdet_reml <- determinant(as.matrix(initial_matrix), logarithm = TRUE)
if(logdet_reml$sign <= 0) logdet_reml <- -Inf else logdet_reml <- logdet_reml$modulus
initial_value_reml <- initial_value + logdet_reml
}
}
if(estimation_method == 'MLE') {
as.numeric(initial_value)
} else {
as.numeric(initial_value_reml)
}
}
return_estimates <- function(q_f = NULL, q_r = NULL,
estimated_pars, structure) {
if(structure != 'multilevel') {
if(structure == 'univariate') {
Tau_estimates <- correlation_export(estimated_pars = estimated_pars,
q_f = q_f,
q_r = q_r,
structure = structure)
mu_estimates <- estimated_pars[1:q_f]
} else {
Tau_estimates <- correlation_export(estimated_pars = estimated_pars,
q_f = q_f,
q_r = q_r,
structure = structure)
mu_estimates <- estimated_pars[1:q_f]
}
} else {
Tau_estimates <- correlation_export(estimated_pars = estimated_pars,
q_f = q_f, q_r = q_r,
structure = structure)
mu_estimates <- estimated_pars[1:q_f]
}
list(mu = mu_estimates,
Tau = Tau_estimates)
}
design_matrix <- function(data, effectID, studyID, intercept,
formula = NULL,
unlist = FALSE,
multivariate_covs = NULL) {
num_effects <- as.numeric(table(data[[studyID]]))
start <- c(1, cumsum(num_effects)[1:(length(num_effects)-1)]+1)
if(is.null(formula)) {
if(intercept) {
dm <- model.matrix(~ factor(data[[effectID]]))
} else {
dm <- model.matrix(~ factor(data[[effectID]]) - 1)
}
} else {
dm <- model.matrix(formula, data = data)
}
if(!is.null(multivariate_covs)) {
dm2 <- model.matrix(as.formula(paste0('~', multivariate_covs, '-1')[2]), data = data)
dm <- cbind(dm, dm2)
}
fixed_names <- colnames(dm)
dm_list <- lapply(seq_along(num_effects), function(xx)
dm[start[xx]:cumsum(num_effects)[xx], , drop = FALSE]
)
if(unlist) {
# dm
dm_df <- do.call('rbind', dm_list)
names(dm_df) <- fixed_names
} else {
dm_df <- dm_list
for(ii in seq_along(dm_df)) {
names(dm_df[[ii]]) <- fixed_names
}
}
dm_df
}
var_cov_fe <- function(q, nStudy, Ss, Tau, yID, X) {
cov_matrix <- matrix(0, q, q)
for(i in 1:nStudy){
cov <- Ss[[i]][yID[[i]], yID[[i]] ] + Tau[yID[[i]], yID[[i]] ]
if(length(yID[[i]]) == 1){
cov <- as.numeric(cov)
cov_matrix <- cov_matrix + t(X[[i]]) %*% X[[i]] / cov
}else{
cov_matrix <- cov_matrix + t(X[[i]]) %*% chol2inv(chol(cov)) %*% X[[i]]
}
}
mu.cov <- chol2inv(chol(cov_matrix))
list(
var_cov = mu.cov,
SE = sqrt(diag(mu.cov))
)
}
params_helper <- function(q_f = NULL,
q_r = NULL,
structure = 'UN') {
switch(structure,
UN = unstructured_param(q_f, q_r),
DIAG1 = diag1_param(q_f, q_r),
DIAG2 = diag2_param(q_f, q_r),
CS = cs_param(q_f, q_r),
HCS = hcs_param(q_f, q_r),
AR1 = ar1_param(q_f, q_r),
multilevel = multilevel_param(q_f, q_r),
univariate = multilevel_param(q_f, q_r)
)
}
unstructured_param <- function(q_f, q_r) {
params <- rep(0, q_f + q_r*(q_r + 1)/2)
lower <- rep(-Inf, q_f + q_r*(q_r + 1)/2)
upper <- rep(Inf, q_f + q_r*(q_r + 1)/2)
params[q_f + cumsum(1:q_r)] <- 1
lower[q_f + cumsum(1:q_r)] <- 10^(-6)
list(
params = params,
lower = lower,
upper = upper
)
}
# Unique variance across random dimensions
diag1_param <- function(q_f, q_r) {
params <- rep(0, q_f + q_r)
lower <- rep(-Inf, q_f + q_r)
upper <- rep(Inf, q_f + q_r)
params[q_f + 1:q_r] <- 1
lower[q_f + 1:q_r] <- 10^(-6)
list(
params = params,
lower = lower,
upper = upper
)
}
# Pooled variance
diag2_param <- function(q_f, q_r) {
params <- rep(0, q_f + 1)
lower <- rep(-Inf, q_f + 1)
upper <- rep(Inf, q_f + 1)
params[q_f + 1] <- 1
lower[q_f + 1] <- 10^(-6)
list(
params = params,
lower = lower,
upper = upper
)
}
cs_param <- function(q_f, q_r) {
params <- rep(0, q_f + 2)
lower <- rep(-Inf, q_f + 2)
upper <- rep(Inf, q_f + 2)
upper[q_f + 2] <- 1
params[q_f + 1:2] <- 1
lower[q_f + 1] <- 10^(-6)
lower[q_f + 2] <- -1
list(
params = params,
lower = lower,
upper = upper
)
}
hcs_param <- function(q_f, q_r) {
params <- rep(0, q_f + q_r + 1)
lower <- rep(-Inf, q_f + q_r + 1)
upper <- rep(Inf, q_f + q_r + 1)
upper[q_f + q_r + 1] <- 1
params[-1:-q_f] <- 1
lower[(q_f+1):(q_f + q_r)] <- 10^(-6)
lower[q_f + q_r + 1] <- -1
list(
params = params,
lower = lower,
upper = upper
)
}
ar1_param <- function(q_f, q_r) {
params <- rep(0, q_f + 2)
lower <- rep(-Inf, q_f + 2)
upper <- rep(Inf, q_f + 2)
upper[q_f + 2] <- 1
params[-1:-q_f] <- 1
lower[q_f + 1 ] <- 10^(-6)
lower[q_f + 2] <- -1
list(
params = params,
lower = lower,
upper = upper
)
}
multilevel_param <- function(q_f, q_r) {
params <- rep(0, q_f + q_r)
lower <- rep(-Inf, q_f + q_r)
upper <- rep(Inf, q_f + q_r)
params[(q_f + 1):(q_f + q_r)] <- 1
lower[(q_f + 1):(q_f + q_r)] <- 10^(-6)
list(
params = params,
lower = lower,
upper = upper
)
}
fit_statistics <- function(loglik, estimation_method,
design_matrix, q, num_params) {
if(estimation_method == 'REML') {
part1 <- -0.5 * (nrow(design_matrix) - ncol(design_matrix)) * log(2 * base::pi) + 0.5 * log(det(t(design_matrix) %*% design_matrix))
part2 <- .5 * loglik
logLik <- part1 - part2
Dev <- -2 * logLik
AIC <- -2 * logLik + 2 * num_params
BIC <- -2 * logLik + num_params * log(nrow(design_matrix) - ncol(design_matrix))
k <- ncol(design_matrix) + q + 2
AICc <- AIC + (2 * k * (k + 1)) / (nrow(design_matrix) - k - 1)
reml.fit <- as.data.frame(cbind(logLik, Dev, AIC, BIC, AICc))
names(reml.fit) <- c("logLik", "Dev", "AIC", "BIC", "AICc")
reml.fit
} else {
part1 <- -0.5 * nrow(design_matrix) * log(2 * base::pi)
part2 <- .5 * loglik
logLik <- part1 - part2
Dev <- -2 * logLik + 2 * (ncol(design_matrix) + q)
AIC <- -2 * logLik + 2* num_params
BIC <- -2 * logLik + num_params * log(nrow(design_matrix))
k <- ncol(design_matrix) + q + 2
AICc <- AIC + (2 * k * (k + 1)) / (nrow(design_matrix) - k - 1)
mle.fit <- as.data.frame(cbind(logLik, Dev, AIC, BIC, AICc))
names(mle.fit) <- c("logLik", "Dev", "AIC", "BIC", "AICc")
mle.fit
}
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.