R/estimation.r
In mars: Meta Analysis and Research Synthesis

Documented in estimation

#' Master Estimation Function
#'
#' The primary estimation function for conducting the optimization. The function
#' is typically called through the \link{mars} function, but can be called here
#' directly.
#'
#' @param formula The formula used for specifying the fixed and random structure.
#' Used for univariate and multilevel structures.
#' @param effect_name Character string representing the name of the effect size
#'   column in the data.
#' @param studyID Character string representing the study ID
#' @param estimation_method Type of estimation used, either "REML" or "MLE", REML is the default
#' @param optim_method Optimization method that is passed to the optim function.
#'  Default is 'L-BFGS-B'.
#' @param structure Between studies covariance structure, default is "UN" or unstructured. See details for more specifics.
#' @param tol Tolerance of the optimization, default is 1E-10.
#' @param data Data used for analysis
#' @param effectID Character string representing the effect size ID
#' @param variance Character string representing the name of the variance of the effect size in the data.
#' @param varcov_type Type of variance covariance matrix computed. Default is
#' 'cor_weighted' for correlations or 'smd_outcome' for standardized mean differences.
#' @param weights User specified matrix of weights.
#' @param intercept Whether a model intercept should be specified, default is FALSE
#'  meaning no intercept. See details for more information.
#' @param N Character string representing the sample size of the studies.
#' @param missing What to do with missing data, default is 'remove'
#' @param robustID A character vector specifying the cluster group to use for computing the robust standard errors.
#' @param multivariate_covs A one-sided formula to specify the covariates used in a multivariate analysis.
#' @param tol Tolerance for estimating, passed to `optim`
#' @param ... Additional arguments to pass to `optim`.
#'
#' @importFrom stats na.omit optim
#'
#' @returns Output is a named list; The output returns the estimated parameters,
#'    fit statistics, estimation inputs.
#'
#' @export
estimation <- function(formula = NULL,
                       effect_name = NULL, studyID = NULL,
                       effectID = NULL,
                       variance = NULL,
                       data,
                       estimation_method = 'REML',
                       optim_method = "L-BFGS-B",
                       structure = 'UN',
                       varcov_type,
                       weights = NULL,
                       intercept = FALSE,
                       N = NULL,
                       missing = 'remove',
                       robustID = NULL,
                       multivariate_covs = NULL,
                       tol = 1E-10, ...) {

  model_formula_chunks <- parse_formula(formula)
  # effects <- model_formula_chunks[['outcome']]
  if(length(model_formula_chunks[['randomeffect']]) > 0) {
    random_components <- parse_randomeffect(model_formula_chunks[['randomeffect']])
    random_names <- parse_randomname(model_formula_chunks[['randomeffect']])
    Z_list <- Z_matrix(random_components, data = data)
    dim_random <- lapply(seq_along(random_names), function(xx)
      length(unique(data[, random_names][,xx])))
    names(dim_random) <- random_names
    random_data <- unique(data[, random_names])
  } else {
    Z_list <- NULL
    random_names <- NULL
    dim_random <- NULL
    random_data <- NULL
  }


  if(!varcov_type %in% c('outcome', 'multilevel', 'univariate')) {
    unique_n <- unique(data[[N]])
  } else {
    unique_n <- NULL
  }

  Ss <- within_varcov(data, effect_name = effect_name, study = studyID,
                      N = unique_n,
                      type = varcov_type,
                      variance = variance)

  if(varcov_type != 'univariate') {
    data <- cbind(data, var = do.call("c", lapply(seq_along(Ss),
                                                  function(xx) diag(Ss[[xx]]))))
  }


  if(missing == 'remove') {
    data <- na.omit(data)
  }

  nStudy <- length(unique(data[[studyID]]))

  if(!is.null(effectID)) {
    yID <- split(as.numeric(data[[effectID]]), data[[studyID]])
    q <- length(unique(data[[effectID]]))

    X_full <- design_matrix(data = data, effectID = effectID,
                            studyID = studyID, intercept = intercept,
                            unlist = TRUE,
                            multivariate_covs = NULL)
    X_full_int <- design_matrix(data = data, effectID = effectID,
                                studyID = studyID, intercept = TRUE,
                                unlist = TRUE)
    X_design <- design_matrix(data = data, effectID = effectID,
                              studyID = studyID, intercept = intercept,
                              unlist = FALSE,
                              multivariate_covs = multivariate_covs)
    if(!is.null(multivariate_covs)) {
      q_f <- ncol(X_design[[1]])
    } else {
      q_f <- q
      }
    # q <- ncol(X_full)
    QE_w <- QE_within(data, effectID = effectID,
                      effectsize_name = effect_name,
                      S_name = 'var')
  }
  if(!is.null(formula)) {
    if(structure == 'multilevel') {
      yID <- lapply(Ss, seq_length_ncol)
    } else {
      yID <- lapply(Ss, function(xx) !is.na(xx))
    }

    X_full <- X_full_int <- design_matrix(data = data, formula = model_formula_chunks[['fixed']],
                        unlist = TRUE, studyID = studyID)
    X_design <- design_matrix(data = data, formula = model_formula_chunks[['fixed']],
                              unlist = FALSE, studyID = studyID)
    effect_name <- model_formula_chunks[['outcome']]
    QE_w <- NULL
  }


  invS <- inverse_psi(varcov = Ss, Tau_est = NULL,
                      num_studies = nStudy,
                      yID = yID,
                      structure = structure)
  beta_fe <- b_fe(X = X_full, invS = invS, Y = data[[effect_name]])

  QE <- QE_compute(X = X_full_int, b = beta_fe,
                   Y = data[[effect_name]], invS = invS)


  if(structure == 'multilevel') {
    q_f <- ncol(X_full)
    q_r <- Z_mat_helper(random_components, data = data)
    q <- q_f + q_r
    params <- params_helper(q_f = q_f,
                            q_r = q_r,
                            structure = structure)
  } else if(structure == 'univariate') {
    q_f <- ncol(X_full)
    q_r <- 1
    q <- q_f + q_r
    params <- params_helper(q_f = q_f,
                            q_r = q_r,
                            structure = structure)
  } else {
    q_r <- length(unique(data[[effectID]]))
    params <- params_helper(q_f = q_f, q_r = q_r, structure = structure)
  }


  if(estimation_method == "REML") {
    if(!is.null(formula)) {
      X <- design_matrix(data = data, formula = model_formula_chunks[['fixed']],
                         unlist = FALSE, studyID = studyID)
    } else {
      X <- design_matrix(data, effectID, studyID, intercept = intercept)
    }
  } else {
    X <- NULL
  }

  if(!estimation_method == 'FE') {

    if(is.null(Z_list)) {
      optim_values <- optim(par = params[['params']], q_f = q_f, q_r = q_r,
                            nStudy = nStudy, yID = yID,
                            effect_name = data[[effect_name]], Ss = Ss, StudyID = data[[studyID]], X = X,
                            X_design = X_design,
                            estimation_method = estimation_method, structure = structure,
                            fn = loglikef,
                            lower = params[['lower']], upper = params[['upper']],
                            method = optim_method, control = list(factr = tol),
                            ...)
      if(structure == 'univariate') {
        est_values <- return_estimates(q_f = q_f, q_r = q_r,
                                       estimated_pars = optim_values[['par']],
                                       structure = structure)
      } else {
        est_values <- return_estimates(q_f = q_f, q_r = q_r,
                                       estimated_pars = optim_values[['par']],
                                       structure = structure)
      }
    } else {
      optim_values <- optim(par = params[['params']], nStudy = nStudy, yID = yID,
                            effect_name = data[[effect_name]], Ss = Ss, StudyID = data[[studyID]], X = X,
                            X_design = X_design,
                            estimation_method = estimation_method, structure = structure,
                            q_f = q_f, q_r = q_r, Z = Z_list,
                            fn = loglikef_mixed,
                            lower = params[['lower']], upper = params[['upper']],
                            method = optim_method, control = list(factr = tol),
                            hessian = FALSE,
                            ...)
      est_values <- return_estimates(q_f = q_f, q_r = q_r,
                                     estimated_pars = optim_values[['par']],
                                     structure = structure)
    }

      invPsi <- inverse_psi(varcov = Ss, Tau_est = est_values[['Tau']],
                            num_studies = nStudy,
                            yID = yID,
                            Z_list = Z_list,
                            structure = structure)

      beta_r <- b_mix(X = do.call("rbind", X_design), invPsi = invPsi,
                             Y = data[[effect_name]])
      varcov_beta <- covb_mix(do.call("rbind", X_design), invPsi)

      beta_r_int <- b_mix(X = X_full_int, invPsi = invPsi,
                          Y = data[[effect_name]])
      varcov_beta_int <- covb_mix(X_full_int, invPsi)

      QM <- QM(mu_est = beta_r_int,
               mu_cov = varcov_beta_int)

      F_test <- f_test(QMmix = QM,
                       QEmix = QE_mix(X = X_full_int,
                                      bmix = beta_r_int,
                                      Y = data[[effect_name]],
                                      invPsi = invPsi),
                       num_obs = nrow(data))

      I2_within <- I2_within(X = X_full_int, invS = invS,
                             Tau_est = est_values[['Tau']],
                             q = q)
      I2_jackson <- I2_jackson(X = X_full, invS = invS,
                               mu_cov = varcov_beta)
      I2_between <- I2_between(X = X_full_int, invS = invS,
                               Tau_est = est_values[['Tau']],
                               q = q)

      fit_stats <- fit_statistics(loglik = optim_values[['value']], estimation_method = estimation_method,
                                  design_matrix = X_full, q = q,
                                  num_params = length(optim_values[['par']]))

      fitted_values <- predicted_values(beta_r, do.call("rbind", X_design))
      residuals <- residual_values(fitted_values, data[[effect_name]])
      if(!is.null(robustID)) {
        Psi <- inverse_psi(varcov = Ss, Tau_est = est_values[['Tau']],
                              num_studies = nStudy,
                              yID = yID,
                              Z_list = Z_list,
                              structure = structure,
                              inverse = FALSE)

        robust <- robust_se(data = data,
                            clusterID = robustID,
                            Psi = Psi,
                            X = X_full,
                            cov_b = varcov_beta,
                            residuals = residuals)
      } else {
        robust <- NULL
      }


  } else {
    est_values <- NULL
    formula <- NULL
    beta_r <- NULL
    varcov_beta <- covb_mix(X_full, invS)
    varcov_beta_int <- NULL
    random_names <- NULL
    dim_random <- NULL
    random_data <- NULL
    QM <- NULL
    F_test = NULL
    I2_within = NULL
    I2_between = NULL
    I2_jackson = NULL
    fit_stats = NULL
    fitted_values <- predicted_values(beta_fe, X_full)
    residuals <- residual_values(fitted_values, data[[effect_name]])
    robust = NULL
    structure = structure
  }

  list(formula = formula,
       estimator = estimation_method,
       est_values = est_values,
       beta_r = beta_r,
       beta_fe = beta_fe,
       varcov_beta = varcov_beta,
       varcov_beta_int = varcov_beta_int,
       random_names = random_names,
       dim_random = dim_random,
       random_data = random_data,
       QE = QE,
       QE_w = QE_w,
       QM = QM,
       F_test = F_test,
       I2_within = I2_within,
       I2_jackson = I2_jackson,
       I2_between = I2_between,
       fit_stats = fit_stats,
       effect_size = data[[effect_name]],
       effect_size_name = effect_name,
       fitted = fitted_values,
       residuals = residuals,
       robust_se = robust,
       structure = structure,
       q_f = q_f,
       q_r = q_r,
       design_matrix =
       if(intercept) {
         X_full_int
       } else {
         do.call("rbind", X_design)
       },
       weight_matrix =
         if(estimation_method != 'FE') {
           invPsi
         } else {
           invS
         },
       Ss = Ss

  )

}

loglikef <- function(params, X, effect_name, q_f, q_r,
                     yID, Ss, nStudy, StudyID,
                     X_design,
                     estimation_method = 'MLE',
                     structure = "UN") {

  if(structure == 'univariate') {
    Tau <- params[(q_f+1):(q_f + q_r)]
    mu <- params[1:q_f]
    #mu <- 0
    initial_matrix <- 0
  } else {
    Tau <- correlation_structures(params, q_r = q_r, q_f = q_f,
                                  structure = structure)
    mu <- params[1:q_f]
    initial_matrix <- matrix(0, q_f, q_f)
  }

  initial_value <- 0
  effect_size <- split(effect_name, StudyID)
  for(i in 1:nStudy) {
    if(structure == 'univariate') {
      cov <- Ss[[i]][yID[[i]]] + Tau[yID[[i]]]
    } else {
      cov <- Ss[[i]][yID[[i]], yID[[i]]] + Tau[yID[[i]], yID[[i]]]
    }
    # es_mean <- effect_size[[i]] - X_design[[i]] %*% as.matrix(mu[yID[[i]]])
    es_mean <- effect_size[[i]] - X_design[[i]] %*% as.matrix(mu)
    es_mean <- as.matrix(es_mean)
    logdet <- determinant(as.matrix(cov), logarithm = TRUE)
    if(logdet$sign <= 0) logdet <- -Inf else logdet <- logdet$modulus
    temp <- logdet + as.numeric(t(es_mean) %*% chol2inv(chol(cov)) %*% es_mean)
    initial_value <- initial_value + temp

    if(estimation_method == 'REML') {
      if(length(yID[[i]]) == 1) {
        cov <- as.numeric(cov)
        # initial_matrix <- initial_matrix + tcrossprod(X_design[[i]]) / cov
        initial_matrix <- initial_matrix + crossprod(X_design[[i]], (X_design[[i]] / cov))
      } else {
        initial_matrix <- initial_matrix + t(X_design[[i]]) %*% chol2inv(chol(cov)) %*% X_design[[i]]
      }
      logdet_reml <- determinant(as.matrix(initial_matrix), logarithm = TRUE)
      if(logdet_reml$sign <= 0) logdet_reml <- -Inf else logdet_reml <- logdet_reml$modulus
      initial_value_reml <- initial_value + logdet_reml
    }
  }
  if(estimation_method == 'MLE') {
    as.numeric(initial_value)
  } else {
    as.numeric(initial_value_reml)
  }

}

loglikef_mixed <- function(params, X, Z, effect_name, Ss, nStudy,
                             StudyID, X_design, q_f, q_r, yID,
                             estimation_method = 'MLE',
                             structure) {

  mu <- params[1:q_f]
  #mu <- 0
  Tau <- params[(q_f+1):(q_f + q_r)]
  initial_value <- 0
  initial_matrix <- matrix(0, q_f, q_f)
  effect_size <- split(effect_name, StudyID)

  if(is.null(Z)) {
    # to come...
  } else {
    Tau <- lapply(seq_along(Z), function(i)
      sum_list(lapply(seq_along(Tau),function(j)
        as.matrix(Matrix::bdiag(lapply(Z[[i]][[j]],function(x)
          x %*% Tau[[j]] %*% t(x)))))))
  }

  for(i in 1:nStudy) {

    cov <- Ss[[i]] + Tau[[i]]
    es_mean <- effect_size[[i]] - X_design[[i]] %*% as.matrix(mu)
    es_mean <- as.matrix(es_mean)
    logdet <- determinant(as.matrix(cov), logarithm = TRUE)
    if(logdet$sign <= 0) logdet <- -Inf else logdet <- logdet$modulus
    temp <- logdet + as.numeric(t(es_mean) %*% chol2inv(chol(cov)) %*% es_mean)
    initial_value <- initial_value + temp

    if(estimation_method == 'REML') {
      if(length(yID[[i]]) == 1) {
        cov <- as.numeric(cov)
        initial_matrix <- initial_matrix + tcrossprod(X[[i]]) / cov
      } else {
        initial_matrix <- initial_matrix + t(X[[i]]) %*% chol2inv(chol(cov)) %*% X[[i]]
      }
      logdet_reml <- determinant(as.matrix(initial_matrix), logarithm = TRUE)
      if(logdet_reml$sign <= 0) logdet_reml <- -Inf else logdet_reml <- logdet_reml$modulus
      initial_value_reml <- initial_value + logdet_reml
    }
  }
  if(estimation_method == 'MLE') {
    as.numeric(initial_value)
  } else {
    as.numeric(initial_value_reml)
  }

}



return_estimates <- function(q_f = NULL, q_r = NULL,
                             estimated_pars, structure) {

  if(structure != 'multilevel') {
    if(structure == 'univariate') {
      Tau_estimates <- correlation_export(estimated_pars = estimated_pars,
                                          q_f = q_f,
                                          q_r = q_r,
                                          structure = structure)
      mu_estimates <- estimated_pars[1:q_f]
    } else {
      Tau_estimates <- correlation_export(estimated_pars = estimated_pars,
                                          q_f = q_f,
                                          q_r = q_r,
                                          structure = structure)
      mu_estimates <- estimated_pars[1:q_f]
    }
  } else {
    Tau_estimates <- correlation_export(estimated_pars = estimated_pars,
                                        q_f = q_f, q_r = q_r,
                                        structure = structure)
    mu_estimates <- estimated_pars[1:q_f]
  }


  list(mu = mu_estimates,
       Tau = Tau_estimates)
}

design_matrix <- function(data, effectID, studyID, intercept,
                          formula = NULL,
                          unlist = FALSE,
                          multivariate_covs = NULL) {

  num_effects <- as.numeric(table(data[[studyID]]))
  start <- c(1, cumsum(num_effects)[1:(length(num_effects)-1)]+1)

  if(is.null(formula)) {
    if(intercept) {
      dm <- model.matrix(~ factor(data[[effectID]]))
    } else {
      dm <- model.matrix(~ factor(data[[effectID]]) - 1)
    }
  } else {
    dm <- model.matrix(formula, data = data)
  }

  if(!is.null(multivariate_covs)) {
    dm2 <- model.matrix(as.formula(paste0('~', multivariate_covs, '-1')[2]), data = data)
    dm <- cbind(dm, dm2)
  }

  fixed_names <- colnames(dm)

  dm_list <- lapply(seq_along(num_effects), function(xx)
    dm[start[xx]:cumsum(num_effects)[xx], , drop = FALSE]
  )


  if(unlist) {
    # dm
    dm_df <- do.call('rbind', dm_list)
    names(dm_df) <- fixed_names
  } else {
    dm_df <- dm_list
    for(ii in seq_along(dm_df)) {
      names(dm_df[[ii]]) <- fixed_names
    }
  }

  dm_df
}

var_cov_fe <- function(q, nStudy, Ss, Tau, yID, X) {

  cov_matrix <- matrix(0, q, q)

  for(i in 1:nStudy){
    cov <- Ss[[i]][yID[[i]], yID[[i]] ] + Tau[yID[[i]], yID[[i]] ]
    if(length(yID[[i]]) == 1){
      cov <- as.numeric(cov)
      cov_matrix <- cov_matrix + t(X[[i]]) %*% X[[i]] / cov
    }else{
      cov_matrix <- cov_matrix + t(X[[i]]) %*% chol2inv(chol(cov)) %*% X[[i]]
    }
  }
  mu.cov <- chol2inv(chol(cov_matrix))

  list(
    var_cov = mu.cov,
    SE = sqrt(diag(mu.cov))
  )

}

params_helper <- function(q_f = NULL,
                          q_r = NULL,
                          structure = 'UN') {

  switch(structure,
         UN = unstructured_param(q_f, q_r),
         DIAG1 = diag1_param(q_f, q_r),
         DIAG2 = diag2_param(q_f, q_r),
         CS = cs_param(q_f, q_r),
         HCS = hcs_param(q_f, q_r),
         AR1 = ar1_param(q_f, q_r),
         multilevel = multilevel_param(q_f, q_r),
         univariate = multilevel_param(q_f, q_r)
  )
}

unstructured_param <- function(q_f, q_r) {
  params <- rep(0, q_f + q_r*(q_r + 1)/2)
  lower <- rep(-Inf, q_f + q_r*(q_r + 1)/2)
  upper <- rep(Inf, q_f + q_r*(q_r + 1)/2)

  params[q_f + cumsum(1:q_r)] <- 1
  lower[q_f + cumsum(1:q_r)] <- 10^(-6)

  list(
    params = params,
    lower = lower,
    upper = upper
  )
}

# Unique variance across random dimensions
diag1_param <- function(q_f, q_r) {
  params <-  rep(0, q_f + q_r)
  lower <- rep(-Inf, q_f + q_r)
  upper <- rep(Inf, q_f + q_r)
  params[q_f + 1:q_r] <- 1
  lower[q_f + 1:q_r] <- 10^(-6)

  list(
    params = params,
    lower = lower,
    upper = upper
  )
}

# Pooled variance
diag2_param <- function(q_f, q_r) {
  params <-  rep(0, q_f + 1)
  lower <- rep(-Inf, q_f + 1)
  upper <- rep(Inf, q_f + 1)
  params[q_f + 1] <- 1
  lower[q_f + 1] <- 10^(-6)

  list(
    params = params,
    lower = lower,
    upper = upper
  )
}

cs_param <- function(q_f, q_r) {
  params <- rep(0, q_f + 2)
  lower <- rep(-Inf, q_f + 2)
  upper <- rep(Inf, q_f + 2)
  upper[q_f + 2] <- 1
  params[q_f + 1:2] <- 1
  lower[q_f + 1] <- 10^(-6)
  lower[q_f + 2] <- -1


  list(
    params = params,
    lower = lower,
    upper = upper
  )
}

hcs_param <- function(q_f, q_r) {
  params <- rep(0, q_f + q_r + 1)
  lower <- rep(-Inf, q_f + q_r + 1)
  upper <- rep(Inf, q_f + q_r + 1)
  upper[q_f + q_r + 1] <- 1
  params[-1:-q_f] <- 1
  lower[(q_f+1):(q_f + q_r)] <- 10^(-6)
  lower[q_f + q_r + 1] <- -1

  list(
    params = params,
    lower = lower,
    upper = upper
  )
}

ar1_param <- function(q_f, q_r) {
  params <- rep(0, q_f + 2)
  lower <- rep(-Inf, q_f + 2)
  upper <- rep(Inf, q_f + 2)
  upper[q_f + 2] <- 1
  params[-1:-q_f] <- 1
  lower[q_f + 1 ] <- 10^(-6)
  lower[q_f + 2] <- -1

  list(
    params = params,
    lower = lower,
    upper = upper
  )
}

multilevel_param <- function(q_f, q_r) {

  params <- rep(0, q_f + q_r)
  lower <- rep(-Inf, q_f + q_r)
  upper <- rep(Inf, q_f + q_r)
  params[(q_f + 1):(q_f + q_r)] <- 1
  lower[(q_f + 1):(q_f + q_r)] <- 10^(-6)

  list(
    params = params,
    lower = lower,
    upper = upper
  )
}


fit_statistics <- function(loglik, estimation_method,
                           design_matrix, q, num_params) {

  if(estimation_method == 'REML') {
    part1 <-  -0.5 * (nrow(design_matrix) - ncol(design_matrix)) * log(2 * base::pi) + 0.5 * log(det(t(design_matrix) %*% design_matrix))
    part2 <-  .5 * loglik
    logLik <- part1 - part2
    Dev <- -2 * logLik
    AIC <- -2 * logLik + 2 *  num_params
    BIC <- -2 * logLik + num_params * log(nrow(design_matrix) - ncol(design_matrix))
    k <- ncol(design_matrix) + q + 2
    AICc <- AIC + (2 * k * (k + 1)) / (nrow(design_matrix) - k - 1)
    reml.fit <- as.data.frame(cbind(logLik, Dev, AIC, BIC, AICc))
    names(reml.fit) <- c("logLik", "Dev", "AIC", "BIC", "AICc")

    reml.fit
  } else {
    part1 <-  -0.5 * nrow(design_matrix)  * log(2 * base::pi)
    part2 <-   .5 * loglik
    logLik <- part1 - part2
    Dev <- -2 * logLik +  2 * (ncol(design_matrix) + q)
    AIC <- -2 * logLik + 2*   num_params
    BIC <- -2 * logLik + num_params  * log(nrow(design_matrix))
    k <- ncol(design_matrix) + q + 2
    AICc <- AIC + (2 * k * (k + 1)) / (nrow(design_matrix) - k - 1)
    mle.fit <- as.data.frame(cbind(logLik, Dev, AIC, BIC, AICc))
    names(mle.fit) <- c("logLik", "Dev", "AIC", "BIC", "AICc")

    mle.fit
  }

}