R/util.r

Defines functions unique_names sum_list Z_calc Z_matrix Z_mat_helper parse_randomname parse_randomeffect parse_formula format_coef_names format_coefficients matrix_subset tau_matrix seq_length_ncol missing_data var_cov

var_cov <- function(data) {
  as.matrix(Matrix::bdiag(data))
}

missing_data <- function(data) {

  -1 * which(is.na(data))

}

seq_length_ncol <- function(xx) seq_len(ncol(xx))

tau_matrix <- function(rho, tau2) {

  rho_matrix <-  corpcor::vec2sm(rho)
  diag(rho_matrix) <- 1

  tau_mat <- diag(sqrt(tau2))

  tau_mat %*% rho_matrix %*% tau_mat

}

# Identify and subset matrix
matrix_subset <- function(matrix, variables) {

  match_column <- colnames(matrix) %in% variables
  match_row <- rownames(matrix) %in% variables

  matrix[match_row, match_column]

}

# format model coefficients
format_coefficients <- function(coefficients) {

  coef_names <- sapply(coefficients, names)
  coef_names <- lapply(coef_names, format_coef_names)

  coefs <- lapply(seq_along(coefficients), function(xx)
    cbind(coef_names[[xx]], estimate = coefficients[[xx]]))

  coefs
}

format_coef_names <- function(names) {

  names_coef <- data.frame(trimws(
    do.call('rbind', strsplit(names, "->"))),
    stringsAsFactors = FALSE)

  names(names_coef) <- c("predictor", "outcome")

  names_coef
}


parse_formula <- function(formula) {

    outcome <- as.character(formula)[2]

    fixed <- as.formula(paste0("~", gsub("^\\s+|\\s+$", "", gsub("\\+\\s*(\\s+|\\++)\\(.*?\\)", "", as.character(formula)[3]))))

    randomeffect <- gsub("^\\s+|\\s+$", "", unlist(regmatches(as.character(formula)[3],
                                                              gregexpr("(\\+|\\s+)\\(.*?\\)", as.character(formula)[3]))))

    list(outcome = outcome,
         fixed = fixed,
         randomeffect = randomeffect)
}

parse_randomeffect <- function(random_formula) {

  cluster_id_vars <- gsub("^\\s+|\\s+$", "", gsub("\\)", "", unlist(lapply(seq_along(random_formula), function(xx) strsplit(random_formula, "\\|")[[xx]][2]))))
  if(any(grepl("/", cluster_id_vars))) {
    cluster_id_vars <- unlist(strsplit(cluster_id_vars, "/"))
  }

  random_effects <- paste0('~', gsub("^\\s+|\\s+$", "", gsub("\\(", "", unlist(lapply(seq_along(random_formula), function(xx) strsplit(random_formula, "\\|")[[xx]][1])))))

  lapply(seq_along(cluster_id_vars), function(xx)
    as.formula(paste0(random_effects, "|", cluster_id_vars[xx])))

}
parse_randomname <- function(random_formula) {
  cluster_id_vars <- gsub("^\\s+|\\s+$", "", gsub("\\)", "", unlist(lapply(seq_along(random_formula), function(xx) strsplit(random_formula, "\\|")[[xx]][2]))))
  if(any(grepl("/", cluster_id_vars))) {
    cluster_id_vars <- unlist(strsplit(cluster_id_vars, "/"))
  }

  cluster_id_vars
}

#' @importFrom stats model.matrix
Z_mat_helper <- function(random_formula, data) {
  random_equation <- unlist(lapply(seq_along(random_formula),  function(xx)
    as.formula(paste("~", gsub("\\s+\\|\\s+[a-zA-Z]+$", "", as.character(random_formula[[xx]])[2])))))
  Z_mat <- lapply(seq_along(random_equation), function(xx)
    model.matrix(random_equation[[xx]], data = data))

  sum(unlist(lapply(Z_mat, ncol)))
}

Z_matrix <- function(random_formula, data) {

  ids <- unlist(lapply(seq_along(random_formula),  function(xx)
                gsub("^.\\s+\\|\\s+", "", as.character(random_formula[[xx]])[2])))

  random_equation <- unlist(lapply(seq_along(random_formula),  function(xx)
    as.formula(paste("~", gsub("\\s+\\|\\s+[a-zA-Z]+$", "", as.character(random_formula[[xx]])[2])))))

  data_ids <- data[ids]

  num_ids1 <- length(unique(data_ids[, 1]))
  num_ids2 <- length(unique(data_ids[, 2]))

  if(num_ids1 < num_ids2) {
    num_group <- unlist(lapply(split(data_ids, f = data_ids[,1]), nrow))
  } else {
    num_group <- unlist(lapply(split(data_ids, f = data_ids[,2]), nrow))
  }


  Z_mat <- lapply(seq_along(random_equation), function(xx)
    model.matrix(random_equation[[xx]], data = data))

  cumulative_groups <- cumsum(num_group)
  cumulative_start <- c(1, cumulative_groups[1:(length(cumulative_groups)-1)]+1)

  if(num_ids1 < num_ids2) {
    Z_calc_big <- lapply(seq_along(num_group), function(xx)
      as.matrix(Z_mat[[1]][cumulative_start[xx]:cumulative_groups[xx],])
    )

    Z_calc_small <- lapply(seq_along(num_group), function(xx)
      lapply(seq_len(num_group[xx]), function(zz)
        as.matrix(Z_mat[[2]][(cumulative_start[xx]+zz-1), ])
      )
    )
    lapply(seq_along(num_group), function(xx)
      c(list(Z_calc_big[xx]),
        Z_calc_small[xx])
      )
  } else {
    Z_calc_big <- lapply(seq_along(num_group), function(xx)
      as.matrix(Z_mat[[2]][cumulative_start[xx]:cumulative_groups[xx],])
    )

    Z_calc_small <- lapply(seq_along(num_group), function(xx)
      lapply(seq_len(num_group[xx]), function(zz)
        as.matrix(Z_mat[[1]][(cumulative_start[xx]+zz-1), ])
      )
    )
    lapply(seq_along(num_group), function(xx)
      c(list(Z_calc_big[xx]),
        Z_calc_small[xx])
    )
  }

}

Z_calc <- function(data, start, end, group) {
  if(group == 1) {
    as.matrix(data[[group]][start:end,])
  } else {
    as.matrix(data[[group]][start:start,])
  }

}

sum_list <- function(list) {
  res <- 0
  for(i in seq(list)) res <- res + list[[i]]
  #
  res
}

unique_names <- function(coefficients) {
  unique(c(coefficients[['predictor']], coefficients[['outcome']]))
}

# Horrible hack to keep CRAN happy and suppress NOTES about
# parts of the code that use non-standard evaluation.
# See:
# http://stackoverflow.com/questions/9439256/how-can-i-handle-r-cmd-check-no-visible-binding-for-global-variable-notes-when
# https://github.com/smbache/magrittr/issues/29
utils::globalVariables(c('lhs', 'model', 'model_out_random',
                         'op', 'rhs'))

Try the mars package in your browser

Any scripts or data that you put into this service are public.

mars documentation built on April 12, 2025, 1:35 a.m.