gws:

Documented in fixef_model_matrix ranef_model_matrix resid_model_matrix

#' Create a random effect incidence matrix
#'
#' @description
#' Creates an incidence matrix for the random effects of a mixed-model
#'
#' @param random A \code{formula} specifying the the random effect terms of the
#' linear mixed-model. Must be preceded by a \code{~} and can include the modifier
#' verbs \code{\link{at}[sommer]}, \code{\link{us}[sommer]}, and \code{\link{diag}[base]}
#' to specify different structures. If a random effect term has an accompanying
#' variance-covariance matrix (see \code{vcov}), the term should be nested using
#' the function \code{\link{g}[sommer]}
#' @param data A \code{data.frame} that includes the response and random effect
#' predictors. If not supplied, the function will look in the current running
#' environment.
#' @param vcov A named list of variance-covariance matrices that correspond to
#' the random effect terms that are specified with the \code{\link[g]{sommer}}
#' function. If not passed, an identity matrix is supplied.
#'
#' @return
#' A nested list of model matrices and variance-covariance matrices. For each
#' random effect term, the incidence matrix \code{Z} is returned along with its
#' accompanying variance-covariance matrix \code{K}.
#'
#'
ranef_model_matrix <- function(random, data, vcov, sparse = TRUE) {

  # Input check
  if (!inherits(random, "formula"))
    stop("The input 'random' must be a model formula.")

  # If data is missing, use the current environment
  if (missing(data)) {
    data <- environment(random)

  } else {
    # Convert data to a data.frame
    data <- as.data.frame(data) %>%
      mutate_if(is.character, as.factor)

  }

  # If vcov is supplied, perform some error checking
  if (!missing(vcov)) {
    if (is.null(names(vcov)))
      stop("vcov must be a named list.")

    # Make sure each is has column names
    if (any(lapply(vcov, dimnames) %>% sapply(is.null)))
      stop("The matrices in vcov must have dimnames.")

    # Extract the names
    vcov_names <- names(vcov)

  }

  # Edit the formula to remove the intercept
  random1 <- as.formula(paste("~ -1 +", random)[-1])

  # Create the base model matrix
  model_frame_base <- model.frame(random1, data = data)

  # Determine the variables in the formula
  rand_vars <- str_split(string = as.character(random)[-1], pattern = "\\+", simplify = FALSE) %>%
    unlist() %>%
    str_trim()

  # Create a list
  Z_list <- list()

  # Cycle through the random effect terms and create a model.matrix
  for (term in rand_vars) {

    ## Detect the modifier verbs in the formula (i.e. at, diag, us)
    is_at <- str_detect(string = term, "at\\(")
    is_us <- str_detect(string = term, "us\\(")
    is_diag <- str_detect(string = term, "diag\\(")

    # Are any terms nested in the 'g' function?
    is_g <- str_detect(string = term, "g\\(")

    # Create a formula for that specific term
    term_formula <- as.formula(paste("~ -1 +", term))

    # Pull out the model matrix
    if (sparse) {
      model_matrix <- sparse.model.matrix(object = term_formula, data = model_frame_base)
    } else {
      model_matrix <- model.matrix(object = term_formula, data = model_frame_base)
    }

    # Find the regressor that with the modifier
    if (is_at) {
      at_var <- str_extract(string = term, pattern = "at\\([A-Za-z0-9]*\\)") %>%
        str_replace_all(pattern = "at\\(|\\)", replacement = "")

      # What is the variable to the right of the parentheses/colon?
      sec_var <- str_split(string = term, pattern = ":") %>%
        unlist() %>%
        tail(-1) %>%
        paste(collapse = "|") %>%
        str_replace_all(pattern = "g\\(", "g\\\\(") %>%
        str_replace_all(pattern = "\\)", "\\\\)")

      # Pull out the levels of that variable
      at_var_levels <- levels(data[[at_var]])

      # For each level, subset the model_matrix
      for (lev in at_var_levels) {
        # Subset the matrix
        mat <- model_matrix[,str_detect(string = colnames(model_matrix), pattern = lev)]

        sec_var_levs <- colnames(mat) %>%
          str_split(pattern = ":") %>%
          sapply("[", 2) %>% str_replace_all(sec_var, "")

        # Adjust the columnnames
        colnames(mat) <- str_c("at(", lev, "):", sec_var_levs)

        # Determine a list name
        lev_list_name <- str_replace(string = term, pattern = at_var, replacement = lev)

        # If the term has a specified covariance matrix, figure out what term it is
        if (is_g) {
          # Is the 'g' at the beginning of the term?
          is_start_g <- str_detect(string = term, pattern = "^g")

          g_term <- str_extract(string = term, pattern = "g\\(.*\\)") %>%
            str_replace_all(pattern = "g\\(|\\)", replacement = "")

          # Is the name in the vcov list?
          if (!g_term %in% vcov_names) {
            stop("A term was specified using g(.), but the term is not present
                 in the 'vcov' list.")

          } else {
            # Pull out the vcov matrix
            vcov_term <- vcov[[g_term]]

          }

        } else {
          is_start_g <- FALSE

          if (sparse) {
            vcov_term <- Diagonal(n = ncol(mat))

          } else {
            vcov_term <- diag(ncol(mat))
          }

          dimnames(vcov_term) <- list(colnames(mat), colnames(mat))

        }


        # Add to the list
        Z_list[[lev_list_name]] <- list(Z = mat, K = vcov_term)

      }

    } else {

      # If so, figure out what term it is
      if (is_g) {
        # Is the 'g' at the beginning of the term?
        is_start_g <- str_detect(string = term, pattern = "^g")

        # The line '.*' matches all strings.
        g_term <- str_extract(string = term, pattern = "g\\(.*\\)") %>%
          str_replace_all(pattern = "g\\(|\\)", replacement = "")

        # Is the name in the vcov list?
        if (!g_term %in% vcov_names) {
          stop("A term was specified using g(.), but the term is not present in the 'vcov' list.")

        } else {
          # Pull out the vcov matrix
          vcov_term <- vcov[[g_term]]

        }

      } else {
        is_start_g <- FALSE

        if (sparse) {
          vcov_term <- Diagonal(n = ncol(model_matrix))

        } else {
          vcov_term <- diag(ncol(model_matrix))
        }

        dimnames(vcov_term) <- list(colnames(model_matrix), colnames(model_matrix))

      }

      # Remove the variable name from the matrix column names
      if (is_start_g) {
        colnames(model_matrix) <- colnames(model_matrix) %>%
          str_replace_all(pattern = "g\\(.*\\)", replacement = "")

      } else {
        colnames(model_matrix) <- colnames(model_matrix) %>%
          str_replace_all(pattern = term, replacement = "")

      }

      # Return the matrix
      Z_list[[term]] <- list(Z = model_matrix, K = vcov_term)

    }

  }

  ## Need to revise this for "un" and "diag" verbs



  # Return the list
  return(Z_list)

} # Close the function



#' Create a fixed effect incidence matrix
#'
#' @description
#' Creates an incidence matrix for the fixed effects of a mixed-model. This is a
#' very thin wrapper around \code{\link[model.matrix]{stats}}.
#'
#' @param fixed A \code{formula} specifying the the response variable and the
#' fixed effect terms of the linear mixed-model. Must take the form \code{response
#' ~ predictor(s)}.
#' @param data A \code{data.frame} that includes the response and fixed effect
#' predictors. If not supplied, the function will look in the current running
#' environment.
#'
#'
fixef_model_matrix <- function(fixed, data, sparse = TRUE) {

  if (sparse) {
    sparse.model.matrix(object = fixed, data = data)
  } else {
    model.matrix(object = fixed, data = data)
  }

} # Close the function



#' Create a residual effect covariance matrix
#'
#' @description
#' Creates an covariance matrix for the random effects of a mixed-model
#'
#' @param resid A \code{formula} specifying the the structure of the residual
#' effects in the linear mixed-model. Must be preceded by a \code{~} and can
#' include the modifier verbs \code{\link[at]{sommer}}, \code{\link[us]{sommer}},
#' and \code{\link[diag]{base}} to specify different structures. The formulae
#' can take the same form as those in \code{\link[ranef_model_matrix]{gws}},
#' except the term must be "units".
#' @param data A \code{data.frame} that includes the response and predictors.
#' If not supplied, the function will look in the current running environment.
#'
#' @return
#' A nested list of model matrices.
#'
resid_model_matrix <- function(resid, data, sparse = TRUE) {

  # Input check
  if (!inherits(resid, "formula"))
    stop("The input 'resid' must be a model formula.")

  # If data is missing, use the current environment
  if (missing(data)) {
    data <- environment(resid)

  } else {
    # Convert data to a data.frame
    data <- as.data.frame(data) %>%
      mutate_if(is.character, as.factor)

  }

  # Detect "units" variable in the model and in the specific format
  detect_format <- str_detect(string = as.character(resid)[2],
                              pattern = "at\\(.*\\):units|units")

  if (!detect_format) {
    stop("The 'resid' formula must take the form 'resid = ~ units' or 'resid = ~ at(.):units'.")

  } else {
    # Edit the formula to remove the intercept and the "units"
    resid1 <- paste("~ -1 +", as.character(resid)[2]) %>%
      str_replace_all(pattern = ":units|\\+ units", replacement = "") %>%
      str_trim() %>%
      as.formula()

  }

  # Create the base model frame
  model_frame_base <- model.frame(resid1, data = data)

  # If the model_frame is dimensionless (i.e. resid = ~ units), return the R list
  if (ncol(model_frame_base) == 0) {
    if (sparse) {
      return(list(units = Diagonal(nrow(model_frame_base))))
    } else {
      return(list(units = diag(nrow(model_frame_base))))
    }



  } else {
    # Otherwise create a list of R matrices
    resid_vars <- as.character(resid1)[2] %>%
      str_replace(pattern = "-1 \\+ ", replacement = "")

    # Create a list
    R_list <- list()

    # Cycle through the random effect terms and create a model.matrix
    for (term in resid_vars) {

      ## Detect the modifier verbs in the formula (i.e. at, diag, us)
      is_at <- str_detect(string = term, "at\\(")
      is_us <- str_detect(string = term, "us\\(")
      is_diag <- str_detect(string = term, "diag\\(")

      # Create a formula for that specific term
      term_formula <- as.formula(paste("~ -1 +", term))

      if (sparse) {
        model_matrix <- sparse.model.matrix(object = term_formula, data = model_frame_base)
      } else {
        model_matrix <- model.matrix(object = term_formula, data = model_frame_base)
      }

      # Find the regressor that with the modifier
      if (is_at) {
        at_var <- str_extract(string = term, pattern = "at\\([A-Za-z0-9]*\\)") %>%
          str_replace_all(pattern = "at\\(|\\)", replacement = "")

        # Pull out the levels of that variable
        at_var_levels <- levels(data[[at_var]])

        # For each level, subset the model_matrix
        for (lev in at_var_levels) {
          # Subset the matrix - this will become the diagonal of the matrix
          diag_mat <- model_matrix[,str_detect(string = colnames(model_matrix), pattern = lev)]

          # Create an empty matrix
          mat <- matrix(data = 0, nrow = length(diag_mat), ncol = length(diag_mat))
          diag(mat) <- diag_mat

          # Determine a list name
          lev_list_name <- str_replace(string = term, pattern = at_var, replacement = lev)

          # Add to the list
          R_list[[lev_list_name]] <- mat

        }
      }
    }
  }
  ## Need to revise this for "un" and "diag" verbs

  # Return the list
  return(R_list)

} # Close the function