# R/hyp_matrix.R In genomaths/usefr: Utility Functions for Statistical Analyses

#### Documented in hyp_matrix

## Copyright (C) 2019 Robersy Sanchez <https://genomaths.com/>
##
## Author: Robersy Sanchez
#
## This file is part of the R package "usefr".
##
## 'usefr' is free software: you can redistribute it and/or modify
## it under the terms of the GNU General Public License as published by
## the Free Software Foundation, either version 3 of the License, or
## (at your option) any later version.
##
## This program is distributed in the hope that it will be useful, but WITHOUT
## ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
## FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
## more details.
##
## You should have received a copy of the GNU General Public License along
## with this program; if not, see <http://www.gnu.org/licenses/>.

#' @rdname hyp_matrix
#' @title Compute the Hypothesis Matrix for MANOVA testing.
#' @description Given the summarized data provided, this function compute
#' the hypothesis matrices used in several multivaraite statistics.
#' @details To perform the comparison of a model under different parameter
#' values (different curve fits with the same model), the covariance matrices
#' and means from each set of model parameters can be used in place of the
#' covariance matrices and means for the variables inside of each group.
#' @return A list carrying the following matrices and numerical data is
#' returned:
#'
#' \itemize{
#'   \item "Pooled" sums of squares and cross product (SSCP) matrix: 'E'
#'   \item Degree of freedom for matrix E: 'df.e'
#'   \item The hypothesis SSCP matrix: 'H'
#'   \item Degree of freedom for matrix H: 'df.h'
#'   \item 'H + E': 'HpE.inv'
#'   \item Error covariance matrix: 'Se'
#'   \item Inverse of Se: Se.inv
#'   \item Sum of mean of Squares: 'W'
#'   \item Inverse of W: 'W.inv'
#'   \item Variance-covariance matrix from each model: 'Cov'
#'   \item Parameter matrix for the groups: 'par'
#'   \item Number of variables or model parameters: 'p'
#'   \item Total sample size: 'N'
#'   \item Number of models/groups: 'n'
#'   \item Sample size: 'm'
#' }
#' @param obj A list of ANOVA R objects or linear/non-linear model objects.
#' If "obj" is not given, then the parmeters 'par.mat', 'cov.mat', and
#' 'sample.size' must be given.
#' @param par.mat A matrix where each column contains the model parameters of
#' each curve.
#' @param cov.mat List of parameter variance-covariance matrices.
#' @param sample.size Vector with the sample sizes used to estimate each
#' models.
#' @param groups list of sets of indices that identify models from a same
#' group.
#' @export
#' @author Robersy Sanchez (\url{https://genomaths.com}).
#' @examples
#' ## Generate a dataset
#' set.seed(1230)
#' x1 <- rnorm(1e3, mean = 2.5, sd = 1)
#' x2 <- rnorm(1e3, mean = 2.5, sd = 1)
#'
#' line_1 <- 2.5 * x1 + 3 + runif(length(x1))/10
#' line_2 <- 2.5 * x2 + 3 + runif(length(x1))/10
#'
#' ## Fitting the linear models
#' lm_1 <- lm(line_1 ~ x1)
#' lm_2 <- lm(line_2 ~ x2)
#' res <- hyp_matrix(obj = list(model1 = lm_1, model2 = lm_2))

hyp_matrix <- function(
obj,
par.mat = NULL,
cov.mat = NULL,
sample.size = NULL,
groups = NULL) {

datos <- data_formating(obj = obj,
par.mat = par.mat,
cov.mat = cov.mat,
sample.size = sample.size,
groups = groups)
m <- datos$m n <- datos$n
N <- datos$N p <- datos$p
df.h <- datos$df.h par <- datos$par
Cov <- datos$Cov ## ========================================================== # ## "pooled" sums of squares and cross product matrix E (SSCP): ## ========================================================== # S <- mapply(function(x, y) (x - 1) * y, m, Cov, SIMPLIFY = F) E <- Reduce("+", S) df.e <- N - n # Degree of freedom for matrix E Se <- E / df.e # Error covariance matrix A <- lapply(seq_along(m), function(k) S[[k]]/m[k]) ## ------------------------------------------------------ -# ## ----------- Estimation of hypothesis matrix ----------- ## ------------------------------------------------------ -# ## vector of the grand means of all parameter in the models par.mean <- rowMeans(par) # SSCP for each group according to the hypothesis H: SSCPk <- list() for (k in seq_len(n)) { SSCPk[[k]] <- m[k] * (par[, k] - par.mean) %*% t(par[, k] - par.mean) } # The hypothesis SSCP matrix H: H <- Reduce("+", SSCPk) r <- min(p, df.h) # Necessary to compute dfs s <- max(p, df.h) #---------------------------------------- - W <- Reduce("+", A) # Sum of mean of Squares W.inv <- suppressWarnings(try(qr.solve(W), silent = TRUE)) if (inherits(W.inv, "try-error")) { # error handling code, maybe just skip this iteration using Se.inv <- try(solve(qr(Se, LAPACK = TRUE)), silent = TRUE) HpE.inv <- try(solve(qr(H + E, LAPACK = TRUE)), silent = TRUE) E.inv <- try(solve(qr(E, LAPACK = TRUE)), silent = TRUE) W.inv <- try(solve(qr(W, LAPACK = TRUE)), silent = TRUE) matrix_inv <- c( "'Error-covariance-matrix'\n", "'SSCP'\n", "'SSCP + SSCP-hypothesis\n'", "'Sum-of-mean-of-Squares'\n" ) try_error <- sapply(c(W.inv, Se.inv, HpE.inv, E.inv), inherits) if (any(try_error)) { stop( "*** It was possible to compute the inverse matrix for ", "some of the matrices:\n", matrix_inv[try_error], "\nCheck the covariance matrices of your models.\n", "'vcov(model)'" ) } } else { Se.inv <- qr.solve(Se, tol = 1e-10) HpE.inv <- qr.solve(H + E, tol = 1e-10) E.inv <- qr.solve(E, tol = 1e-10) } invisible( list( E = E, df.e = df.e, E.inv = E.inv, H = H, A = A, df.h = df.h, HpE.inv = HpE.inv, Se = Se, Se.inv = Se.inv, W = W, W.inv = W.inv, par = par, Cov = Cov, p = p, N = N, df.h = df.h, m = m, n = n) ) } ## ================== Auxiliary function ========================= data_formating <- function( obj, par.mat = NULL, cov.mat = NULL, sample.size = NULL, groups = NULL) { if (!missing(obj)) { n <- length(obj) ## number of models/groups Cov <- list() m <- c() par <- c() nls_lm_class <- all(sapply(obj, function(ob) inherits(ob, "nls.lm"))) nls_class <- all(sapply(obj, function(ob) inherits(ob, "nls"))) if (nls_class || nls_lm_class) { for (k in seq_len(n)) { ## To get the parameter variance-covariance matrix ## for each model Cov[[k]] <- unname(vcov(obj[[k]])) ## This generates a vector with the sample sizes used ## to estimate each models m <- c(m, sum(summary(obj[[k]])$df))

## This generated a matrix where each column contains
## the model parameters of each curve.
par <- cbind(par, coef(obj[[k]]))
}
}

lm_class <- all(sapply(obj, function(ob) inherits(ob, "lm")))
aov_class <- all(sapply(obj, function(ob) inherits(ob, "aov")))

if (lm_class || aov_class) {
for (k in seq_len(n)) {
## To get the parameter variance-covariance matrix for each
##  model
Cov[[k]] <- unname(vcov(obj[[k]]))

## It generates a vector with the sample sizes
m <- c(m, length(obj[[k]]\$fitted.values))

## It generates a matrix where each column contains group
## means or the model parameters of each group or linear
## model, respectively.
par <- cbind(par, unname(coef(obj[[k]])))
}
}
}

if (is.null(par)) {
stop(
"*** Non-informative models provided. \n",
"All models must belong to the same class.\n",
"The supported classes are:\n",
"'nls', 'nls.lm', 'lm', and 'aov'"
)
}

if (missing(obj)) {
Cov <- cov.mat
par <- par.mat
m <- sample.size
n <- length(m) ## number of models/groups
}

if (!is.null(groups)) {
M <- c()
PAR <- c()
COV <- list()
for (k in seq_along(groups)) {
## Pooled covariance matrix for each group (joint covariance)
COV[[k]] <- Reduce("+", mapply(function(x, y) (x - 1) * y,
m[groups[[k]]],
Cov[groups[[k]]],
SIMPLIFY = F
))
COV[[k]] <- COV[[k]] / sum(m[groups[[k]]] - 1)

## Sample size in group k
M[k] <- sum(m[groups[[k]]])

## Parameter matrix for the groups
PAR <- cbind(PAR, rowMeans(par[, groups[[k]]], na.rm = TRUE))
}
Cov <- COV
par <- PAR
m <- M
n <- length(m)
}

p <- nrow(par) ## Number of variables or model parameters
N <- sum(m) ## Total sample size
df.h <- n - 1 ## Degree of freedom for matrix H
return(list(par = par, Cov = Cov, p = p,
N = N, df.h = df.h, m = m, n = n))
}

genomaths/usefr documentation built on July 28, 2022, 12:31 p.m.