R/fastInferences4-eigen-CA.R

Defines functions print.Inference4CA multinomCV4CA eigCA4Multinom malinvaudQ4CA.perm eigCA

Documented in eigCA eigCA4Multinom malinvaudQ4CA.perm multinomCV4CA print.Inference4CA

#___________________________________________________________
# file fastInferences4CA.R
# Created 09/27/2020
# Fast inferences for CA based on multinomial distribution
# includes bootstrap and permutation tests.
#  Current Version 09/27/2020
# __________________________________________________________
# Preamble ----
# File for fast Inferences in CA 
# An example on how to compute (fast)
# permuted and bootstrap values 
# for the CA of a data table
# Functions to be ported to data4PCCAR
# Current functions ----
# eigCA
# malinvaudQ4CA.perm
# eigCA4Multinom
# multinomCV4CA
# print.Inference4CA
#
# **********************************************************
# The functions starts here ----
# __________________________________________________________
# Preamble eigCA ----
# First a nice function to get only the CA eigenvalues 
# from a data matrix
# 
#___________________________________________________________ 
# eigCA a function to compute 
#  the CA-eigenvalues for a matrix
#  if eig.only is FALSE, eigCA will also give Fi and Fj
#     (Useful for Booststraping Fi & Fj)
# help starts here 
# ________________
# eigCA
#' @title A bare-bone function to compute  the
#' eigen-values (and if asked row and column factor scores)
#' of a matrix suitable for correspondence analysis.
#' @description \code{eigCA}:
#' A very fast and bare-bone function that computes
#' the eigenvalues 
#' (and possibly the row and column factor scores)
#' of the Correspondence Analysis (CA)
#' of a data matrix suitable for CA
#' (i.e., a matrix whose all entries are non-negative).
#' \code{eigCA} is mainly used for cross-validation
#' and resampling methods (e.g., permutation and
#' bootstrap tests).
#' @param Xdata a data matrix 
#' (whose all entries are non-negative) suitable
#' for correspondence analysis.
#' @param eig.only when \code{TRUE} (Default) 
#' compute only the CA- eigen-values of \code{X}.
#' Otherwise compute also the row and column
#' CA factor scores (i.e., \code{fi} and \code{fj}).
#' @return if \code{eig.only} is \code{TRUE}
#' \code{eigCA} returns the CA-eigenvalues of
#' \code{X}; 
#' if \code{eig.only} is \code{FALSE}
#' \code{eigCA} returns 
#' a list with: \code{$eigen}: the CA-eigenvalues of
#' \code{X}, \code{$fi}: the CA row factor scores,
#' and \code{$fi}: the CA column factor scores.
#' @details As a fast bare-bones CA based computations
#' \code{eigCA} is mainly used for 
#' cross-validation for CA methods.
#' @author  Hervé Abdi
#' @examples 
#' \dontrun{
#' set.seed(87) # set the seed
#' X <- matrix(round(runif(21)*20), ncol = 3) # good for CA
#' eigenOfX <- eigCA(X)
#' }
#' @rdname eigCA
#' @export 
eigCA <- function(Xdata, # data matrix
            eig.only = TRUE # we just want the eigenvalues
                   ){# function starts here
  nI <-  nrow(Xdata)
  nJ <-  ncol(Xdata)
  Fliped = FALSE # Did we flip the matrix?
  # make sure that we work on the smallest side
  if (nI > nJ){Xdata <-  t(Xdata)
               truc <-  nI
               nI <-  nJ
               nJ <-  truc
               Fliped <-  TRUE}
  Y <-  Xdata / sum(Xdata) # stochastic matrix
  y.ip <-  rowSums(Y)
  y.pj <-  colSums(Y)
  # long way with diag to check computation     
  # Y4eig      = diag(1/sqrt(y.ip))%*% Y %*% diag(1/sqrt(y.pj))
  # rewrite Y4eig in a better way a la repmat!
  Y4eig <-   matrix((1/sqrt(y.ip)),nI,nJ) * Y *
    matrix((1/sqrt(y.pj)),nI,nJ,byrow = TRUE)
  # ^ this could be done faster I think
  if (eig.only){
    S4eig <-  Y4eig %*% t(Y4eig)
    Y.eig <-  eigen(S4eig, symmetric = TRUE, 
                       only.values = TRUE)
    # check.svd = svd(Y4eig) to double check
    return(Y.eig$values[-1]) # ignore the first trivial eigenvalue
  } else {# go for ze svd            
    Y.svd <- svd(Y4eig,nu = min(c(nI,nJ)))
    Y.eig <- Y.svd$d^2 # eigenvalue
    nL <- length(Y.eig)
    Fi <- matrix((1/sqrt(y.ip)),nI,nL) * Y.svd$u[,1:nL] * 
      matrix(Y.svd$d,nI,nL,byrow = TRUE)
    Fj <- matrix((1/sqrt(y.pj)),nJ,nL) * Y.svd$v[,1:nL] *
      matrix(Y.svd$d,nJ,nL,byrow = TRUE)
    if (Fliped){ truc <-  Fj
                 Fj   <-  Fi
                 Fi   <-  truc} # if Fliped, unFliped 
    ShortCA = list(eigen = Y.eig[-1],
                      Fi = Fi[,-1],
                      Fj = Fj[,-1]) # drop first dim
    return(ShortCA)
  }
} # end of function eigCA
# eof eigCA ----
#___________________________________________________________
# Preamble malinvaudQ4CA.perm  ----
# malinvaudQ4CA.perm computes the Malinvaud/Saporta test for CA
#                   with asymptotic and permutation derived p-values
# Help here 
# ___________
# 
#' @title Compute the Malinvaud/Saporta test for
#' the omnibus and dimensions of a correspondence 
#' analysis.
#' @description \code{malinvaudQ4CA.perm}:
#' Computes the Malinvaud / Saporta test for
#' the omnibus and dimensions of a correspondence 
#' analysis. \code{malinvaudQ4CA.perm} gives
#' the asymptotic Chi2 values and their associated 
#' \emph{p}-value under the usual assumptions.
#' If provided with permuted 
#' CA-eigenvalues, \code{malinvaudQ4CA.perm}
#' will report the \emph{p}-value obtained from
#' the permutation test.
#' @param Data A matrix suitable for 
#' correspondence analysis 
#' (i.e., with all non-negative elements).
#' @param LesEigPerm 
#' the permuted eigenvalues
#' as a \code{niteration} times rank of the
#' data  matrix.
#' if  \code{LesEigPerm} is \code{NULL}
#' (Default), \code{malinvaudQ4CA.perm}
#' will only compute the normal 
#' (i.e., chi2 based) approximation.
#' @param ndigit4print (Default: 4), 
#' number of significant 
#' digits to use to report the results.
#' @return A (self-explanatory) 
#' table with the results of the test
#' @details DETAILS
#' @author  Hervé Abdi
#' @references 
#' The original work is described in a rather
#' hard to find paper (published in the proceeding
#' of a meeting) by Malinvaud:
#' 
#' Malinvaud, E. (1987). 
#' Data Analysis in applied socio-economic statistics
#' with special considerations of correspondence analysis.
#' \emph{Marketing Science Conference Proceedings},
#'  HEC-ISA, Jouy-en-Josas.
#'  
#' A synthesis of the method with additional information 
#' can be found  
#' in 
#' 
#' Saporta (2011). \emph{Probabilité 
#' et Analyse des Données (3rd Ed)}.
#'    Technip, Paris. p. 209.  
#' @examples 
#' \dontrun{
#' set.seed(87) # set the seed
#' X <- matrix(round(runif(21)*20), ncol = 3) # good for CA
#' resMalin <- malinvaudQ4CA.perm(X)
#' }
#' @importFrom stats pchisq
#' @rdname malinvaudQ4CA.perm
#' @export 
malinvaudQ4CA.perm <- function(
          Data, # The original Data Table
          # Val.P = NULL, # fixed eigenvalues
          #  e.g. resFromExposition$ExPosition.Data$eigs
          # Output from ExPosition 
          LesEigPerm = NULL, # the permuted eigenvalues
          # as a niteration * rank of X matrix
          # -> to add if LesEigPerm is NULL skip
          # the permutation part 
          # and compute normal approximation
          ndigit4print = 4 # how many digits for printing
){# Function begins here
  # References:
  # Malinvaud, E. (1987). Data Analysis in applied socio-economic statistics
  # with special considerations of correspondence analysis.
  # Marketing Science Conference Proceedings, HEC-ISA, Jouy-en-Josas.
  # Also cited in Saporta (2011). Probabilité et Analyse des Données (3rd Ed).
  #    Technip, Paris. p. 209.  
  #   Val.P <- ResFromExposition$ExPosition.Data$eigs
  N.pp  <- sum(Data) # Grand Total of the data table
  # if(is.null(Val.P)){Val.P = eigCA(Data)}
  # OLd Version
  # Compute eigenvalues if not given
  Val.P = eigCA(Data) # Now compute the eigenvalues
  nL <- length(Val.P) # how many eigenvalues
  nI <- nrow(Data)
  nJ <- ncol(Data)
  Q     <- N.pp * cumsum(Val.P[nL:1])[nL:1]
  Q.nu  <- (nI - 1:nL)*(nJ - 1:nL)  
  # Get the values from Chi Square 
  pQ = 1 - stats::pchisq(Q,Q.nu) 
  # Add NA to make clear that the last 
  #     dimension is not tested
  Le.Q = c(Q,NA)
  Le.pQ = c(pQ,NA)
  Le.Q.nu = c(Q.nu,0)
  #
  NamesOf.Q = c('Ho: Omnibus', paste0('Dim-',seq(1:nL)))
  names(Le.Q)    <- NamesOf.Q
  names(Le.pQ)   <- NamesOf.Q
  names(Le.Q.nu) <- NamesOf.Q
  # Now compute the probability from permutation test
  #     from InPosition
  # LesEigPerm <-  Eigen.perm
  # ^ to make life easier
  if (!is.null(LesEigPerm)){# If LesEigPerm exist get p-values
    Q.perm     <-  N.pp * t(apply( LesEigPerm[,nL:1], 1, cumsum) )[,nL:1]
    #
    Logical.Q.perm =  t(t(Q.perm) >  (Q))
   pQ.perm = colMeans(Logical.Q.perm)
   pQ.perm[pQ.perm == 0] = 1 / nrow(Q.perm)
   Le.pQ.perm = c(pQ.perm,NA)
   # return the table
   Malinvaud.Q = data.frame(matrix(c(
    c(round(c(sum(Val.P),Val.P),digits = ndigit4print) ),
    round(Le.Q,      digits = ndigit4print),
    round(Le.pQ,     digits = ndigit4print),
    round(Le.Q.nu,   digits = ndigit4print),
    round(Le.pQ.perm,digits = ndigit4print)),
    nrow = 5, byrow = TRUE))
  colnames(Malinvaud.Q) = NamesOf.Q 
  rownames(Malinvaud.Q) = c('Inertia / sum lambda',
                            'Chi2',
                            'p-Chi2','df',
                            'p-perm') 
  } else {
    Malinvaud.Q = data.frame(matrix(c(
      c(round(c(sum(Val.P),Val.P),digits = ndigit4print) ),
      round(Le.Q,      digits = ndigit4print),
      round(Le.pQ,     digits = ndigit4print),
      round(Le.Q.nu,   digits = ndigit4print)
      # ,round(Le.pQ.perm,digit = ndigit4print)
      ),
      nrow = 4, byrow = TRUE))
    colnames(Malinvaud.Q) = NamesOf.Q 
    rownames(Malinvaud.Q) = c('Inertia / sum lambda',
                              'Chi2',
                              'p-Chi2','df'
                              #,'p-perm'
                              ) 
  }
  # test p for chi2 to avoid 0
  for (k in 1:(ncol(Malinvaud.Q)-1)){
  if((Malinvaud.Q[3,k]) == 0){
    Malinvaud.Q[3, k] <- round( 
          1/(10^ndigit4print), ndigit4print) }
  }
    return(Malinvaud.Q)
} # End of function MalinvaudQ4CA here
# eof malinvaudQ4CA.perm ----
#___________________________________________________________
# Preamble eigCA4Multinom ----
# Function eigCA4ultinom starts here
# Sample from a multinomial distribution 
# and compute the eigenvalues
# of a CA. 
# NB needs the function eigCA
# Examples of calls
# 1. Bootstrap of X:   
#     Boot.eig <- eigCA4Multinom(sum(X),X,nrow(X),ncol(X))))
# 2. Permutation of X 
#      X4H0 <- (1/sum(X)^2)*(as.matrix(rowSums(X))%*%t(as.matrix(colSums(X))))
#      Perm.eig <- eigCA4Multinom(sum(X),X4H0,nrow(X),ncol(X))))
#___________________________________________________________
# Help for eigCA4Multinom starts here
#' @title 
#' Sample from a multinomial distribution 
#' (with a given probability distribution)
#' and compute the eigenvalues
#' of a correpondence analysis of the simulated
#' matrix.. 
#' @description \code{eigCA4Multinom}:
#' Sample from a multinomial distribution 
#' (with a given probability distribution)
#' and compute the eigenvalues
#' of the CA of an \code{nrow*ncol}
#'  (see below for these parameters).
#'  data matrix simulating correspondence analysis.  
#' 
#' @param nobs grand total of the table to be simulated.
#' @param prob probability distribution 
#'  for the cells. Should be length = \code{nrow*ncol}
#'  (see below for these parameters).
#'
#' @param nrow The number of rows of the matrix
#' to be simulated. 
#' @param ncol The number of columns of the matrix
#' to be simulated.
#' @return OUTPUT_DESCRIPTION
#' @details  \code{eigCA4Multinom}
#' is mostly used for computing eigenvalues
#' of
#'  created data matrices
#' simulating permutation and bootstrap procedures
#' for correspondence analysis.
#' 
#' @examples 
#' \dontrun{
#' set.seed(87) # set the seed
#' X <- matrix(round(runif(21)*20), ncol = 3) # good for CA
#' nobs <-  sum(X) # grand total
#' nI <- nrow(X)
#' nJ <- ncol(X)
#' pI <- as.matrix(rowSums(X) / nobs) # marginal I & J
#' pJ <- as.matrix(colSums(X) / nobs) # probabilites
#' p4Permutation <- pI %*% t(pJ) # Independence <=> permutation
#' # Simulated Permutation Probabilities
#' permEigen <- eigCA4Multinom(nobs, p4Permutation, nI, nJ)
#' p4Bootstrap   <- X / nobs # Actual prob <=> Bootstrap
#' permBoots <- eigCA4Multinom(nobs, p4Bootstrap, nI, nJ)
#' }
#' @importFrom stats rmultinom
#' @rdname eigCA4Multinom
#' @export 
#' @author  Hervé Abdi
eigCA4Multinom <- function(nobs, # grandtotal of the table
                   prob, # probability distribution 
                  # for the cells. Should be length = nI*nJ
                  nrow, ncol# nrow & ncol of the matrix
){ # function starts here
  CA.Valp  <- eigCA(matrix(
    as.vector(stats::rmultinom(1, nobs, prob)),
    nrow = nrow, ncol = ncol, byrow = FALSE))
  return(CA.Valp)
} # eof eigCA4Multinom ----
#___________________________________________________________
# Preamble multinomCV4CA ----
# function multinomCV4CA
#  Multinomial distribution Based
#      Cross Validation for Correspondence Analysis
#  Assumes a plain model for CA as a contingency table
#___________________________________________________________
# Help multinomCV4CA starts here
#' @title Compute the permuted and bootstrapped eigenvalues
#' of the correspondence analysis (CA) of a matrix suitable
#' for CA (i.e., a matrix with non negative elements).
#' 
#' @description \code{multinomCV4CA}: 
#' a very fast routine that
#' computes the permuted and bootstrapped eigenvalues
#' of the correspondence analysis (CA) of a matrix suitable
#' for CA (i.e., a matrix with non negative elements).
#' @param Data a matrix suitable
#' for CA (i.e., a matrix with non negative elements).
#' @param niter number of Bootstrapped/Permutations
#'  (Default: \code{1000}).
#' @return A list with two elements: 1)
#' \code{$Permuted.ValP}: The matrix of the 
#' \code{niter} by \code{rank(Data)} permuted
#' eigenvalues of the data matrix \code{Data}, and
#' \code{$Bootstraped.ValP}: The matrix of the 
#' \code{niter} by \code{rank(Data)} bootstrapped 
#' eigenvalues of the data matrix \code{Data}.
#' 
#' @details \code{multinomCV4CA}
#' uses the multinomial distribution to
#' simulate bootstrap and permutation resampling
#' for a correspondence analysis.
#' @examples 
#' \dontrun{
#' set.seed(87) # set the seed
#' X <- matrix(round(runif(21)*20), ncol = 3) # good for CA
#' ResCV <- multinomCV4CA(X)
#' }
#' @rdname multinomCV4CA
#' @export 

multinomCV4CA <- function(Data, # The contingency Table
                           # data frame or matrix
                  niter = 1000 # How Many Iterations
                          ){
          X = as.matrix(Data)
          nN = sum(X)
          nI = nrow(X)
          nJ = ncol(X)
 # Get permutated eigenvalues from multinomial distribution
 # Probability distribution under H0 for permutation
          X4H0 <- (1/sum(X)^2)*
                (as.matrix(rowSums(X))%*%t(as.matrix(colSums(X))))
          Perm.ValP <- t(replicate(niter,eigCA4Multinom(nN,X4H0,nI,nJ)))
# Get bootstraped eigenvalues from multinomial distribution
          Boot.ValP <- t(replicate(niter,
                            eigCA4Multinom(nN, X, nI, nJ)))
          nL <-  ncol(Boot.ValP)
   colnames(Perm.ValP) <- paste0('Dimension ',1:nL) -> colnames(Boot.ValP)
   return.list <- structure(
                     list(Permuted.ValP = Perm.ValP,
                        Bootstrapped.ValP = Boot.ValP),
                            class = 'Inference4CA')
   return(return.list)
       } # eof multinomCV4CA ----
# Print routines ----
# print routines ----
# #_____________________________________________________________________
# print.Inference4CA ----
#
#' Change the print function for Inference4CA
#'
#'  Change the print function for Inference4CA
#'
#' @param x a list: output of, e.g.,  multinomCV4CA
#' @param ... everything else for the functions
#' @author Hervé Abdi
#' @export
print.Inference4CA <- function(x, ...) {
  ndash = 78 # How many dashes for separation lines
  cat(rep("-", ndash), sep = "")
  cat("\n Inferences for the Eigenvalues of Correspondence Analysis \n")
  # cat("\n List name: ", deparse(eval(substitute(substitute(x)))),"\n")
    cat(rep("-", ndash), sep = "")
    cat("\n$Permuted.ValP     : ", "The matrix of permuted eigenvalues.") 
    cat("\n$ Bootstrapped.ValP: ", "The matrix of bootstrapped eigenvalues.")
  cat("\n",rep("-", ndash), sep = "")
  cat("\n")
  invisible(x)
} # end of function print.Inference4CA ----
# ____________________________________________________________________
#___________________________________________________________
#********************  End of Functions Here ***************
#***********************************************************
HerveAbdi/data4PCCAR documentation built on Sept. 11, 2022, 4:19 p.m.