#' Within matrix decomposition for repeated measurements (cross-over design)
#'
#' This function is internally called by \code{pca}, \code{pls}, \code{spls},
#' \code{plsda} and \code{splsda} functions for cross-over design data, but can
#' be called independently prior to any kind of multivariate analyses.
#'
#'
#' \code{withinVariation} function decomposes the Within variation in the
#' \eqn{X} data set. The resulting \eqn{Xw} matrix is then input in the
#' \code{multilevel} function.
#'
#' One or two-factor analyses are available.
#'
#' @param X numeric matrix of predictors. \code{NA}s are allowed.
#' @param design a numeric matrix or data frame. The first column indicates the
#' repeated measures on each individual, i.e. the individuals ID. The 2nd and
#' 3rd columns are to split the variation for a 2 level factor.
#' @return \code{withinVariation} simply returns the \eqn{Xw} within matrix,
#' which can be input in the other multivariate approaches already implemented
#' in mixOmics (i.e. spls or splsda, see \code{multilevel}, but also pca or
#' ipca).
#' @author Benoit Liquet, Kim-Anh Lê Cao, Benoit Gautier, Ignacio González,
#' Florian Rohart, AL J Abadi
#' @seealso \code{\link{spls}}, \code{\link{splsda}}, \code{\link{plotIndiv}},
#' \code{\link{plotVar}}, \code{\link{cim}}, \code{\link{network}}.
#' @references On multilevel analysis:
#'
#' Liquet, B., Lê Cao, K.-A., Hocini, H. and Thiebaut, R. (2012) A novel
#' approach for biomarker selection and the integration of repeated measures
#' experiments from two platforms. \emph{BMC Bioinformatics} \bold{13}:325.
#'
#' Westerhuis, J. A., van Velzen, E. J., Hoefsloot, H. C., and Smilde, A. K.
#' (2010). Multivariate paired data analysis: multilevel PLSDA versus OPLSDA.
#' \emph{Metabolomics}, \bold{6}(1), 119-128.
#' @keywords regression multivariate
#' @export
#' @examples
#' ## Example: one-factor analysis matrix decomposition
#' #--------------------------------------------------------------
#' data(vac18)
#' X <- vac18$genes
#' # in design we only need to mention the repeated measurements to split the one level variation
#' design <- data.frame(sample = vac18$sample)
#'
#' Xw <- withinVariation(X = X, design = design)
#' # multilevel PCA
#' res.pca.1level <- pca(Xw, ncomp = 3)
#'
#' # compare a normal PCA with a multilevel PCA for repeated measurements.
#' # note: PCA makes the assumptions that all samples are independent,
#' # so this analysis is flawed and you should use a multilevel PCA instead
#' res.pca <- pca(X, ncomp = 3)
#'
#' # set up colors for plotIndiv
#' col.stim <- c("darkblue", "purple", "green4","red3")
#' col.stim <- col.stim[as.numeric(vac18$stimulation)]
#'
#' # plotIndiv comparing both PCA and PCA multilevel
#' plotIndiv(res.pca, ind.names = vac18$stimulation, group = col.stim)
#' title(main = 'PCA ')
#' plotIndiv(res.pca.1level, ind.names = vac18$stimulation, group = col.stim)
#' title(main = 'PCA multilevel')
#'
withinVariation <-
function(X, design){
# need a matrix for matrix calculations
X = as.matrix(X)
rep.measures = factor(design[, 1])
factors = design[, -1, drop = FALSE]
if(any(summary(as.factor(rep.measures)) == 1))
stop("Multilevel analysis can only be performed when all samples are repeated at least once.", call. = FALSE)
# calculate the variation
# ---------------------------
# added condition for the spls case where the condition is not needed
# all we need is the rep.measures
if ((ncol(factors) == 0) | (ncol(factors) == 1))
{
message("Splitting the variation for 1 level factor.")
# save sample names for the output
indiv.names = rownames(X)
rownames(X) = as.character(rep.measures)
# compute the mean for each unique individual
# dealing with specific case with only one subject (leave one out case during prediction)
X.mean.indiv = matrix(apply(X, 2, tapply, rep.measures, mean, na.rm = TRUE), # to deal with only one subject
nrow = length(unique(rep.measures)), ncol = dim(X)[2], dimnames = list(levels(as.factor(rep.measures)), colnames(X)))
# fill the between matrix with those means
Xb = X.mean.indiv[as.character(rep.measures), ]
# compute the within matrix as a difference between the original data
# and the between matrix
Xw = X - Xb
# put dimnames
dimnames(Xw) = list(indiv.names, colnames(X))
} else { # for 2 levels split
message("Splitting the variation for 2 level factors.")
###### off set term
Xm = colMeans(X)
###### compute the mean within each subject
Xs = apply(X, 2, tapply, rep.measures, mean, na.rm = TRUE)
Xs = Xs[rep.measures, ]
# for the first factor
xbfact1 = apply(X, 2, tapply, paste0(rep.measures, factors[, 1]), mean, na.rm = TRUE)
xbfact1 = xbfact1[paste0(rep.measures, factors[, 1]), ]
# for the second factor
xbfact2 = apply(X, 2, tapply, paste0(rep.measures, factors[, 2]), mean, na.rm = TRUE)
xbfact2 = xbfact2[paste0(rep.measures, factors[, 2]), ]
#### fixed effect
# for the first factor
Xmfact1 = apply(X, 2, tapply, factors[, 1], mean, na.rm = TRUE)
Xmfact1 = Xmfact1[factors[, 1], ]
# for the second factor
Xmfact2 = apply(X, 2, tapply, factors[, 2], mean, na.rm = TRUE)
Xmfact2 = Xmfact2[factors[, 2], ]
# compute the within matrix
Xw = X + Xs - xbfact1 - xbfact2 + Xmfact1 + Xmfact2
Xw = sweep(Xw, 2, 2*Xm) # see formula in refernece Liquet et al.
# put dimnames
dimnames(Xw) = dimnames(X)
}
return(invisible(Xw))
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.