#' Multiple-response Correspondence Analysis (MR-CA)
#'
#' @description This functions performs a multiple-response Correspondence Analysis (MR-CA) as defined in Mahieu, Schlich, Visalli, and Cardot (2021)
#'
#' @param data A data.frame of observations in rows whose first column is a factor (the categories) and subsequent columns are binary numeric or integer, each column being a response option
#' @param proj.row Optional. A contingency table with new categories to be projected as supplementary rows within the MR-CA space in rows and the same response options as data in columns
#' @param proj.row.obs A numeric vector whose length equals nrow(proj.row) and giving the number of observations within each projected rows. Useless if proj.row=NULL
#' @param proj.col Optional. A contingency table with new response options to be projected as supplementary columns within the MR-CA space in columns and the same categories as data in rows
#' @param ellipse Logical. Are confidence ellipses for the categories to be computed? Default is FALSE. See details
#' @param nboot Number of virtual datasets used in the total bootsrap procedure. Useless when ellipse=FALSE. See details
#' @param nbaxes.sig The number of significant axes retuned by \code{\link[MultiResponseR]{mr.dimensionality.test}}. By default, all axes are considered significant. Useless when ellipse=FALSE. See details
#'
#' @details
#' \itemize{
#' \item \strong{ellipse}: When ellipse=TRUE, confidence ellipses for the categories are computed using a total bootstrap procedure (Cadoret & Husson, 2013). \strong{nboot} virtual datasets are generated by randomly sampling with replacement response vectors within each category. A MR-CA is then performed on these virtual dataset and the resulting virtual configurations are adjusted to the actual configuration using Procrustes rotations accounting for \strong{nbaxes.sig} axes (Mahieu, Schlich, Visalli, & Cardot, 2021). Finally, for each category, a confidence ellipse is constructed using the position of its bootstrap replicates. The ellipses are plotted when using \code{\link[MultiResponseR]{plot.mrCA}} Pairwise total bootstrap tests as proposed in Castura et al. (2023) are also performed between the categories.
#' }
#'
#'
#' @return A list with the following elements:
#' \describe{
#' \item{eigen}{Eigenvalues and their corresponding percentages of inertia}
#' \item{row.coord}{Rows coordinates}
#' \item{col.coord}{Columns coordinates}
#' \item{proj.row.coord}{Projected rows coordinates}
#' \item{proj.col.coord}{Projected columns coordinates}
#' \item{svd}{Results of the singular value decomposition}
#' \item{bootstrap.replicate.coord}{Coordinates of the rotated bootstrap replicates}
#' \item{total.bootstrap.test.pvalues}{P-values of the pairwise total bootstrap tests}
#' }
#'
#' @references Mahieu, B., Schlich, P., Visalli, M., & Cardot, H. (2021). A multiple-response chi-square framework for the analysis of Free-Comment and Check-All-That-Apply data. Food Quality and Preference, 93.
#' @references Loughin, T. M., & Scherer, P. N. (1998). Testing for Association in Contingency Tables with Multiple Column Responses. Biometrics, 54(2), 630-637.
#' @references Cadoret, M., & Husson, F. (2013). Construction and evaluation of confidence ellipses applied at sensory data. Food Quality and Preference, 28(1), 106-115.
#' @references Castura, J. C., Varela, P., & Næs, T. (2023). Evaluation of complementary numerical and visual approaches for investigating pairwise comparisons after principal component analysis. Food Quality and Preference, 107.
#'
#'
#' @export
#'
#' @import stats
#' @import utils
#'
#' @examples
#' nb.obs=200
#' nb.response=5
#' nb.category=5
#' vec.category=paste("C",1:nb.category,sep="")
#' right=matrix(rbinom(nb.response*nb.obs,1,0.25),nb.obs,nb.response)
#' category=sample(vec.category,nb.obs,replace = TRUE)
#' dset=cbind.data.frame(category,right)
#' dset$category=as.factor(dset$category)
#'
#' res=mrCA(dset)
#'
#' plot(res)
mrCA=function(data,proj.row=NULL,proj.row.obs=NULL,proj.col=NULL,ellipse=FALSE,nboot=2000,nbaxes.sig=Inf){
classe=class(data)[1]
if (!classe%in%c("data.frame")){
stop("data must be a data.frame")
}
classe=class(data[,1])[1]
if (!classe%in%c("factor")){
stop("The first column of data must be factor")
}
if(!colnames(data)[1]%in%c("category","Category","produit","Produit","product","Product")){
stop("First column name must be category, Category, produit, Produit, product or Product")
}
colnames(data)[1]="cat"
data$cat=as.factor(as.character(data$cat))
for (j in 2:ncol(data)){
classe=class(data[,j])
if (!classe%in%c("numeric","integer")){
stop("Contingency data must be numeric or integer")
}
}
check.bin=unique(unlist(data[,2:ncol(data)]))
if (length(check.bin)>2){
warning("contingency data are not composed of only ones and zeros")
}else{
check.un=sum(check.bin==c(0,1))
check.deux=sum(check.bin==c(1,0))
if (check.un!=2 & check.deux!=2){
warning("contingency data are not composed of only ones and zeros")
}
}
verif.col=colSums(data[,2:ncol(data)])
if (any(verif.col==0)){
stop("Some columns are only zeros")
}
data=data[order(data$cat),]
rownames(data)=as.character(1:nrow(data))
cont=aggregate(.~cat,data,sum)
rownames(cont)=cont$cat
cont$cat=NULL
if (nbaxes.sig==1){
nbaxes.proc=2
}else if (nbaxes.sig==Inf){
nbaxes.proc=min((nrow(cont)-1),ncol(cont))
nbaxes.sig=min((nrow(cont)-1),ncol(cont))
}else if (nbaxes.sig==0){
stop("nbaxes.sig is equal to zero")
}else{
nbaxes.proc=nbaxes.sig
}
row.obs=table(data$cat)
nom.cat=names(row.obs)
row.obs=as.numeric(row.obs)
names(row.obs)=nom.cat
nplus=row.obs
nplusplus=sum(nplus)
redui=cont
fij=(nplus%o%colSums(redui))/nplusplus
std=((redui-fij)/sqrt(fij))/sqrt(nplusplus)
n=nrow(redui)
p=ncol(redui)
nb.axe=min(n-1,p)
udv=svd(std)
u=udv$u[,1:nb.axe,drop=FALSE]
vs=udv$d[1:nb.axe]
eig=vs^2
v=udv$v[,1:nb.axe,drop=FALSE]
rownames(u)=rownames(redui)
rownames(v)=colnames(redui)
marge.r=nplus/nplusplus
if (nb.axe==1){
dvs=vs
}else{
dvs=diag(vs)
}
row.coord=(u/sqrt(marge.r))%*%dvs
col.coord=v
if(!is.null(proj.row)){
classe=class(proj.row)[1]
if (!classe%in%c("matrix","data.frame")){
stop("proj.row must be a matrix or a data.frame")
}
if (ncol(proj.row)!=ncol(cont)){
stop("proj.row must have the same number of response options that data")
}
classe=class(proj.row.obs)[1]
if (!classe%in%c("numeric","integer")){
stop("proj.row.obs must be integer or numeric")
}
if (nrow(proj.row)!=length(proj.row.obs)){
stop("proj.row and proj.row.obs have not the same size")
}
unit.supp=proj.row/proj.row.obs
fij.sup=(colSums(redui))/nplusplus
mat.fij.sup=as.matrix(rep(1,nrow(proj.row)))%*%fij.sup
vec.sup=((unit.supp-mat.fij.sup)/sqrt(mat.fij.sup))
proj.row.coord=as.matrix(vec.sup)%*%v
rownames(proj.row.coord)=rownames(proj.row)
}
if(!is.null(proj.col)){
classe=class(proj.col)[1]
if (!classe%in%c("matrix","data.frame")){
stop("proj.col must be a matrix or a data.frame")
}
if (nrow(proj.col)!=nrow(cont)){
stop("proj.col must have the same number of categories that data")
}
c.supp=colSums(proj.col)/nplusplus
o.supp=proj.col/nplusplus
r.supp=nplus/nplusplus
e.supp=r.supp%o%c.supp
std.supp=(o.supp-e.supp)/sqrt(e.supp)
proj.col.coord=t(t(u)%*%as.matrix(std.supp)/vs)
rownames(proj.col.coord)=colnames(proj.col)
}
percent.eig=eig/sum(eig)*100
cum.percent.eig=cumsum(percent.eig)
mat.eig=cbind(eig,percent.eig,cum.percent.eig)
colnames(mat.eig)=c("eigenvalue","percentage of inertia","cumulative percentage of inertia")
name.dim=paste("Dim.",1:nrow(mat.eig),sep="")
rownames(mat.eig)=colnames(col.coord)=colnames(row.coord)=name.dim
if(!is.null(proj.row)){
colnames(proj.row.coord)=name.dim
}else{
proj.row.coord=NULL
}
if(!is.null(proj.col)){
colnames(proj.col.coord)=name.dim
}else{
proj.col.coord=NULL
}
colnames(u)=name.dim
names(vs)=name.dim
colnames(v)=name.dim
if (ellipse){
row.coord=row.coord[,1:nbaxes.proc,drop=FALSE]
col.coord=col.coord[,1:nbaxes.proc,drop=FALSE]
if(!is.null(proj.row)){
proj.row.coord=proj.row.coord[,1:nbaxes.proc,drop=FALSE]
}
if(!is.null(proj.col)){
proj.col.coord=proj.col.coord[,1:nbaxes.proc,drop=FALSE]
}
row.c=matrix(NA,max(table(data$cat)),nlevels(data$cat))
colnames(row.c)=levels(data$cat)
for (c in unique(data$cat)){
ou.c=which(data$cat==c)
row.c[1:length(ou.c),c]=ou.c
}
mySample=function(vec){
vec.retour=na.omit(vec)
if (length(vec.retour)>1){
vec.retour=sample(vec.retour,length(vec.retour),replace = TRUE)
}else{
vec.retour=vec.retour[1]
}
return(vec.retour)
}
myProcrustes=function(tofit,target,row.weights=rep(1,nrow(target)),scaling=FALSE){
if (!is.matrix(tofit) & !is.data.frame(tofit)){
stop("tofit must be a matrix or a data.frame")
}
if (!is.matrix(target) & !is.data.frame(target)){
stop("tofit must be a matrix or a data.frame")
}
X=as.matrix(tofit)
Y=as.matrix(target)
if (ncol(X)!=ncol(Y) | nrow(X)!=nrow(Y)){
stop("Dimension of tofit and target are different")
}
if (!is.logical(scaling)){
stop("scaling must be logical")
}
if (is.numeric(row.weights) | is.integer(row.weights)){
if (length(row.weights)!=nrow(target)){
stop("length(row.weights) must equal nrow(target)")
}
}else{
stop("class(row.weights) must be numeric or integer")
}
vecW=row.weights/sum(row.weights)
mX=t(as.matrix(vecW))%*%X
X=sweep(X,2,mX,"-")
mY=t(as.matrix(vecW))%*%Y
loc=mY
Y=sweep(Y,2,mY,"-")
matW=diag(row.weights/sum(row.weights)*nrow(target))
croise=t(X)%*%matW%*%Y
udv=svd(croise)
if (scaling){
dilat=sum(udv$d)/sum(diag(crossprod(X)))
}else{
dilat=1
}
H=udv$u%*%t(udv$v)
aligned=(dilat*X%*%H)
aligned=sweep(aligned,2,loc,"+")
return(aligned)
}
sortie=data.frame(cat=as.factor(rep(levels(data$cat),nboot)),matrix(0,nlevels(data$cat)*nboot,nbaxes.proc))
colnames(sortie)[2:ncol(sortie)]=colnames(row.coord)
vec.boot=seq(1,nrow(sortie),by=nlevels(data$cat))
pb=txtProgressBar(min=0,max = nboot,style=3)
for (boot in 1:nboot){
tirage=unlist(apply(row.c,2,mySample))
jdd.tirage=data[tirage,]
jdd.tirage=jdd.tirage[order(jdd.tirage$cat),]
rownames(jdd.tirage)=as.character(1:nrow(jdd.tirage))
nplus.tirage=table(jdd.tirage$cat)
nom.cat=names(nplus.tirage)
nplus.tirage=as.numeric(nplus.tirage)
names(nplus.tirage)=nom.cat
cont.tirage = aggregate(.~cat,jdd.tirage,sum)
rownames(cont.tirage)=as.character(cont.tirage$cat)
cont.tirage$cat=NULL
redui.tirage=cont.tirage
verif=colSums(cont.tirage)
vire = which(verif==0)
if(length(vire)!=0){
cont.tirage=cont.tirage[,-vire]
redui.tirage=redui.tirage[,-vire]
vire.extend=NULL
for (quel.vire in names(vire)){
ou.virer=which(colnames(jdd.tirage)==quel.vire)
vire.extend=c(vire.extend,ou.virer)
}
jdd.tirage=jdd.tirage[,-vire.extend]
}
nplusplus.tirage=sum(nplus.tirage)
fij.tirage=(nplus.tirage%o%colSums(redui.tirage))/nplusplus.tirage
std.tirage=((redui.tirage-fij.tirage)/sqrt(fij.tirage))/sqrt(nplusplus.tirage)
n.tirage=nrow(redui.tirage)
p.tirage=ncol(redui.tirage)
nb.axe.tirage=min(n.tirage-1,p.tirage)
udv.tirage=svd(std.tirage)
u.tirage=udv.tirage$u[,1:nb.axe.tirage]
vs.tirage=udv.tirage$d[1:nb.axe.tirage]
rownames(u.tirage)=rownames(redui.tirage)
marge.r.tirage=nplus.tirage/nplusplus.tirage
row.coord.tirage=(u.tirage/sqrt(marge.r.tirage))%*%diag(vs.tirage)
row.coord.tirage=row.coord.tirage[,1:nbaxes.proc]
rot = myProcrustes(row.coord.tirage, row.coord,nplus)
sortie[vec.boot[boot]:(vec.boot[boot]+nlevels(data$cat)-1),2:ncol(sortie)]=rot
setTxtProgressBar(pb,boot)
}
sortie$cat=as.factor(sortie$cat)
toellipse=sortie
rownames(toellipse)=as.character(1:nrow(toellipse))
coord.boot=toellipse[,1:(nbaxes.sig+1)]
dim.sig=nbaxes.sig
les.prod=unique(coord.boot$cat)
diff.test=as.data.frame(matrix(1,length(les.prod),length(les.prod)))
colnames(diff.test)=rownames(diff.test)=les.prod
for(i in 1:(nrow(diff.test)-1)){
for (j in (i+1):ncol(diff.test)){
p.1=rownames(diff.test)[i]
p.2=colnames(diff.test)[j]
coord.p1=coord.boot[coord.boot$cat==p.1,,drop=FALSE]
coord.p2=coord.boot[coord.boot$cat==p.2,,drop=FALSE]
delta=as.matrix(coord.p1[,-1,drop=FALSE]-coord.p2[,-1,drop=FALSE])
mu=colMeans(delta)
sigma=cov(delta)
calc.mal.sq=function(vec){
mal.bary=t(as.matrix(vec-mu))%*%solve(sigma,tol=1e-300)%*%(as.matrix(vec-mu))
return(as.numeric(mal.bary))
}
mal.sq.cloud=apply(as.matrix(delta),1,calc.mal.sq)
mal.sq.origin=as.numeric(t(as.matrix(rep(0,length(mu))-mu))%*%solve(sigma)%*%(as.matrix(rep(0,length(mu))-mu)))
pvalue.test=(sum(mal.sq.cloud>=mal.sq.origin)+1)/(nrow(delta)+1)
diff.test[p.1,p.2]=pvalue.test
diff.test[p.2,p.1]=pvalue.test
}
}
}else{
toellipse=NULL
diff.test=NULL
}
back=list(eigen=mat.eig,row.coord=row.coord,col.coord=col.coord,proj.row.coord=proj.row.coord,proj.col.coord=proj.col.coord,svd=list(u=u,vs=vs,v=v),bootstrap.replicate.coord=toellipse,total.bootstrap.test.pvalues=diff.test)
class(back)=c("mrCA","list")
return(back)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.