R/CMM1.R

Defines functions margmodfit tablesize modeldf tomodelform tomargmodform printThetaAndBeta tocatlabels printadvancedstatistics printbasicstatistics printalgorithmdetails pds dphi phi gfunction Gfunction RecordsToFrequencies augmentData JoinModels SpecifyCoefficient MultiCoefficient spec.ordinal.disp.A spec.ordinal.disp.L spec.ordinal.loc.A spec.ordinal.loc.L unitvec spec.logOR spec.loglinearpars spec.gamma spec.kendalltau spec.concdiscprobs spec.hij spec.allmokkenhj spec.mokkenh spec.cronbachalpha allPatterns spec.cohenkappa spec.uncertaintycoefficient spec.goodmankruskaltau spec.diffprop spec.correlation spec.covariance spec.standarddeviation spec.variance spec.diversityindex spec.entropy spec.ginimeandifference spec.mean spec.condprob spec.logprob spec.prob betamat ConstraintMatrix MarginalMatrix EMarginalMatrix DesignMatrix desmat1 Margins allsubsets allsubsets1 allnsubsets allnsubsets1

Documented in ConstraintMatrix DesignMatrix JoinModels MarginalMatrix Margins RecordsToFrequencies SpecifyCoefficient

# Code file for CMM Version 1
#
# R programme for the book:
#
# Marginal models
# Models for Dependent, Clustered, and Longitudinal Categorical Data
# www.cmm.st
# 
# Wicher P. Bergsma
# Marcel Croon
# Jacques A. Hagenaars
#
# Springer, Statistics for the Social Sciences
#
# This R-programme is written by Wicher Bergsma and Andries van der Ark, 2009; updated 2023
#
##############################################################################
##############################################################################
##############################################################################
#
# List of main functions
#
#   Fitting:
#
#       MarginalModelFit( n, model ) (TO DO: Method="MDI")
#       SampleStatistics( n, coeff ) 
#       ModelStatistics( n, m, model, coeff )
#
#   Matrix functions:
#
#       MarginalMatrix( var, marg, dim )
#       DesignMatrix( var, marg, dim )
#       ConstraintMatrix( var, marg, dim )
#       DirectSum( mat1, mat2 )
# 
#
#   Specification of coefficients and models
#
#       SpecifyCoefficient( name, parameters, rep )  (TO DO: "KendallTau" etc)
#       JoinModels( model1, model2 )
#
#
#   Data formatting:
# 
#       RecordsToFrequencies(dat, dim )
# 
##############################################################################
##############################################################################
##############################################################################
##############################################################################
##############################################################################
##############################################################################
# matrix functions: desmat,MarginalMatrix


##############################################################################
# DirectSum (modified from web), takes variable no of args

DirectSum <- function (...){
     p.tr = 0;p.ll = 0;   ## p.tr and p.ll are padding values: originally given as optional args to function
     matlist = list(...);
     nmat = length(matlist);
     m1 = matlist[[1]];
     matlist = if(nmat==1 && is.list(m1)) m1 else matlist # check if list of matrices is given and amend accordingly
     nmat = length(matlist);                              # ,,
     m1 = matlist[[1]];                                   # ,,
     if(nmat==1) return(m1);
     for(i in 2:nmat){ 
        m2 = matlist[[i]];
        topleft <- m1
        topright <- matrix(p.tr, nrow(m1), ncol(m2))
        colnames(topright) <- colnames(m2)
        lowleft <- matrix(p.ll, nrow(m2), ncol(m1))
        lowright <- m2
        m1 = rbind(cbind(topleft, topright), cbind(lowleft, lowright))
     }
     m1
} 

#a=matrix(1:12,3,4);b=matrix(rep(2,6),3,2);DirectSum(a);



##############################################################################
#subsets

#a recursive function returning a list of all subsets of size n of list elements.
#used for function allsubsets
allnsubsets1<-function(x,n,index=1){
   rtn<-list()
   if(length(x)<n){return(NULL)}
   if(length(x)==n){return(list(x))}
   if(length(x)>n){
      #trigger recursion
      for(i in 1:length(x)){
         if(i>=index){
            rtn<-c(rtn,allnsubsets1(x[-i],n,i))
         }
      }   
   }
   return(rtn)
} 
allnsubsets=function(x,n,index=1){rev(allnsubsets1(x,n,index))}

#allnsubsets(c(1,2,3,4),2)

#finds all subsets of a set or a list of sets
allsubsets1 <- function(x){
   if ( data.class(x) == "NULL" ) { subsets <- c() }
   else
   if (data.class(x)=="numeric" || data.class(x)=="character"){
      n=length(x)
      subsets=allnsubsets(x,1)
      if (n>1) for(i in 2:n) subsets=c(subsets,allnsubsets(x,i))
   }
   else{
      subsets=allsubsets1(x[[1]])
      k=length(x)
      if (k>1) for(i in 2:k) subsets=union(subsets,allsubsets1(x[[i]]))
   }
   return(subsets)
} 

allsubsets = function(x){c("NULL",allsubsets1(x))}

#allsubsets(list(c(1,2),c(2,3)))

#allsubsets(c(1,2,3))
#allsubsets(list(c(1,2,3)))
#allsubsets(list(c(1,2),c(2,3),c(1,4)))

#############
# AA: Nieuwe export functie om 'marg' makkelijker te maken bij veel variabelen.
#     allnsubsets() is al een interne functie van cmm
Margins <- function(var, k){ 
  rtn <- NULL 
  for (idx in k) if (idx == 0L) rtn <- append(rtn, 0) else rtn <- append(rtn, as.list(as.data.frame(combn(length(var), idx))))
  return(unname(rtn))
} 

#############


##############################################################################
#MarginalMatrix and DesignMatrix


desmat1 = function(var, subvar, dim, coding ){

   #(*lmat2 produces kxk-matrix*)     ## AA 02-05-2023: produceert een "? x k"matrix (bv bij 'quintic' is het een 5 x k matrix) 
   lmat2 = function(type,k){
      x = c(1:k) - mean(c(1:k));
      switch( type,
         "Identity"  = diag(k),
         "Nominal"   = rbind(diag(k)[1:k-1,]),
         "Linear"    = rbind(x),
         "Quadratic" = rbind(x,c(1:k)^2),
         "Cubic"     = rbind(x,c(1:k)^2,c(1:k)^3),
         "Quartic"   = rbind(x,c(1:k)^2,c(1:k)^3,c(1:k)^4),
         "Quintic"   = rbind(x,c(1:k)^2,c(1:k)^3,c(1:k)^4,c(1:k)^5)
         )
   }
   
   nvar=length(var);
   coding = if( is.character(coding)&&length(coding)==1 ) rep(coding,length(subvar)) else coding;
   coding = lapply( var, function(x){if(is.element(x,subvar)) coding[subvar==x][[1]] else rbind(rep(1,dim[var==x]))} );

   matlist = list();
   for(i in 1:nvar){
      matlist[[i]] = 
         if      (is.vector(coding[[i]])&&!is.character(coding[[i]])) rbind(coding[[i]])
         else if (is.matrix(coding[[i]])&&!is.character(coding[[i]])) coding[[i]]
         else if (is.element(var[[i]],subvar))                        lmat2(coding[[i]],dim[[i]])
         else                                                         rbind(rep(1,dim[[i]]))
   }
   mat=matlist[[1]];
   if(nvar>1) for (i in 2:nvar){mat = kronecker(mat,matlist[[i]])}

   return(t(mat))
}

#desmat1(c(1,2),c(1,2),c(3,3),c("Nominal","Quintic"))
#desmat1(c(1,2),c(1,2),c(3,3),list("Nominal",rbind(c(1,2,3))))
#desmat1(c(1,2),c(1,2),c(3,3),c("Nominal","Quintic"))
#desmat1(c("A","B"),c("A","B"),c(3,3),c("Nominal","Quintic"))
#desmat1(c("A","B"),c(),c(2,2),c("Nominal"))
#desmat1(c("A"),c("A"),c(2),c("Nominal"))


DesignMatrix = function(var, suffconfigs, dim, SubsetCoding="Automatic", MakeSubsets=TRUE){
   suffconfigs = if(is.list(suffconfigs)) suffconfigs else list(suffconfigs)
   marglist = if(MakeSubsets) allsubsets(suffconfigs) else suffconfigs;
   nmarg=length(marglist);
   
   #put in standard form
   q = SubsetCoding[[1]];
   SubsetCoding = if( is.character(q) || is.numeric(q) ) list(SubsetCoding) else SubsetCoding

   nsubs = length(SubsetCoding);

   #coding gives specification for each marginal
   coding = 
      if( isTRUE(SubsetCoding=="Automatic") ) rep("Nominal",nmarg)   
      else if( is.character(SubsetCoding[[1]]) && length(SubsetCoding)==1 && length(SubsetCoding[[1]])==1) rep(SubsetCoding,nmarg) 
      else lapply(marglist, function(x){ 
         for(i in 1:nsubs) {
            ss1 = SubsetCoding[[i]][[1]];
            if(length(x)==length(ss1)) if(isTRUE(prod(x==ss1)==1)) return(SubsetCoding[[i]][[2]])
            }
         return("Nominal")
         }  )
   deslist = list();
   for(i in 1:nmarg) deslist[[i]] = desmat1(var,marglist[[i]],dim,coding[[i]])
   des = deslist[[1]];
   if(nmarg>1) for(i in 2:nmarg) des = cbind(des,deslist[[i]])
   return(des)
}
#DesignMatrix(c("P", "R"), list(c("P","R")), c(3,3), SubsetCoding = list(c("P", "R"),list("Linear", rbind(c(1,1,0),c(1,1,0),c(0,0,1)))))
#DesignMatrix(c("P", "R"), list(c("P","R")), c(3,3), SubsetCoding = list(c("P", "R"),list("Linear", rbind(c(1,1,0),c(1,1,0),c(0,0,1)))))
#DesignMatrix(c("A","B"),list(c("A"),c("B")),c(2,2))
#DesignMatrix(c(1,2,3),list(c(1,2),c(2,3)),c(3,3,3),SubsetCoding=list(list(c(1,2),c("Linear","Linear")),list(c(1,2),c("Linear","Linear"))))
#t(DesignMatrix(c(1,2),c(1,2),c(3,3),SubsetCoding="Identity",MakeSubsets=FALSE))

#################
# AA: EMarginalMatrix: Hulpfunctie voor MarginalMatrix() bij MEL of MAEL.

EMarginalMatrix <- function(nLab, var, marg, dim){
  dim <- lapply(dim, function(x) 1:x)
  cMatch <- function(strng, positions, criteria) all(unlist(strsplit(strng, NULL))[positions] == criteria)
  D <- NULL; i <- 1; j <- 1
  for (i in 1:length(marg)){
    if(length(marg[[i]]) == 1 & any(marg[[i]] == 0L)) { 
        newD <- matrix(rep(1, length(nLab)), nrow = 1)
        name <- rep("+", length(var))
        dimnames(newD) <- list(paste(name, collapse = ""), nLab)
        D <- rbind(D, newD)
    } else {
       cValues <- expand.grid(dim[marg[[i]]])
       if(ncol(cValues) > 1) cValues <- cValues[, ncol(cValues):1]
       cPositions <- marg[[i]]
       for (j in 1:nrow(cValues)){ 
         newD <- matrix(sapply(nLab, cMatch, cPositions, cValues[j,]) * 1, nrow = 1) 
         name <- rep("+", length(var))
         name[marg[[i]]] <- cValues[j, ]
         dimnames(newD) <- list(paste(name, collapse = ""), nLab)
         D <- rbind(D, newD)
       }
    }   
  }
  if(any(apply(D,1,sum)== 0)) warning("Some margins were not observed")
  return(D[apply(D,1,sum)!=0,])
}

#################
# AA: MarginalMatrix has been slightly adjusted. 
#     Argument 'vec' has been added. If is.null(vec) the traditional MarginalMatrix is built, 
#     if vec = is the frequency vector with rownames equal to the score patterns, then vec is used for building the MarginalMatrix.
 
MarginalMatrix = function(var, marg, dim, SubsetCoding = "Identity", vec = NULL){
     if (is.null(vec)){
      if (prod(dim) > 100000) warning("Marginal matrix is very large. Please consider using reduced-cells estimation method MAEL")
      t(DesignMatrix(var, marg, dim, SubsetCoding = SubsetCoding, MakeSubsets = FALSE))
     } else {
     if (!inherits(vec, "matrix")) stop("Argument 'vec' must be of class 'matrix'.")
     if (is.null(dimnames(vec)[[1]])) stop("Argument 'vec' must have dimnames representing the variable scores.")
     EMarginalMatrix(dimnames(vec)[[1]], var, marg, dim)  
  }
}    

#MarginalMatrix(var, 1)
#MarginalMatrix(c(1,2),list(c(1),c(2)),c(3,3))
#MarginalMatrix(c(1,),list(c(1),c(2)),c(3,3))

newnull <- function (M) {
   tmp <- qr(M)
   set <- if (tmp$rank == 0) 1:ncol(M) else -(1:tmp$rank)
   qr.Q(tmp, complete = TRUE)[, set, drop = FALSE]
}


#returns hierarchical model constraint matrix, ie nullspace of design matrix
ConstraintMatrix = function(var,suffconfigs,dim, SubsetCoding="Automatic"){
   return(t(newnull(DesignMatrix(var,suffconfigs,dim,SubsetCoding))))
   }

#ConstraintMatrix(c(1,2),list(c(1),c(2)),c(2,2))

#compute the matrix for obtaining the beta parameters under different coding schemes

betamat = function(var, subvar, dim, coding){

   #lmat1 produces kx1-vector, lmat2 produces kxk-matrix

   polmat = function(x,k){
      powmat = rbind(rep(1,k));
      if(k>1) for(i in 1:(k-1)) powmat = rbind(powmat, x^i - mean(x^i))
      invmat = solve(t(powmat))
      return(rbind(invmat[2:k,],rep(0,k)))     }
   
   lmat1 = function(type2, k){
      x = if(length(type2)>1) type2[[2]] else NULL
      type = if(length(type2)>1) type2[[1]] else type2 #second arg is constant for dummy coding, vector for polynomial
      switch( type,
         "Effect"     = rbind(rep(1/k,k)),
         "Simple"     = rbind(rep(1,k)),
         "Dummy"      = {if(length(x)==0) x=1; rbind(diag(k)[x,])}, 
         "Polynomial" = rbind(rep(1,k))
      ) }  

   lmat2 = function(type2,k){
      x = if(length(type2)>1) type2[[2]] else NULL
      type = if(length(type2)>1) type2[[1]] else type2 #second arg is constant for dummy coding, vector for polynomial
      switch( type,
         "Effect"     = diag(k)-1/k,
         "Simple"     = diag(k),
         "Dummy"      = {if(length(x)==0) x=1; diag(k) - t(matrix(rep(diag(k)[x,],k),k,k))},
         "Polynomial" = {if(length(x)==0) x=c(1:k)-(k+1)/2; polmat(x,k)}
         ) }
         
   nvar = length(var);
   coding = if( is.character(coding)&&length(coding)==1 ) as.list(rep(coding,nvar)) else coding;

   matlist = list();
   for( i in 1:nvar ){matlist[[i]] = if(is.element(var[[i]],subvar)) lmat2(coding[[i]],dim[[i]]) else lmat1(coding[[i]],dim[[i]]) }

   mat = matlist[[1]] 
   if(nvar==1) mat else for(i in 2:nvar) mat = kronecker(mat,matlist[[i]])

   return(mat)

   }

#betamat(c(1,2),c(1,2),c(2,2),list(list("Dummy",2),"Effect"))
#betamat(c(1,2),c(1,2),c(3,3),"Effect")
#betamat(c(1,2),c(1,2),c(2,2),list(list("Polynomial",c(-2,2)),"Polynomial"))


#   
#   lmat2["Polynomial", k_] := 
#   lmat2[{"Polynomial", Range[k] - (k + 1)/2}, k];


###############################################################################################
###############################################################################################
###############################################################################################
# specification of coefficients


#probabilities in honogeneous form
spec.prob <- function(dim){
  k = prod(dim);
  id <- diag(rep(1,k))
  one <- t(rep(1,k))
  m1 <- rbind(id,one)
  m2 <- t(rbind(id,-one))
  matlist <- list(id,m2,m1)
  funlist <- list("exp","log","identity")
  coeff <- list(matlist, funlist)
  return(coeff)
}

#log probabilities in honogeneous form
spec.logprob <- function(dim){
  k = prod(dim);
  id <- diag(rep(1,k))
  one <- t(rep(1,k))
  m1 <- rbind(id,one)
  m2 <- t(rbind(id,-one))
  matlist <- list(m2,m1)
  funlist <- list("log","identity")
  coeff <- list(matlist, funlist)
  return(coeff)
}


spec.condprob <- function(z){  # z=(var,condvar,dim)
  var = z[[1]];
  condvar = z[[2]];
  dim = z[[3]];
  ident = diag(prod(dim));
  at1 = MarginalMatrix(var,condvar,dim);
  at1 = t(at1) %*% at1;
  at1 = rbind(ident,at1);
  at2 = cbind(ident,-ident);
  at3 = ident;
  matlist = list(at3, at2, at1);
  funlist = list("exp", "log", "identity");
  list(matlist, funlist)
}
  
#spec.condprob(list(c(1,2),c(1),c(2,2)))



spec.mean <- function(scores){
   at <- t(matrix(scores))
   prob <- spec.prob(length(scores))
   matlist <- c(list(at), prob[[1]])
   funlist <- c(list("identity"), prob[[2]])
   coeff <- list(matlist, funlist)
   return(coeff)
}


spec.ginimeandifference <- function(scores){
   k <- length(scores)
   q=diag(k)
   m1=matrix(nrow=k^2,ncol=k)
   for(i in 1:k) for(j in 1:k) m1[(i-1)*k + j,] = q[i,] + q[j,]
   m2=matrix(nrow=1,ncol=k^2)
   for(i in 1:k) for(j in 1:k) m2[,(i-1)*k + j] = abs(scores[i] - scores[j])
   prob <- spec.prob(k)
   matlist <- c( list( m2, m1 ), prob[[1]] )
   funlist <- c(list( "exp", "log" ), prob[[2]])
   coeff <- list(matlist, funlist)
   return(coeff)
}

spec.entropy <- function(k){
   at <- t(matrix(rep(1,k)))
   prob <- spec.prob(k)
   matlist <- c(list(at), prob[[1]])
   funlist <- c(list("xlogx"), prob[[2]])
   coeff <- list(matlist, funlist)
   return(coeff)
}


spec.diversityindex <- function(k){
   at <- t(matrix(rep(1,k)))
   prob <- spec.prob(k)
   matlist <- c(list(at), prob[[1]])
   funlist <- c(list("xbarx"), prob[[2]])
   coeff <- list(matlist, funlist)
   return(coeff)
}


spec.variance <- function(scores){
   newscores <- scores - min(scores) + 1
   m1 <- rbind(newscores^2, newscores, rep(1,length(scores)))
   m2 <- rbind(c(1, 0, -1),c(0, 2, -2))
   m3 <- t(matrix(c(1,-1)))
   matlist <- list(m3, m2, m1)
   funlist <- list("exp", "log", "identity")
   coeff <- list(matlist, funlist)
   return(coeff)
}

spec.standarddeviation <- function(scores){
   varspec <- spec.variance(scores)
   m1 <- t(matrix(c(1)))
   matlist <- c(list(m1), varspec[[1]])
   funlist <- c(list("sqrt"), varspec[[2]])
   coeff <- list(matlist, funlist)
   return(coeff)
}

#coeff=spec.standarddeviation(c(1,2))
#gfunction(c(2,2),coeff)

spec.covariance <- function(xyscores){
   xscores <- xyscores[[1]]
   xscores <- xscores - min(xscores) + 1
   yscores <- xyscores[[2]]
   yscores <- yscores - min(yscores) + 1
   k <- length( xscores )
   l <- length( yscores )
   m1a <- MarginalMatrix( c(1,2), list(c(1,2),c(1),c(2)), c(k,l) )
   m1b1 <- as.vector( as.vector(xscores) %o% as.vector(yscores) )
   m1b1 <- rbind( rep( 1, k*l ), m1b1 )
   m1b2 <- DirectSum( rbind(xscores), rbind(yscores) )
   m1b <- DirectSum( m1b1, m1b2 )
   m1 <- m1b %*% m1a
   m2 <- rbind(c(1, 1, 0, 0),c(0, 0, 1, 1))
   m3 <- t(matrix(c(1,-1)))
   matlist <- list(m3, m2, m1)
   funlist <- list("exp", "log", "identity")
   list(matlist, funlist)
}

   
spec.correlation <- function(xyscores){
   xscores <- xyscores[[1]]
   xscores <- xscores - min(xscores) + 1
   yscores <- xyscores[[2]]
   yscores <- yscores - min(yscores) + 1
   k <- length( xscores )
   l <- length( yscores )
   m1a <- MarginalMatrix( c(1,2), list(c(1,2),c(1),c(2)), c(k,l) )
   m1b1 <- as.vector( as.vector(xscores) %o% as.vector(yscores) )
   m1b1 <- rbind( rep( 1, k*l ), m1b1 )
   m1b2 <- DirectSum( rbind(xscores,xscores^2), rbind(yscores,yscores^2) )
   m1b <- DirectSum( m1b1, m1b2 )
   m1 <- m1b %*% m1a
   m2 <- rbind(c(1, 1, 0, 0, 0, 0), c(0, 0, 1, 0, 1, 0), c(0, 0, 0, 1, 0, 0), c(0, 0, 2, 0, 0, 0), c(0, 0, 0, 0, 0, 1), c(0, 0, 0, 0, 2, 0) )
   m3 <- rbind( c(1, 0, 0, 0, 0, 0), c(0, 1, 0, 0, 0, 0), c(0, 0, 1, 0, -1, 0), c(0, 0, 0, 1, 0, -1) )
   m4 <- rbind( c(1, 0, -.5, -.5), c(0, 1, -.5, -.5) )
   m5 <- rbind( c(1, -1) )
   matlist <- list( m5, m4, m3, m2, m1 )
   funlist <- list( "exp", "log", "exp", "log", "identity" )
   list(matlist, funlist)
}

#SpecifyCoefficient("Correlation",list(c(1,2),c(1,2)))

spec.diffprop <- function(c,Response="Columns",DropLast=FALSE){
   id1 <- diag(c)
   id2 <- diag(2*c)
   
   m1b = if(Response == "Columns") MarginalMatrix(c(1, 2), c(1), c(2,c)) else MarginalMatrix(c(1, 2), c(2), c(c,2))
   m1b = t(m1b) %*% m1b;
   m1 = rbind(id2, m1b);

   m2 = cbind(id2, -id2);
   m3 = cbind(id1, -id1);
   m3 = if(DropLast) m3[1:dim(m3)[1]-1,] else m3
   list( list(m3, m2, m1), list("exp", "log", "identity"))
}

#spec.diffprop(2,Response="Rows",DropLast=TRUE)

spec.goodmankruskaltau <- function(rc,Response="Columns"){
   r=rc[[1]];
   c=rc[[2]];

   id = diag(r*c);
   idc = diag(c);
  
   m1a = MarginalMatrix(c(1,2),c(1),c(r,c))
   m1b = MarginalMatrix(c(1,2),c(2),c(r,c))
   m1 = if(Response=="Columns") rbind(id,t(m1a) %*% m1a,m1b) else rbind(id,t(m1b) %*% m1b,m1a)

   z1=matrix(0,r*c,c);
   z2=matrix(0,r*c,r*c);
   m2 = rbind(cbind(2*id, -id, z1), cbind(id, z2, z1), cbind(t(z1), t(z1), 2*idc));

   m3a = matrix(1,1,r*c);
   m3b = matrix(1,1,c);
   zero= matrix(0,1,r*c);
   m3 = rbind(cbind(m3a, zero, -m3b), cbind(zero, m3a, -m3b));

   m4 = matrix(c(1,-1),nrow=1,ncol=2);
   m5 = matrix(c(1),nrow=1,ncol=1);

   funlist = list("exp", "log", "exp", "log", "identity");
   matlist = list(m5, m4, m3, m2, m1);

   prob <- spec.prob(r*c)
   matlist <- c(matlist, prob[[1]])
   funlist <- c(funlist, prob[[2]])
   coeff <- list(matlist, funlist)
   return(coeff)
}

#coeff=spec.goodmankruskaltau(3,2)
#gfunction(c(4,2,6,4,6,7),coeff)


spec.uncertaintycoefficient <- function(rc){
   r=rc[[1]];
   c=rc[[2]];

   m1 = MarginalMatrix(c(1,2),list(c(1, 2),c(1),c(2)), c(r,c));
   m2rc = matrix(1,1,r*c);
   m2r = matrix(1,1,r);
   m2c = matrix(1,1,c);
   zrc= matrix(0,1,r*c);
   zr = matrix(0,1,r);
   m2 = rbind(cbind(m2rc, -m2r, -m2c), cbind(zrc, zr, -m2c));
   m3 = matrix(c(1,-1),nrow=1,ncol=2);
   m4 = matrix(c(1),nrow=1,ncol=1);

   funlist = list("exp","log","xlogx","identity");
   matlist = list(m4, m3, m2, m1);

   prob <- spec.prob(r*c)
   matlist <- c(matlist, prob[[1]])
   funlist <- c(funlist, prob[[2]])
   coeff <- list(matlist, funlist)
   return(coeff)
}


spec.cohenkappa <- function(k){
   m1a = c(diag(k));
   m1b = MarginalMatrix(c(1,2), list(c(),c(1),c(2)), c(k,k));
   m1one = matrix(1,1,k*k);
   m1 = rbind(m1a, m1b, m1one);
   m2a = DirectSum(DirectSum(matrix(1,1,1),matrix(1,1,1)),MarginalMatrix(c(1,2),c(2),c(2,k)));
   m2b = apply(m2a,1,sum);
   m2 = cbind(m2a,-m2b);
   m3 = DirectSum(matrix(1,1,1), rbind(c(0,rep(1, k)), c(1,rep(-1, k))));
   m4 = rbind(c(1, 0, -1), c(0, 1, -1));
   m5 = matrix(c(1,-1),nrow=1,ncol=2);
   list(list(m5, m4, m3, m2, m1),list("exp","log","exp","log","identity"))
}

#coeff=spec.cohenkappa(2)
#gfunction(c(4,2,6,4),coeff)

allPatterns <- function(J,m){
  grid <- list()
  j <- 0;
  p <- m^J
  for (j in 1:J){
    grid <- c(grid, j)
    grid[[j]] <- 0:(m-1)
  }
  X <- t(expand.grid(grid))
  dimnames(X) <- NULL
  return(X[J:1,])
}


spec.cronbachalpha <- function(arg, data){
# arg = list(
#         list(items alpha 1, items alpha 2, ..., items alpha k),
#         list(    g alpha 1,     g alpha 2, ...,     g alpha k),
#         list(group alpha 1, group alpha 2, ..., group alpha k)
#       )
# 
  eps <- 1e-5
# Functions for matrices filled with ones (U) and zeroes (O)
  U <- function(a,b) matrix(1,a,b)
  O <- function(a,b) matrix(0,a,b)
# Function converts string to integer
  string2integer <- function(s) as.numeric(unlist(strsplit(s,NULL)))
# Function modified DirectSum
  direct.sum <- function(L){
    if (length(L)==1) return(L[[1]])
    C <- L[[1]]
    for (i in 2:length(L)) C <- DirectSum(C,L[[i]])
    return(C)
  }

  if(is.null(arg)) stop("'arg' not specified")
  if(!is.list(arg)) stop("'arg' is not a list")
  if(!is.list(arg[[1]])) stop("First element of 'arg' is not a list")
  method <- ifelse(is.null(data),1,2)  
  rep <- length(arg[[1]])
  mats <- list()
  for (i in 1:5)  mats[[i]] <- list()
  for (i in 1:rep){
    items <- arg[[1]][[i]]
    J <- length(items)
    g <- arg[[2]][[i]]
    groupVar <- NULL
    if(length(arg)==3) groupVar <- 1
    if (method==1){
      R <- allPatterns(J,g)
    } else {
      if(is.null(groupVar)) Y <- data[,items] else Y <- data[data[,groupVar]==i,items]
      if(any(apply(Y,2,var) < eps))stop("One or more variables have zero variance")  
      nLab <- matrix(names(table(apply(Y,1,paste, collapse=""))))
      R <- apply(nLab,1,string2integer)
    }
    S <- U(1,J) %*% R
    C5 <- rbind(R,S,R^2,S^2,U(1,ncol(R)))
    C4 <- rbind(
            cbind(O(J+1,J+1),  diag(J+1), U(J+1,1)),
            cbind(diag(2,J+1), O(J+1,J+1),O(J+1,1))
          )
    C3 <- rbind(
           cbind(U(1,J), 0,      -U(1,J), 0),
           cbind(O(2,J), U(2,1),  O(2,J), -U(2,1))
          )
    C2 <- matrix(c(0,1,1,-1,-1,0),nrow=2)
    C1 <- matrix(c(J/(J-1),-J/(J-1)),nrow=1)
    mats[[1]][[i]] <- C1
    mats[[2]][[i]] <- C2
    mats[[3]][[i]] <- C3
    mats[[4]][[i]] <- C4
    mats[[5]][[i]] <- C5
  }
  C1 <- direct.sum(mats[[1]])   
  C2 <- direct.sum(mats[[2]])   
  C3 <- direct.sum(mats[[3]])   
  C4 <- direct.sum(mats[[4]])   
  C5 <- direct.sum(mats[[5]])   
  matlist = list(C1,C2,C3,C4,C5)
  funlist = list("exp","log","exp","log","identity")
  return(list(matlist,funlist))
}

spec.mokkenh <- function(arg, data){
# arg = list(
#         list(items H 1, items H 2, ..., items H k),
#         list(    g H 1,     g H 2, ...,     g H k),
#         list(group H 1, group H 2, ..., group H k)
#       )
# 
  eps <- 1e-5
# Functions for matrices filled with ones (U) and zeroes (O)
  U <- function(a,b) matrix(1,a,b)
  O <- function(a,b) matrix(0,a,b)
# Function converts string to integer
  string2integer <- function(s) as.numeric(unlist(strsplit(s,NULL)))
# Function modified DirectSum
  direct.sum <- function(L){
    if (length(L)==1) return(L[[1]])
    C <- L[[1]]
    for (i in 2:length(L)) C <- DirectSum(C,L[[i]])
    return(C)
  }
# Function prp
  prp <- function(A,B){
    if(nrow(A)!=nrow(B) | ncol(A)!=ncol(B)) stop('A and B have unequal rows or columns')
    n <- nrow(A)
    C <- matrix(nrow=n*(n-1)/2,ncol=ncol(A))
    i <- 0; j <- 0
    for(i in (1:(n-1))) for(j in ((i+1):n)){
      k <- (i-1)*n - sum(0:i) + j
      C[k,] <- A[i,] * B[j,]
    }
    return(C)
  }

  if(is.null(arg)) stop("'arg' not specified")
  if(!is.list(arg)) stop("'arg' is not a list")
  if(!is.list(arg[[1]])) stop("First element of 'arg' is not a list")
  method <- ifelse(is.null(data),1,2)  
  rep <- length(arg[[1]])
  mats <- list()
  for (i in 1:5)  mats[[i]] <- list()
  for (i in 1:rep){
    items <- arg[[1]][[i]]
    J <- length(items)
    g <- arg[[2]][[i]]
    groupVar <- NULL
    if(length(arg)==3) groupVar <- 1
    if (method==1){
      R <- allPatterns(J,g)
    } else {
      if(is.null(groupVar)) Y <- data[,items] else Y <- data[data[,groupVar]==i,items]
      if(any(apply(Y,2,var) < eps))stop("One or more variables have zero variance")  
      nLab <- matrix(names(table(apply(Y,1,paste, collapse=""))))
      R <- apply(nLab,1,string2integer)
    }
    p <- J*(J-1)/2
  
    C5 <- rbind(U(1,ncol(R)),1-R,R,prp(1-R,R)) 
    Q1 <- O(p,2*J)
    for (ii in (1:(J-1))) for (jj in (ii+1):J) Q1[J*(ii-1) - ii*(ii+1)/2 + jj,c(ii,J+jj)] <- 1
    C4 <- DirectSum(U(1,1),Q1,diag(p))
    C3 <- DirectSum(U(2,1),U(1,p),U(1,p))
    C2 <- matrix(c(1,-1,0,0, 0, 1,-1,1),2,4,byrow=TRUE)
    C1 <- matrix(c(1,-1),1,2)
    mats[[1]][[i]] <- C1
    mats[[2]][[i]] <- C2
    mats[[3]][[i]] <- C3
    mats[[4]][[i]] <- C4
    mats[[5]][[i]] <- C5
  }
  C1 <- direct.sum(mats[[1]])   
  C2 <- direct.sum(mats[[2]])   
  C3 <- direct.sum(mats[[3]])   
  C4 <- direct.sum(mats[[4]])   
  C5 <- direct.sum(mats[[5]])   
  matlist = list(C1,C2,C3,C4,C5)
  funlist = list("exp","log","exp","log","identity")
  return(list(matlist,funlist))
}

spec.allmokkenhj <- function(arg, data){
# arg = list(
#         list(items Hj 1, items Hj 2, ..., items Hj k),
#         list(    g Hj 1,     g Hj 2, ...,     g Hj k), # SHOULD BE 2
#         list(group Hj 1, group Hj 2, ..., group Hj k)
#       )
# 
  eps <- 1e-5
# Functions for matrices filled with ones (U) and zeroes (O)
  U <- function(a,b) matrix(1,a,b)
  O <- function(a,b) matrix(0,a,b)
# Function converts string to integer
  string2integer <- function(s) as.numeric(unlist(strsplit(s,NULL)))
# Function modified DirectSum
  direct.sum <- function(L){
    if (length(L)==1) return(L[[1]])
    C <- L[[1]]
    for (i in 2:length(L)) C <- DirectSum(C,L[[i]])
    return(C)
  }
# Function prp
  prp <- function(A,B){
    if(nrow(A)!=nrow(B) | ncol(A)!=ncol(B)) stop('A and B have unequal rows or columns')
    n <- nrow(A)
    C <- matrix(nrow=n*(n-1)/2,ncol=ncol(A))
    i <- 0; j <- 0
    for(i in (1:(n-1))) for(j in ((i+1):n)){
      k <- (i-1)*n - sum(0:i) + j
      C[k,] <- A[i,] * B[j,]
    }
    return(C)
  }

  if(is.null(arg)) stop("'arg' not specified")
  if(!is.list(arg)) stop("'arg' is not a list")
  if(!is.list(arg[[1]])) stop("First element of 'arg' is not a list")
  method <- ifelse(is.null(data),1,2)  
  rep <- length(arg[[1]])
  mats <- list()
  for (i in 1:5)  mats[[i]] <- list()
  for (i in 1:rep){
    items <- arg[[1]][[i]]
    J <- length(items)
    g <- arg[[2]][[i]]
    groupVar <- NULL
    if(length(arg)==3) groupVar <- 1
    if (method==1){
      R <- allPatterns(J,g)
    } else {
      if(is.null(groupVar)) Y <- data[,items] else Y <- data[data[,groupVar]==i,items]
      if(any(apply(Y,2,var) < eps))stop("One or more variables have zero variance")  
      nLab <- matrix(names(table(apply(Y,1,paste, collapse=""))))
      R <- apply(nLab,1,string2integer)
    }
    p <- J*(J-1)/2
  
    C5 <- rbind(U(1,ncol(R)),1-R,R,prp(1-R,R)) 
    Q1 <- O(p,2*J)
    for (ii in (1:(J-1))) for (jj in (ii+1):J) Q1[J*(ii-1) - ii*(ii+1)/2 + jj,c(ii,J+jj)] <- 1
    C4 <- DirectSum(U(1,1),Q1,diag(p))
    Q2 <- t(Q1[,1:J] + Q1[,(J+1):(2*J)])

    C3 <- DirectSum(U(2,1),Q2,Q2)
    C2 <-  rbind(
        cbind(matrix(c(1,-1),1,2),O(1,2*J)),
        cbind(U(J,1),O(J,1),-diag(J),diag(J))
      )
    C1 <- cbind(U(J,1),diag(-1,J))
    mats[[1]][[i]] <- C1
    mats[[2]][[i]] <- C2
    mats[[3]][[i]] <- C3
    mats[[4]][[i]] <- C4
    mats[[5]][[i]] <- C5
  }
  C1 <- direct.sum(mats[[1]])   
  C2 <- direct.sum(mats[[2]])   
  C3 <- direct.sum(mats[[3]])   
  C4 <- direct.sum(mats[[4]])   
  C5 <- direct.sum(mats[[5]])   
  matlist = list(C1,C2,C3,C4,C5)
  funlist = list("exp","log","exp","log","identity")
  return(list(matlist,funlist))
}


spec.hij <- function(){
  cat("For computing Loevinger's scalability coefficients it is assumed that the items are ordered by there p-value;",fill=TRUE)
  cat("item 1 is the easiest/most popular item",fill=TRUE)
  R <- matrix(c(0,0,0,1,1,0,1,1),2,4)
  C3 <- rbind(matrix(1,2,4),1-R,R,matrix(c(0,1,0,0),1,4))
  # C3 . n = (n+++, n+++, n0++, n+0+, n++0, n1++, n+1+, n++1, n01+, n0+1, n+01)'
  C2 <- matrix(c(1,1,-1,0,0,-1,0,0,0,0,0,-1,0,1),2,7)
  # exp(C2. log(C1 .n)) = 1, 1-H12, 1-H13, 1-H14
  C1 <- matrix(c(1, -1),1,2)
  # C1 %*% exp(C2 %*% log(C3 %*% n))
  matlist = list(C1,C2,C3)
  funlist = list("exp","log","identity")
  return(list(matlist,funlist))
}


spec.concdiscprobs <- function(rc){
   r=rc[[1]];
   c=rc[[2]];

   m1a=matrix(0,(r-1)*(c-1),r*c);
   m1b=matrix(0,(r-1)*(c-1),r*c);
   m1c=matrix(0,(r-1)*(c-1),r*c);
   m1d=matrix(0,(r-1)*(c-1),r*c);
   for(i in 1:(r-1)) for(j in 1:(c-1)) for(k in (i+1):r) for(l in (j+1):c) {m1a[[(i-1)*(c-1)+j,(k-1)*c+l]]=1}
   for(i in 1:(r-1)) for(j in 1:(c-1)) for(k in (i+1):r) for(l in (j+1):c) {m1b[[(i-1)*(c-1)+j,(i-1)*c+j]]=1}
   for(i in 1:(r-1)) for(j in 2:c) for(k in (i+1):r) for(l in 1:(j-1)) {m1c[[(i-1)*(c-1)+j-1,(k-1)*c+l]]=1}
   for(i in 1:(r-1)) for(j in 2:c) for(k in (i+1):r) for(l in 1:(j-1)) {m1d[[(i-1)*(c-1)+j-1,(i-1)*c+j]]=1}
   m1 = rbind(m1a, m1b, m1c, m1d);
   m2 = diag((r-1)*(c-1));
   m2 = cbind(m2, m2);
   m2 = DirectSum(m2, m2);
   m3 = rbind(4 * rep(1,(r-1)*(c-1)));
   m3 = DirectSum(m3, m3);
   funlist = list("exp","log","identity");
   matlist = list(m3, m2, m1);

   prob <- spec.prob(r*c)
   matlist <- c(matlist, prob[[1]])
   funlist <- c(funlist, prob[[2]])
   coeff <- list(matlist, funlist)
   return(coeff)
}

#coeff=spec.concdiscprobs(2,3)
#gfunction(c(4,2,6,4,6,7),coeff)


spec.kendalltau <- function(rc){
   concdiscspec <- spec.concdiscprobs(rc)
   m1 = matrix(c(1,-1),nrow=1,ncol=2);
   matlist <- c(list(m1), concdiscspec[[1]])
   funlist <- c(list("identity"), concdiscspec[[2]])
   coeff <- list(matlist, funlist)
   return(coeff)
}

spec.gamma <- function(rc){

   m1 = rbind(c(1, 0), c(0, 1), c(1, 1));
   m2 = rbind(c(1, 0, -1), c(0, 1, -1));
   m3 = rbind(c(1, -1));

   funlist = list("exp","log","identity");
   matlist = list(m3, m2, m1);

   concdiscspec <- spec.concdiscprobs(rc)
   matlist <- c(matlist, concdiscspec[[1]])
   funlist <- c(funlist, concdiscspec[[2]])
   coeff <- list(matlist, funlist)
   return(coeff)
}

#coeff=spec.gamma(2,3)
#gfunction(c(4,2,6,4,6,7),coeff)

spec.loglinearpars = function(z){
   if(length(z)==2&&!is.numeric(z)) 
      {dim = z[[1]]; coding = z[[2]]}
   else 
      {dim = z; coding = "Effect"}
   var = c(1:length(dim));
   mat = betamat(var, var, dim, coding)
   list(list(mat),list("log"))
}

#spec.loglinearpars(list(c(2,2),"Dummy"))
#spec.loglinearpars(c(2,2))


spec.logOR <- function(z){
   list(list(rbind(c(1,-1,-1,1))),list(c("log")))
}


#unitvec(5,2) yields c(0,1,0,0,0)
unitvec <- function(n,k){ a=rep(0,n); a[k]=1; a }

spec.ordinal.loc.L <- function(z){

   nvar = z[[1]];
   ncat = z[[2]];
   
   spec0 = spec.condprob(list(c(1,2),c(1),c(nvar,ncat)))
   matlist0 = spec0[[1]];
   funlist0 = spec0[[2]];

   m1a = list(); k=0;
   for(a in 1:nvar) for(b in 1:nvar) for(i in 1:(ncat-1)) for(j in (i+1):(ncat)) 
      {k=k+1; m1a[[k]]=unitvec(nvar*ncat,(a-1)*ncat+i)+unitvec(nvar*ncat,(b-1)*ncat+j)}
   m1a = do.call(rbind,m1a);
   m1b = list(); k=0;
   for(a in 1:nvar) for(b in 1:nvar) for(j in 1:(ncat-1)) for(i in (j+1):(ncat)) 
      {k=k+1; m1b[[k]]=unitvec(nvar*ncat,(a-1)*ncat+i)+unitvec(nvar*ncat,(b-1)*ncat+j)}
   m1b = do.call(rbind,m1b);
   m1 = rbind(m1a,m1b);
   one = t(as.matrix(rep(1,ncat*(ncat-1)/2)));
   m2 = one; for(i in 1:(nvar^2-1)) m2 = DirectSum(m2,one)
   m2 = DirectSum(m2,m2)
   m3 =diag(dim(m2)[[1]]/2)
   m3 = cbind(m3,-m3)
   
   matlist1 = list(m3,m2,m1);
   funlist1 = list("log","exp","log");

   list(append(matlist1,matlist0),append(funlist1,funlist0))
}

spec.ordinal.loc.A <- function(z){ #z=c(nvar,ncat)
   spec0 = spec.ordinal.loc.L(z);
   spec0[[2]][[1]] = "identity";  
   spec0
}
#mat=spec.ordinal.loc.A(c(2,3))


spec.ordinal.disp.L <- function(z){

   nvar = z[[1]];
   ncat = z[[2]];
   if(ncat<3){print("Number of categories must be at least 3");stop()};
   
   spec0 = spec.condprob(list(c(1,2),c(1),c(nvar,ncat)))
   matlist0 = spec0[[1]];
   funlist0 = spec0[[2]];

   m1a = list(); z=0;
   for(a in 1:nvar) for(b in 1:nvar) for(i in 1:(ncat-2)) for(j in (i+1):(ncat-1))  for(k in (j):(ncat-1))  for(l in (k+1):(ncat)) 
      {z=z+1; m1a[[z]]=unitvec(nvar*ncat,(a-1)*ncat+i)+unitvec(nvar*ncat,(b-1)*ncat+j)+unitvec(nvar*ncat,(b-1)*ncat+k)+unitvec(nvar*ncat,(a-1)*ncat+l)}
   m1a = do.call(rbind,m1a);
   m1b = list(); z=0;
   for(a in 1:nvar) for(b in 1:nvar) for(i in 1:(ncat-2)) for(j in (i+1):(ncat-1))  for(k in (j):(ncat-1))  for(l in (k+1):(ncat)) 
      {z=z+1; m1b[[z]]=unitvec(nvar*ncat,(b-1)*ncat+i)+unitvec(nvar*ncat,(a-1)*ncat+j)+unitvec(nvar*ncat,(a-1)*ncat+k)+unitvec(nvar*ncat,(b-1)*ncat+l)}
   m1b = do.call(rbind,m1b);
   m1 = rbind(m1a,m1b);
   one = t(as.matrix(rep(1,(2*ncat-ncat^2-2*ncat^3+ncat^4)/24))); #length found with MMA
   m2 = one; for(i in 1:(nvar^2-1)) m2 = DirectSum(m2,one)
   m2 = DirectSum(m2,m2)
   m3 = diag(dim(m2)[[1]]/2)
   m3 = cbind(m3,-m3)
   
   matlist1 = list(m3,m2,m1);
   funlist1 = list("log","exp","log");

   list(append(matlist1,matlist0),append(funlist1,funlist0))

}
#spec.ordinal.disp.L(c(2,3))

spec.ordinal.disp.A <- function(z){ #z=c(nvar,ncat)
   spec0 = spec.ordinal.disp.L(z);
   spec0[[1]][[1]] = 4*spec0[[1]][[1]];  
   spec0[[2]][[1]] = "identity";  
   spec0
}
#spec.ordinal.disp.A(c(2,3))


MultiCoefficient <- function(coeff,k){
    if( k == 1 ) newcoeff <- coeff
    else{
        matlist <- coeff[[1]] 
        funlist <- coeff[[2]]
        q <- length(matlist)
        newmatlist <- list()
        for (i in 1:q) {
            newmatlist[[i]] <- matlist[[i]]
            for (j in 2:k) newmatlist[[i]] <- DirectSum(newmatlist[[i]],matlist[[i]])   } 
        newcoeff <- list( newmatlist, funlist) }
    return(newcoeff)
}


SpecifyCoefficient <- function(
      name,        # Name of the coefficient 
      arg = NULL,  # Coefficient specific arguments
      rep = 1,
      data = NULL  # Data set. If provided, matrices designed for MEL are provided.
      ){
    coeff <- switch( name,
        "Mean" = spec.mean(arg),
        "Variance" = spec.variance(arg),
        "StandardDeviation" = spec.variance(arg),
        "Entropy" = spec.entropy(arg),
        "DiversityIndex" = spec.diversityindex(arg),
        "GiniMeanDifference" = spec.ginimeandifference(arg),
        "Covariance" = spec.covariance(arg),
        "Correlation" = spec.correlation(arg),
        "CohenKappa" = spec.cohenkappa(arg), 
        "CronbachAlpha" = spec.cronbachalpha(arg, data),
        "Hij" = spec.hij(),
        "KendallTau" = spec.kendalltau(arg),
        "GoodmanKruskalGamma" = spec.gamma(arg),
        "DifferenceInProportions" = spec.diffprop(arg), #arg has the form (var,condvar,dim)
        "ConditionalProbabilities" = spec.condprob(arg),
        "Probabilities" = spec.prob(arg),
        "LogProbabilities" = spec.logprob(arg),
        "LoglinearParameters" = spec.loglinearpars(arg), #arg=dim
        "LogOddsRatio" = spec.logOR(arg),
        # NOT YET AVAILABLE "MokkenHij" = spec.mokkenhij(),
        # NOT YET AVAILABLE "AllMokkenHij" = spec.allmokkenhij(arg,data),
        # NOT YET AVAILABLE "MokkenHj" = spec.mokkenhj(arg),
        "AllMokkenHj" = spec.allmokkenhj(arg,data),
        "MokkenH" = spec.mokkenh(arg, data),
        "OrdinalLocation-A" = spec.ordinal.loc.A(arg),
        "OrdinalLocation-L" = spec.ordinal.loc.L(arg),
        "OrdinalDispersion-A" = spec.ordinal.disp.A(arg),
        "OrdinalDispersion-L" = spec.ordinal.disp.L(arg)
        )
    MultiCoefficient(coeff,rep)
}



#OLD
#JoinModels <- function(model1,model2){
#
#  model1 = tomodelform(model1);
#  model2 = tomodelform(model2);#
#
#  bt1 = model1[[1]][[1]]
#  matlist1 = model1[[1]][[2]][[1]]
#  funlist1 = model1[[1]][[2]][[2]]
#  at1 = model1[[1]][[3]]
#  x1 = model1[[2]]

#  bt2 = model2[[1]][[1]]
#  matlist2 = model2[[1]][[2]][[1]]
#  funlist2 = model2[[1]][[2]][[2]]
#  at2 = model2[[1]][[3]]
#  x2 = model2[[2]]#

#  bt = DirectSum(bt1,bt2)
#  matlist = list();
#  for(i in 1:length(matlist1)) matlist[[i]] = DirectSum(matlist1[[i]],matlist2[[i]])
#  funlist = if( length(funlist1)==length(funlist2) && funlist1[[1]]==funlist2[[1]] ) funlist1
#  at = rbind(at1,at2)
#  model = list(bt,list(matlist,funlist),at);
#  
#  return(model)
#
#}

#design matrix not yet implemented
JoinModels <- function(...){
   modellist = list(...);
   modellist = lapply(modellist,tomodelform);
   nmod = length(modellist);
   if(nmod==1) return(modellist);
   
   btlist = lapply(modellist,function(x){x[[1]]$bt});
   matlistlist = lapply(modellist,function(x){x[[1]]$coeff$matlist});
   funlistlist = lapply(modellist,function(x){x[[1]]$coeff$funlist});
   atlist = lapply(modellist,function(x){x[[1]]$at});
   
   bt = do.call("DirectSum",btlist);
   at = do.call("rbind",atlist);
   
   rr=list();
   for(i in 1:length(matlistlist[[1]])) rr[[i]] = lapply(matlistlist,function(x){x[[i]]})
   matlist = lapply(rr,function(x){do.call("DirectSum",x)})
   
   funlist = funlistlist[[1]];
   
   joinmod = list()
   joinmod$bt = bt
   joinmod$coeff$matlist = matlist
   joinmod$coeff$funlist = funlist
   joinmod$at = at
   
   return(joinmod)
}
#a=matrix(1:12,4,3);b=matrix(rep(2,6),3,2);m1=list(a,"log",b);JoinModels(m1,m1,m1)



###############################################################################################
###############################################################################################
###############################################################################################

# Aangepaste functie RecordToFrequencies om op basis van data (record) een vector met frequenties te maken (voor MEL en MAEL schattingen)
# augmentData is een hulpfunctie


augmentData <- function(Y, dim, augment = 2, eps = 0){
  N <- nrow(Y)                                 # Sample size
  J <- ncol(Y)                                 # Number of items
  Q <- choose(J, augment)                      # The number of a-tuples of items (i.e., items if a==1, pairs if a==2, triples if a==3, quadruples if a==4)
  # scores <- lapply(as.list(dim), function(x) 1:x)
  scores <- dim

  # MAKING THE AUGMENTED DATA
  # Ylist: list with Q elements containing the observed scores for each a-tuple of items
  # Ilist: list with Q elements containing all possible scores for each a-tuple of items
  # Addlist: list with Q elements containing the unobserved scores for each a-tuple of items

  A <- combn(1:J, augment)

  q <- NULL
  Ylist <- Ylistc <- Ilist <- Ilistc <- Addlist <- list()
  for (q in 1:Q){
    Ylist[[q]] <- Y[,A[,q]]
    if(!is.matrix(Ylist[[q]])) Ylist[[q]] <- matrix(Ylist[[q]],,1)
    Ylistc[[q]] <- apply(Ylist[[q]],1,paste, collapse="")
    Ilist[[q]] <- as.matrix(expand.grid(scores[A[,q]]))
    if(!is.matrix(Ilist[[q]])) Ilist[[q]] <- matrix(Ilist[[q]],,1)
    Ilistc[[q]] <- apply(Ilist[[q]],1,paste, collapse="")
    Addlist[[q]] <- Ilist[[q]][Ilistc[[q]] %in% Ylistc[[q]] == FALSE,]
    if(!is.matrix(Addlist[[q]])) Addlist[[q]] <- matrix(Addlist[[q]],1)
    if(length(Addlist[[q]])==0) Addlist[[q]] <- matrix(NA,0,augment)
  }

  # augY: augmented data
  # augY are the required scores from Addlist on each a-tuple of items, the remaining scores are NA
  augY <- NULL
  for (q in 1:Q){
    tmp <- matrix(,nrow(Addlist[[q]]),J)
    tmp[,A[,q]] <- Addlist[[q]]
    augY <- rbind(augY,tmp)
  }
  # Imputation of random values
  for (j in 1:J) augY[,j][is.na(augY[,j])] <- sample(scores[[j]],length(augY[,j][is.na(augY[,j])]),TRUE)

  # Remove duplicated response patterns
  if(nrow(augY) > 1){
    augY <- augY[!duplicated(apply(augY, 1, paste, collapse="")), ]
    if(!is.matrix(augY)) augY <- matrix(augY, 1)
  }

  # Create the corresponding vector of frequencies (zero by definition)
  augn <- matrix(eps, nrow(augY), 1)
  if(nrow(augY) == 1) dimnames(augn)[[1]] <- list(apply(augY, 1, paste, collapse = ""))
  if(nrow(augY) > 1) dimnames(augn)[[1]] <- apply(augY, 1, paste, collapse = "")
  return(list(augY, augn))
}

RecordsToFrequencies <- function(dat, var = varDefault, dim = dimDefault, augment = "all", seed = FALSE){ 
   if (!is.matrix(dat)) stop("Argument 'dat' must be of class 'matrix'.")
   varDefault <- 1:ncol(dat)
   if(is.numeric(seed)) set.seed(seed)
   Yobs <- dat[, var]
   nobs <- as.matrix(table(apply(Yobs, 1, paste, collapse="")))
   dim <- lapply(dim, function(x) 1:x)
   dimDefault <- apply(cbind(Yobs, NA), 2, function(x) sort(unique(x)))[-(ncol(Yobs) + 1)]
   nCells <- prod(sapply(dim, length))
   if (nCells > 1e06 & augment == "all") { 
      warning("Number of cells exceeds 1e06. Cannot compute frequencies for all cells. Instead '1k' augmentation is used.")
      augment <- "1k"
      }
   if (augment == "obs") n <- nobs
   if (augment == "all"){ 
      nlab <- sort(apply(expand.grid(dim), 1, paste, collapse = ""))
      n <- matrix(rep(0, length(nlab)), dimnames = list(nlab,""))
      n[(nlab %in% dimnames(nobs)[[1]]), ] <- nobs[,]
   }  
   if (augment == "1k" | augment == "2k"){ 
      k <- switch(augment, "1k"  = 2, "2k" = 4) 
      aug <- augmentData(Yobs, dim, k)   # Generate augmented data 
      Yaug  <- aug[[1]]                         
      naug  <- aug[[2]]
      n <- rbind(nobs, naug)
      n <- as.matrix(n[order(dimnames(n)[[1]]),])
   }
   return(n)
}

#dat=rbind(c(1,1,2),c(2,1,3))i
#RecordsToFrequencies(dat,c(2,2),SelectColumns=c(1,2))
#RecordsToFrequencies(dat,c(2,2,3))


# 4. Required functions
Gfunction <- function( m, coeff ){
# requires functions (1) phi, (2) dphi,
  matlist <- coeff$matlist     # read design matrices from the model
  funlist <- coeff$funlist     # read algebraic operations from the model
  q <- length(funlist)
  g <- list()
  g[["g.0"]] <- m
  for (i in 1:q) g[[i+1]] <- phi(matlist[[q+1-i]], g[[i]], funlist[[q+1-i]])
  Gfun <- list()
  Gfun[["Gfun.0"]] <- diag(length(m))
  for (i in 1:q) Gfun[[i+1]] <- dphi(matlist[[q+1-i]], g[[i]], Gfun[[i]], funlist[[q+1-i]])
  return( Gfun[[q+1]] )
}

gfunction <- function(m,coeff){
# requires functions (1) phi
    matlist <- coeff$matlist     # read design matrices from the model
    funlist <- coeff$funlist     # read algebraic operations from the model
    q <- length(funlist)
    g <- list()
    g[["g.0"]] <- m
    for (i in 1:q) {g[[i+1]] <- phi(matlist[[q+1-i]], g[[i]], funlist[[q+1-i]])}
    return( g[[q+1]] )
}

phi <- function(A,f, action){
# Numerical values are translations h(A %*% f) = A %*% f -
    eps=1E-80;
    switch(action,
    "identity" = A %*% f,
    "exp"      = A %*% exp(f),
    "log"      = A %*% log(abs(f)+eps),
    "square"   = A %*% f^2,
    "sqrt"     = A %*% sqrt(f),
    "xlogx"    = A %*% (-f*log(f+eps)),
    "xbarx"    = A %*% (f*(1-f)), # x(1-x)
    "xoverx"   = A %*% (f /(1-f))  # x/(1-x) 
    )
}

dphi <- function(A,f,df, action){
  eps=1E-80;
  switch(action,
  "identity" = A %*% df,
  "exp"      = A %*% (as.numeric(exp(f)) * df),
  "log"      = A %*% (as.numeric(1/(f+eps)) * df),
  "square"   = A %*% (as.numeric(2*f) * df),
  "sqrt"     = A %*% (as.numeric(1/(2*sqrt(f))) * df),
  "xlogx"    = A %*% (as.numeric(-1-log(f+eps)) * df),
  "xbarx"    = A %*% (as.numeric(1-2*f) * df),  # x(1-x)
  "xoverx"   = A %*% (as.numeric(1/(1-f)^2) * df)     # x/(1-x) 
    )
}

pds <- function(n,m,lambda=1){
# Power divergence statistics (Cressie + Read)
   if(length(m)!=length(n))stop("m and n have unequal lengths")
   n[n < 1e-100] <- 1e-100
   m[m < 1e-100] <- 1e-100
   if(lambda==0) return(2*sum(n*log(n/m)))
   if(lambda==-1) return(2*sum(m*log(m/n)))
   return(2/(lambda*(lambda+1))*sum(n*(((n/m)^lambda) - 1)))
}



###############################################################################################
###############################################################################################
###############################################################################################
# Printing statistics


printalgorithmdetails = function(fit){
   estmethod=switch(fit$Method,
      "ML"="Maximum Likelihood",
      "MDI"="Minimum Discrimination Information",
      "GSK"="GSK");
   cat(fit$Title,"\n")
   cat("\n")
   cat("Estimation method     = ",estmethod,"\n")
   cat("Time taken            = ",fit$TimeTaken," seconds","\n")
   cat("Number of iterations  = ",fit$NumberOfIterations,"\n") 
   cat("Convergence criterion = ",fit$ConvergenceCriterion,"\n")
}

printbasicstatistics = function(stats,showeig,satmod){
   if(satmod){
       cat("Sample size = ",stats$SampleSize,"\n")
       cat("Eigenvalues sample covariance matrix","\n")
       cat("\t",stats$Eigenvalues,"\n")
      } else
   if (stats$Method=="ML") { 
          cat("Loglikelihood ratio (G^2) = ",stats$LogLikelihoodRatio,"\n");
          cat("Chi-square (X^2)          = ",stats$ChiSquare,"\n")} else if 
      (stats$Method=="MDI"){ 
          cat("Discrimination information = ",stats$DiscriminationInformation,"\n")
          cat("Loglikelihood ratio (G^2)  = ",stats$LogLikelihoodRatio,"\n")} else if
      (stats$Method=="GSK"){ 
          cat("Wald statistic             = ",stats$WaldStatistic,"\n")}
   if(!satmod){
      cat("Degrees of freedom         = ",stats$DegreesOfFreedom,"\n")
      cat("p-value     = ",stats$PValue,"\n")
      cat("Sample size = ",stats$SampleSize,"\n")
      cat("BIC         = ",stats$BIC,"\n")
      if(showeig) {cat("\n"); cat("Eigenvalues of inverse covariance matrix of Lagrange multipliers: ", "\n", stats$Eigenvalues, "\n")}
   }
}


printadvancedstatistics = function( stats, satmod, ShowCorrelations ){

    obsval   = stats$ObservedCoefficients 
    fitval   = stats$FittedCoefficients 
    se       = stats$CoefficientStandardErrors
    zscores  = stats$CoefficientZScores
    adjres   = stats$CoefficientAdjustedResiduals
    cormat   = stats$CoefficientCorrelationMatrix
    coeffdim = stats$CoefficientDimensions
    varlabels = stats$CoefficientTableVariableLabels
    catlabels = stats$CoefficientTableCategoryLabels

    coeffdim  = rev(coeffdim);
    catlabels = rev(catlabels);

    nvar=length(coeffdim)
    if(nvar==1) {  
       if(!satmod) {
          cat("   Observed values             = ",obsval,sep="\t","\n")
          cat("   Fitted values               = ",fitval,sep="\t","\n");
          cat("   Ese of fitted values        = ",se,sep="\t","\n");
          cat("   Fitted values / std errors  = ",zscores,sep="\t","\n");
          cat("   Adjusted residuals          = ",adjres,sep="\t","\n") }
       else{
          cat("   Observed values              = ",obsval,sep="\t","\n")
          cat("   Ese of observed values       = ",se,sep="\t","\n");
          cat("   Observed values / std errors = ",zscores,sep="\t","\n")} }
    else {    
       if(!satmod) {
          cat("   Observed values: ","\n");  print(array(obsval,coeffdim,dimnames=catlabels))
          cat("   Fitted values: ","\n");  print(array(fitval,coeffdim,dimnames=catlabels))
          cat("   Ese of fitted values: ","\n");  print(array(se,coeffdim,dimnames=catlabels))
          cat("   Fitted values / std errors: ","\n");  print(array(zscores,coeffdim,dimnames=catlabels))
          cat("   Adjusted residuals: ","\n");  print(array(adjres,coeffdim,dimnames=catlabels))  }
       else{
          cat("   Observed values: ","\n");  print(array(obsval,coeffdim,dimnames=catlabels))
          cat("   Ese of observed values: ","\n");  print(array(se,coeffdim,dimnames=catlabels))
          cat("   Observed values / std errors: ","\n");  print(array(zscores,coeffdim,dimnames=catlabels)) }
    }

    if(ShowCorrelations && length(se)>1) {
       cat("Correlation matrix:","\n")
       cat(cormat,"\n","\n") }
}


tocatlabels = function(lab,dim){
   nvar = length(dim);
   catlabels=vector("list",nvar)  #labels for categories
   for(i in 1:nvar){ 
       for(j in 1:dim[[i]]) {catlabels[[i]][[j]]=
          if(is.list(lab[[i]])) paste(lab[[i]][[1]],"=",lab[[i]][[2]][[j]],sep="")
          else paste(lab[[i]],"=",j,sep="")}}
   catlabels
   }

#tocatlabels(c(1,2),c(2,2))
#tocatlabels(list(list("G",c("men","women")),"T"),c(2,3))

printThetaAndBeta = function(stats, showcoeffs, showpars, ShowCorrelations, ParameterCoding, satmod, modeltype="Manifest"){

   obsval = stats$ObservedCoefficients 
   fitval = stats$FittedCoefficients 
   coeffdim = stats$CoefficientDimensions
   varlabels = stats$CoefficientTableVariableLabels
   catlabels = stats$CoefficientTableCategoryLabels
   nvar=length(coeffdim)

   if(showcoeffs){
      cat("Statistics for the coefficients: ", "\n")
      cat("Variables = ",varlabels," (dim = ",coeffdim,")","\n",sep="")
      printadvancedstatistics(stats, satmod, ShowCorrelations)
   }

   printBeta = function(beta){
      efflab = beta$CoefficientTableVariableLabels
      effdim = beta$CoefficientDimensions
      cat("Effect = ");cat(as.character(efflab,sep=","),sep=",");cat(" (dim = ");cat(effdim,sep=",");cat(")","\n",sep="")
      printadvancedstatistics(beta, satmod, ShowCorrelations)  
   }

   if(showpars){
      cat("\n")
      cat("Statistics for the parameters: ", "\n")
      subvar = allsubsets(c(1:nvar));
      for(i in 1:length(subvar)) printBeta(stats$Parameters[[i]])
   }

}




###############################################################################################
###############################################################################################
###############################################################################################
# Functions relating to model specification


#puts marginal model into standard form (bt,theta,at,d)
tomargmodform = function(margmodel){

   #case 0: margmodel=None
   if( isTRUE(margmodel == "None") ) return(margmodel)

   #find out if d is given in model, if so, drop it from margmodel
   if({lm =length(margmodel);is.vector(margmodel[[lm]])&&is.numeric(margmodel[[lm]])&&!is.matrix(margmodel)}){
         d = margmodel[[lm]];
         margmodel = margmodel[-lm]
         }
   else d = "None";

   mm = list()  #marginal model in standard form

   #case 1: single matrix is given
   if( data.class(margmodel)=="matrix" ){
      mm$at = margmodel;
      id = diag(nrow(mm$at));
      mm$bt = diag(nrow(mm$at));
      mm$coeff$matlist = list(id)
      mm$coeff$funlist = list("identity") }
   else if(length(margmodel) > 4) {print("Error in model specification: length of model should be at most four, in the form list(bt,coeff,at,d) representing the equation bt.coeff(at.pi)=d")}

#   if(length(margmodel)==4) {print("Error in model specification: fourth element should be a numeric constant vector d from the formula bt.coeff(at.pi)=d ")}
   
   #case 2: form is {bt,Log,at}
   else if(length(margmodel) == 3 && data.class(margmodel[[2]])=="character"){
      mm$bt = margmodel[[1]];
      mm$at = margmodel[[3]];
      if(ncol(mm$bt)!=nrow(mm$at)) {cat("Incompatible matrix dimensions in model specification",dim(mm$bt),"and",dim(mm$at),"\n")}
      mm$coeff$matlist = list(diag(ncol(mm$bt)))
      mm$coeff$funlist = list(margmodel[[2]])
   }
   
   #case 3: form is standard: {bt,coeff,at}, with coeff={matlist,funlist}
   else if(length(margmodel) == 3 && !data.class(margmodel[[2]])=="character" ) {
      mm$bt = margmodel[[1]];
      mm$coeff$matlist = margmodel[[2]][[1]];
      mm$coeff$funlist = margmodel[[2]][[2]];
      mm$at = margmodel[[3]];
   }
   
   #hereafter: length[margmodel]=2 ################################
   
   #form is eg {Log,at}
   else if(data.class(margmodel[[1]])=="character" && data.class(margmodel[[2]])=="matrix"){
      id=diag(nrow(margmodel[[2]]));
      mm$bt = id;
      mm$coeff$matlist = list(id);
      mm$coeff$funlist = list(margmodel[[1]]);
      mm$at = margmodel[[2]];
   }

   #form is {bt,Log}
   else if(data.class(margmodel[[2]])=="character" && data.class(margmodel[[1]])=="matrix"){
      id=diag(ncol(margmodel[[1]]));
      mm$bt = margmodel[[1]];
      mm$coeff$matlist = list(id);
      mm$coeff$funlist = list(margmodel[[2]]);
      mm$at = id;
   }
    
   
   #form is {bt,coeff}
   else if( data.class(margmodel[[1]])=="matrix" && length(margmodel[[2]]) == 2 ) list( margmodel[[1]], margmodel[[2]], "None" )
   
   #form is {coeff,None}
   else if( isTRUE(margmodel[[2]] == "None") && length(margmodel[[1]]) == 2 ){
      mm$coeff$matlist = margmodel[[1]][[1]];
      mm$coeff$funlist = margmodel[[1]][[2]];
      df = nrow(mm$coeff$matlist[[1]]);
      mm$bt = diag(df);
      mm$at = "None"
   } 
   
   #form is {coeff,at}
   else if( data.class(margmodel[[2]])=="matrix" && length(margmodel[[1]]) == 2 ){
      mm$coeff$matlist = margmodel[[1]][[1]];
      mm$coeff$funlist = margmodel[[1]][[2]];
      mm$at = margmodel[[2]];
      df = nrow((mm$coeff$matlist[[1]]));
      mm$bt = diag(df);
   }
   
   #form is coeff with coeff={matlist,funlist}
   else if( length(margmodel) == 2 && data.class(margmodel[[1]][[1]])=="matrix" && data.class(margmodel[[2]])=="list" ){
      mm$coeff$matlist = margmodel[[1]];
      mm$coeff$funlist = margmodel[[2]];
      k=length(mm$coeff$matlist);k=length(mm$coeff$matlist[[k]][1,])
      mm$at = diag(k)
      df = nrow((mm$coeff$matlist[[1]]));
      mm$bt = diag(df);
   }
   
   else{ cat("Cannot recognize model specification", margmodel)}

   #now homogenize margmodel specification
   k = ncol(mm$coeff$matlist[[length(mm$coeff$matlist)]]);
   prob <- spec.prob(k);
   mm$coeff$matlist <- c(mm$coeff$matlist, prob[[1]]);
   mm$coeff$funlist <- c(mm$coeff$funlist, prob[[2]]);

   #add a constant vector d
   d = if( isTRUE(d=="None") ) rep(0,nrow(mm$bt)) else d;
   mm$d=d

   #check if matrix dimensions are correct
   dim0 = list(c(0,length(d)));
   dimfirst = list(dim(mm$bt));
   dimlast  = list(dim(mm$at));
   dimlist  = lapply(mm$coeff$matlist,dim);
   dimlist  = append(dimlist,dimfirst,after=0);
   dimlist  = append(dimlist,dim0,after=0);
   dimlist  = if(length(dimlast[[1]])>0) append(dimlist,dimlast) else dimlist
   for(i in 2:length(dimlist)){if(dimlist[[i-1]][[2]]!=dimlist[[i]][[1]]){print("Incompatible matrix dimensions. Dimensions are: ");print(dimlist);stop()}}
   
   return(mm)
}

#mod=list(matrix(1:6,2,3),"log",matrix(1:12,3,4),c(1,2))
#tomargmodform2(mod)

#margmodel="None"
#tomargmodform2(margmodel)

#mod=rbind(c(1,2))
#tomargmodform2(mod)

#margmodel=list( rbind(c(1,2)), "log", rbind(c(1,2)) )
#tomodelform2(margmodel)



#puts model into standard form: (margmodel,x) where x denotes design matrix for loglinear model for full table
tomodelform = function(model){

   desmatQ = function(mat){ if( !(data.class(mat)=="matrix") ) FALSE else if(nrow(mat)>ncol(mat)) TRUE else FALSE }
  
   standardformmodel =
  
      if(desmatQ(model)) list("None",model)                                                    #form is model=x
  
      else if( isTRUE(model[[1]]=="None") && desmatQ(model[[2]]) ) model                       #form is model={None,x}
     
      else if( length(model)==2 && desmatQ(model[[2]]) && data.class(model[[1]])=="character" ){   #form is eg {Log,x}: only used for coefficients, no df for margmod
         x = model[[2]]
         id = diag(nrow(x))
         list( tomargmodform(list(id,list(list(id),list(model[[1]])), "None")), x )}
        
      else if( length(model) == 2 && (desmatQ(model[[2]]) || isTRUE(model[[2]] == "None") ) ){ #form is {margmodel,x} or {margmodel,None}
         margmod = tomargmodform(model[[1]])
         x = model[[2]]
         list(margmod, x)}
        
      else list(tomargmodform(model), "None")                                                  #form is "margmod", no design matrix
      
   return(standardformmodel)
}

#x=rbind(c(1,2))
#tomodelform(x)

modeldf = function(model,n){
   tblsize = tablesize(model);
   latdim = tblsize / length(n)
   margmod = model[[1]];
   x = model[[2]];
   df1 = if (isTRUE(margmod == "None")) 0 else nrow(margmod$bt)
   df2 = if (isTRUE(x == "None")) 0 else max(0, tblsize/latdim - ncol(x));
   c(df1,df2)
   }

tablesize = function(model) {
   if(length(model)==2){
      margmod = model[[1]];   x = model[[2]];
      if      (!isTRUE(x == "None")) nrow(x)
      else if (!isTRUE(margmod$at == "None")) ncol(margmod$at)
      else    {k=length(margmod$coeff$matlist); ncol(margmod$coeff$matlist[[k]])}  }
   else  #model is of the form margmod
      if(isTRUE(model$at=="None")){k=length(model$coeff$matlist); ncol(model$coeff$matlist[[k]])}
      else ncol(model$at)
}

#mod=list(matrix(1:2,2,2),"log",matrix(1:6,2,3))
#mod=list(matrix(1:2,2,2),"log","None")
#mod2=tomargmodform(mod)
#tablesize(mod2)



###############################################################################################
###############################################################################################
###############################################################################################
# Fitting procedures

margmodfit = function(n, model, MaxSteps=1000, MaxError=1e-25, StartingPoint="Automatic", ShowProgress=TRUE, MaxStepSize=1,Method="ML"){

    eps= 1.e-80;
    starttime = proc.time()[3]

    margmod = model[[1]]
    x     <- model[[2]]
    bt    <- margmod$bt
    coeff <- margmod$coeff
    at    <- margmod$at
    d     <- margmod$d
        
    step <- 1             # Initial step size (no modification yet)
    k <- 0                # index for iterations k = 1, 2, ... , k = 1 is initial iterration
    w <- list()
    H <- matrix()

    m  <- if(isTRUE(StartingPoint=="Automatic") && !is.matrix(x) ){ n + 1e-3 } 
          else if(is.matrix(x)) rep(sum(n)/length(n),length(n))
          else StartingPoint        # initial estimates for expected frequencies
    m  <- as.numeric(m)
    logm <- log(m)                      # logarithm of initial expected frequencies
    atm  <- if(data.class(at)!="matrix") m else at %*% m
    g  <- matrix(gfunction(atm,coeff))  # initial values of g in a C x 1 vector
    Gt <- if(data.class(at)!="matrix") Gfunction(atm,coeff) else Gfunction(atm,coeff) %*% at   # initial values of G in an L x C matrix
    h  <- bt %*% g - d
    Ht <- bt %*% Gt
    H  <- t(Ht)
    df <- nrow(bt)                      # number of constraints
    mu <- matrix(0,df,1)                # initial estimates of Lagrange multipliers in a C x 1 vector
    olderror <- 100000                  # initial values of error

    if(!isTRUE(!ShowProgress)) cat("Iteration      ","step size      ","G^2             ","Error","\n")
    olderror <- 1.e100;

    # Iteration = 1, ..., k
    repeat{
        k <- k + 1
        atm <- if(data.class(at)!="matrix") m else at %*% m
        g <- gfunction(atm,coeff)
        Gt <- if(data.class(at)!="matrix") Gfunction(atm,coeff) else Gfunction(atm,coeff) %*% at
        h <- bt %*% g - d
        Ht <- (bt %*% Gt )
        H <- t(Ht)
        HDH <- Ht %*% ( m * H )
        ndivm <- (m+eps)/(n+eps)
        dl = switch(Method,
            "ML"  = n - m,        #derivative of multinomial likelihood wrt log(m)
            "MDI" = -m*log(ndivm))#derivative of MDI criterion wrt log(m)
        dl2 = switch(Method,
            "ML"  = ndivm - 1,   #derivative of multinomial likelihood wrt m
            "MDI" = -log(ndivm)) #derivative of MDI criterion wrt m
        lambda <- - solve( HDH, Ht %*% dl + h )
        loglinincr = if(isTRUE(x=="None")) 0 else x %*% (solve(t(x) %*% (m * x)) %*% (t(x) %*% dl)) - dl2
        incr <- dl/m + H %*% lambda + loglinincr  #cannot replace dl/m by dl2 for some numerical reason
        newerror <-  (t(incr) %*% (m * incr))[1]
        errorratio = if(newerror==0.) 1 else sqrt(olderror/newerror);
        newstep = if(errorratio<MaxStepSize && errorratio>.01*MaxStepSize) errorratio/2 else MaxStepSize; 
        olderror = newerror;
        logm <- logm + newstep*incr
        m <- exp(logm)
        m <- as.numeric(m)
        m[m < 1e-80] <- 1e-80
        if( isTRUE(ShowProgress) || (data.class(ShowProgress)=="numeric" && isTRUE(k%%ShowProgress==0)) ){ cat(k,"             ",newstep,"             ",pds(n,m,0),"         ",newerror,"\n") }
        if ( k >= MaxSteps || abs(newerror) < MaxError ) break
    }

    fit=list();   fit$FittedFrequencies=m;  fit$ConvergenceCriterion=newerror;    fit$NumberOfIterations=k;    fit$TimeTaken <- proc.time()[3] - starttime
    return(fit) 
}


margmodfitEM <- function( n, latdim, model, maxoutersteps=1000, StartingPoint="Automatic", MaxError=1e-20, MaxInnerSteps=2, ShowProgress=TRUE, MaxStepSize=1){

    starttime = proc.time()[3]
    eps <- 1e-80
    k <- 0
    collapse = function(q){ apply( matrix( q, nrow = latdim ), 2, sum ) }
    expand = function(q){ rep(q, each=latdim ) }
    v <- c(1:(length(n)*latdim))
    mhat = expand(n/latdim)
    mhat = mhat + sum(n)/100/length(mhat)  #startingpoint

    cat("Iteration      ","G^2             ","Error","\n")

    repeat{ k <- k + 1
        nhat <- mhat * expand( (n+eps) / (collapse(mhat)+eps) ) # E-step
        newmhat <- margmodfit( nhat, model, MaxSteps=MaxInnerSteps, StartingPoint=mhat, ShowProgress=FALSE, MaxStepSize=MaxStepSize )$FittedFrequencies  # M-step
        error <- sum( abs( mhat - newmhat ) )
        mhat <- newmhat
        if( isTRUE(ShowProgress) || (data.class(ShowProgress)=="numeric" && isTRUE(k%%ShowProgress==0)) ){ 
             cat("  ",k,"             ",pds(n,collapse(newmhat),0),"         ",error,"\n") }
        if ( k >= maxoutersteps || abs(error) < MaxError ) break
    }

    fit=list();   fit$FittedFrequencies=mhat;  fit$ConvergenceCriterion=error;    fit$NumberOfIterations=k;    fit$TimeTaken <- proc.time()[3] - starttime
    return(fit) 
}


gsk <- function( n, model, CoefficientDimensions="Automatic", Labels="Automatic" ){

    model = tomodelform(model)
    margmod = model[[1]]
    x     <- model[[2]]
    bt    <- margmod$bt
    coeff <- margmod$coeff
    at    <- margmod$at
    d     <- margmod$d

    atn <- if(data.class(at)!="matrix") n else at %*% n

    h  <- bt %*% gfunction(atn,coeff) - d
    Gt <- if(data.class(at)!="matrix") Gfunction(atn,coeff) else Gfunction(atn,coeff) %*% at
    Ht <- (bt %*% Gt )
    H <- t(Ht)

    GDG <- Gt %*% (n * t(Gt)) 
    GDH <- Gt %*% (n * H)
    HDH <- Ht %*% (n * H)
    
    covresid <- GDH %*% solve(HDH) %*% t(GDH) 
    covtheta  <- GDG - covresid
    obsval <- gfunction(atn,coeff)
    fitval <- covtheta %*% solve( GDG, obsval )
    adjres = (obsval - fitval)/sqrt(diag(covtheta))

    stats = list()
    stats$Method = "GSK"
    stats$LogLikelihoodRatio = 0
    stats$DegreesOfFreedom <- length(h)
    stats$WaldStatistic <- h %*% solve( HDH, h )
    stats$SampleSize <- sum(n)
    stats$BIC  <- stats$WaldStatistic - stats$DegreesOfFreedom * log( sum(n) )
    stats$PValue <- signif(1-pchisq(stats$WaldStatistic,stats$DegreesOfFreedom),5)
    stats$Eigenvalues = eigen(HDH, only.values = TRUE)$values
    stats = c(stats,coefficientstats(obsval,fitval,covtheta,covresid,CoefficientDimensions,Labels,FALSE))

    return(stats)
}
#  model <- list( bt, coeff2, at, c(0.1) )
#  m <- gsk(n,model)



#parameters
getBetas = function(efflab,effcatlab,effdim,obsval1,fitval1,covtheta1,covresid1,coeffdim,varlabels,satmod,noresid,ParameterCoding="Effect"){
       effmat = betamat(varlabels,efflab,coeffdim,ParameterCoding);
       obsval   = effmat %*% obsval1;
       fitval   = effmat %*% fitval1
       covtheta = effmat %*% covtheta1 %*% t(effmat)
       covresid = if(satmod) 0 else effmat %*% covresid1 %*% t(effmat)

       var <- diag(covtheta)
       fitval[abs(fitval) < 1e-20] <- 0
       var[var <= 0] <- 1e-80
       se <- sqrt(var)
       zscores <- fitval / se
       if(noresid==1||isTRUE(covresid==list(0))) adjres=NULL else {
          vr <- diag(covresid);
          vr[vr <= 0] <- 1e-80
          adjres <- if(is.null(obsval)||is.null(fitval)||isTRUE(vr==0)) 0 else (obsval - fitval)/sqrt(vr)
          adjres[abs(adjres) < 1e-39] <- 0  }
       zscores[abs(zscores) < 1e-39] <- 0
       cormat = if(length(se)==1) 1 else diag(1/se) %*% covtheta %*% diag(1/se)

       beta = list()
       beta$ObservedCoefficients = obsval
       beta$FittedCoefficients = fitval
       beta$CoefficientStandardErrors = se
       beta$CoefficientZScores = zscores
       beta$CoefficientAdjustedResiduals = adjres
       beta$CoefficientCovarianceMatrix = covtheta
       beta$CoefficientCorrelationMatrix = cormat
       beta$CoefficientAdjustedResidualsCovarianceMatrix = covresid
       beta$CoefficientDimensions = effdim
       beta$CoefficientTableVariableLabels = efflab
       beta$CoefficientTableCategoryLabels = effcatlab

       return(beta)
}


coefficientstats = function(obsval, fitval, covtheta, covresid, coeffdim, varlabels,satmod,eig="Automatic",ParameterCoding="Effect"){
    noresid = prod(obsval==fitval)

    var <- diag(covtheta)
    fitval[abs(fitval) < 1e-20] <- 0
    var[var <= 0] <- 1e-80
    se <- sqrt(var)
    zscores <- fitval / se
    if(noresid==1) adjres=NULL else {
       vr <- diag(covresid);
       vr[vr <= 0] <- 1e-80
       adjres <- if(is.null(obsval)||is.null(fitval)||isTRUE(vr==0)) 0 else (obsval - fitval)/sqrt(vr)
       adjres[abs(adjres) < 1e-39] <- 0  }
    zscores[abs(zscores) < 1e-39] <- 0
    cormat = if(length(se)==1) 1 else diag(1/se) %*% covtheta %*% diag(1/se)

    coeffdim = if(isTRUE(coeffdim=="Automatic")) c(length(obsval)) else coeffdim;
    nvar=length(coeffdim)
    varlabels = if(isTRUE(varlabels=="Automatic")) apply(rbind(c(1:nvar)),1,function(x){paste("Var",x,sep="")}) else varlabels;
    catlabels=tocatlabels(varlabels,coeffdim);
    if(prod(coeffdim)!=length(obsval)){print("Error: CoefficientDimensions incorrect, the product of the dimensions should equal the number of coefficients"); stop()}
    if(length(coeffdim)!=length(varlabels)){print("Error: CoefficientDimensions has different length than Labels"); stop()}

    varlabels1 = list();   #varlabels1 contains only variable labels, not category labels
    for(i in 1:nvar){ varlabels1[[i]] = if(is.list(varlabels[[i]])) varlabels[[i]][[1]] else varlabels[[i]] }
    varlabels1 = as.character(varlabels1)

    nvar = length(varlabels)
    subvar = allsubsets(c(1:nvar));
    beta = list()
    for(i in 1:length(subvar)) beta[[i]] = getBetas(varlabels1[subvar[[i]]],catlabels[subvar[[i]]],coeffdim[subvar[[i]]],obsval,fitval,covtheta,covresid,coeffdim,varlabels1,satmod,noresid,ParameterCoding=ParameterCoding)

    stats = list()
    stats$ObservedCoefficients = obsval
    stats$FittedCoefficients = fitval
    stats$CoefficientStandardErrors = se
    stats$CoefficientZScores = zscores
    stats$CoefficientAdjustedResiduals = adjres
    stats$CoefficientCovarianceMatrix = covtheta
    stats$CoefficientCorrelationMatrix = cormat
    stats$CoefficientAdjustedResidualsCovarianceMatrix = covresid
    stats$CoefficientDimensions = coeffdim
    stats$CoefficientTableVariableLabels = varlabels1
    stats$CoefficientTableCategoryLabels = catlabels
    stats$Parameters = beta

    class(stats) = "CMM"
    return(stats)
}


"summary.CMM" = function(object,...,ShowCoefficients=TRUE,ShowParameters=FALSE,ShowCorrelations=FALSE,ParameterCoding="Effect",Title=""){
    if(!is.null(object$Title)) print(object$Title)
    satmod = if(object$LogLikelihoodRatio==0) TRUE else FALSE
    printbasicstatistics(object,showeig=TRUE,satmod)
    cat("\n")
    printThetaAndBeta( object, ShowCoefficients, ShowParameters, ShowCorrelations, ParameterCoding, satmod )
}


getsamplestats = function(dat,coeff,CoefficientDimensions,ParameterCoding,Labels){
    eps <- 1e-80
    n   = if(data.class(dat)=="numeric") dat else c(t(ftable(dat)));
    coeff = tomargmodform(coeff); #yields list(bt,coeff,at,d)

    atn <- if(data.class(coeff$at)!="matrix") n else coeff$at %*% n
    Gt <- if(data.class(coeff$at)!="matrix") Gfunction( atn, coeff$coeff ) else  Gfunction( atn, coeff$coeff ) %*% coeff$at 
    GDG <- Gt %*% (n * t(Gt))

    stats = list()
    stats$Eigenvalues = eigen(GDG, only.values = TRUE)$values;
    stats$SampleSize <- sum(n)
    stats$LogLikelihoodRatio = 0

    covresid <- NULL
    covtheta <- GDG
    obsval = gfunction( atn + eps, coeff$coeff )
#   cat("\n")
    stats = c(stats,coefficientstats( obsval, obsval, covtheta, covresid, CoefficientDimensions, Labels, satmod=TRUE, ParameterCoding=ParameterCoding ))
}

SampleStatistics = function(dat, 
                            coeff, 
                            CoefficientDimensions="Automatic", 
                            Labels="Automatic", 
                            ShowCoefficients=TRUE, 
                            ParameterCoding="Effect", 
                            ShowParameters=FALSE, 
                            ShowCorrelations=FALSE,  
                            Title="", 
                            ShowSummary = TRUE
                   ){
    stats = getsamplestats(dat,coeff,CoefficientDimensions,ParameterCoding,Labels)
    class(stats) = "CMM"
    if(ShowSummary) summary(stats, ShowParameters=ShowParameters, ShowCorrelations=ShowCorrelations, ParameterCoding=ParameterCoding)
    return(stats)
}


getbasicstatistics = function(n,manmhat,df,eig){
   stats = list()
   stats$SampleSize = sum(n);
   stats$DegreesOfFreedom = df[1]+df[2];
   stats$LogLikelihoodRatio = pds(n,manmhat,0)
   stats$ChiSquare = pds(n,manmhat,1)
   stats$DiscriminationInformation = pds(n,manmhat,-1)
   stats$BIC = stats$LogLikelihoodRatio - stats$DegreesOfFreedom * log(sum(n))
   stats$PValue = signif(1-pchisq(stats$LogLikelihoodRatio,stats$DegreesOfFreedom),5)
   stats$Eigenvalues = eig
   stats$ManifestFittedFrequencies = manmhat
   return(stats)
}

getmodelstats = function(dat, mhat, model, coeff, CoefficientDimensions, Labels, Method, satmod=FALSE, ParameterCoding="Effect"){
    eps = 1e-80
    stats = list()
    stats$Method = Method
    mm=coeff

    n   = if(data.class(dat)=="numeric") dat else c(t(ftable(dat)));

    margmod = model[[1]]

    latdim = length(mhat) / length(n);
    collapse = function(q){ apply( matrix( q, nrow = latdim ), 2, sum ) }
    expand = function(q){ rep(q, each=latdim ) }
    nhat <- if(latdim==1) n else mhat * expand( (n+eps) / (collapse(mhat)+eps) ) 
    manmhat = if(latdim == 1) mhat else collapse(mhat);

    Gfun = if(data.class(mm$at)!="matrix") Gfunction( mhat, mm$coeff ) else Gfunction( mm$at %*% mhat, mm$coeff ) %*% mm$at
    Gt   = if(is.null(mm$bt)) Gfun else mm$bt %*% Gfun
    Ht <- if(data.class(margmod$at)!="matrix")  margmod$bt  %*% Gfunction( mhat, margmod$coeff )  else margmod$bt  %*% Gfunction( margmod$at  %*% mhat, margmod$coeff )  %*% margmod$at
    obsval <- if(data.class(mm$at)!="matrix") gfunction( nhat, mm$coeff) else gfunction( mm$at %*% nhat, mm$coeff)
    fitval <- if(data.class(mm$at)!="matrix") gfunction( mhat, mm$coeff) else gfunction( mm$at %*% mhat, mm$coeff)

    GDG <- Gt %*% (mhat * t(Gt)) 
    GDH <- Gt %*% (mhat * t(Ht))
    HDH <- Ht %*% (mhat * t(Ht))
    covresid <- GDH %*% solve(HDH) %*% t(GDH) 
    covtheta <- GDG - covresid
    eig = eigen(HDH, only.values = TRUE)$values;
    df = modeldf(model,n)

    stats = c(stats,getbasicstatistics(n,manmhat,df,eig))
    stats = c(stats,coefficientstats( obsval, fitval, covtheta, covresid, CoefficientDimensions, Labels, satmod, ParameterCoding=ParameterCoding ))
    return(stats)
}

ModelStatistics <- function(dat, fitfreq, model, coeff, CoefficientDimensions="Automatic",
    Labels="Automatic",Method="ML",ShowCoefficients=TRUE,ShowParameters=FALSE, ParameterCoding="Effect", ShowCorrelations=FALSE, Title="" ){
    
    stats = if(isTRUE(model=="SaturatedModel")) 
                    getsamplestats(dat,coeff,CoefficientDimensions,ParameterCoding,Labels)
            else {
                    model = tomodelform(model); 
                    coeff = tomargmodform(coeff); #yields list(bt,coeff,at,d)
                    getmodelstats(dat,fitfreq,model,coeff,CoefficientDimensions,Labels,Method)}
    class(stats) = "CMM"
    summary(stats,ShowCoefficients=ShowCoefficients,ShowParameters=ShowParameters, ParameterCoding=ParameterCoding, ShowCorrelations=ShowCorrelations)
    return(stats)
}


MarginalModelFit = function(dat, model, 
    ShowSummary=TRUE,
    MaxSteps=1000, MaxStepSize=1, MaxError=1e-20, StartingPoint="Automatic", MaxInnerSteps=2, ShowProgress=TRUE, CoefficientDimensions="Automatic",
    Labels="Automatic",ShowCoefficients=TRUE,ShowParameters=FALSE, ParameterCoding="Effect", ShowCorrelations=FALSE, Method="ML", Title="Summary of model fit"){

    n  = if(data.class(dat)=="numeric") dat else c(t(ftable(dat)));
    model = tomodelform(model); #put model in standard form: "list(margmod,x)"
    latdim = tablesize(model) / length(n)
    if (latdim != floor(latdim) || latdim < 1) cat("Error: incorrect dimensions of the matrix specification of the model","\n") 

    fit <- switch( Method, 
        "ML" = if (latdim == 1) 
                    margmodfit( n, model, MaxSteps=MaxSteps, MaxStepSize=MaxStepSize, StartingPoint=StartingPoint, MaxError=MaxError,ShowProgress=ShowProgress,Method="ML")
               else margmodfitEM( n, latdim, model, maxoutersteps=MaxSteps, MaxInnerSteps=MaxInnerSteps, MaxStepSize=MaxStepSize, StartingPoint=StartingPoint, 
                                    MaxError=MaxError, ShowProgress=ShowProgress),
        "MDI" = if (latdim != 1)
                    cat("Error: cannot do latent variable models for Method='MDI'","\n") 
            else    margmodfit( n, model, MaxSteps=MaxSteps, MaxStepSize=MaxStepSize, StartingPoint=StartingPoint, MaxError=MaxError,ShowProgress=ShowProgress,Method="MDI"),
        "GSK" = gsk( n, model) 
    )

    if( Method!="GSK"){
        coeff = model[[1]]
        coeff$bt=NULL
        fit = c(fit, getmodelstats(dat,fit$FittedFrequencies,model,coeff,CoefficientDimensions,Labels,Method))
    }

    fit$Method = Method 
    fit$Title = Title
    class(fit) = "CMM"

    if( ShowSummary){
        if(Method!="GSK") printalgorithmdetails(fit)
        summary(fit, ShowCoefficients=ShowCoefficients,ShowParameters=ShowParameters, ShowCorrelations=ShowCorrelations, ParameterCoding=ParameterCoding)
        cat("\n")
    }

    return(fit)
}

Try the cmm package in your browser

Any scripts or data that you put into this service are public.

cmm documentation built on Aug. 10, 2023, 1:07 a.m.