R/GLMM_longitDA.R
In mixAK: Multivariate Normal Mixture Models and Mixtures of Generalized Linear Mixed Models Including Model Based Clustering

Documented in GLMM_longitDA

##
##  PURPOSE:   Longitudinal discriminant analysis
##             based on GLMM MCMC fits (with possibly several response variables)
##
##  AUTHOR:    Arnost Komarek (LaTeX: Arno\v{s}t Kom\'arek)
##             arnost.komarek[AT]mff.cuni.cz
##
##  LOG:       05/08/2009: created as GLMM_longitClust, only allows for continuous responses
##             28/10/2009: renamed to GLMM_longitDA, still allows for continuous responses only
##             16/04/2015: minor revision (stop added when some response variable is not continuous)
##             15/03/2017  .C call uses registered routines
##
##  FUNCTIONS:  GLMM_longitDA
##
## ================================================================================================

## *************************************************************
## GLMM_longitDA
## *************************************************************
##
GLMM_longitDA <- function(mod, w.prior, y, id, time, x, z, xz.common = TRUE, info)
{
  thispackage <- "mixAK" 
  silent <- TRUE
  
########## ========== Some input checks ========== ##########
########## ======================================= ##########
  if (!is.list(mod)) stop("mod must be a list")
  if (length(mod) < 2) stop("mod must be a list of length 2 or more")
  if (any(sapply(mod, class) != "GLMM_MCMC")) stop("all components of mod must be of class GLMM_MCMC")
  nClust <- length(mod)

  if (length(w.prior) != nClust) stop(paste("w.prior must be of length ", nClust, sep=""))
  if (any(w.prior < 0)) stop("w.prior must all be non-negative")
  w.prior <- w.prior / sum(w.prior)
  
  Rc <- mod[[1]]$R["Rc"]
  Rd <- mod[[1]]$R["Rd"]
  R <- Rc + Rd

  keepMCMC <- mod[[1]]$nMCMC["keep"]
  for (cl in 2:nClust){
    if (mod[[cl]]$R["Rc"] != Rc) stop(paste("cluster number ", cl, " has ", mod[[cl]]$R["Rc"], " cont. responses (must be ", Rc, ")", sep=""))
    if (mod[[cl]]$R["Rd"] != Rd) stop(paste("cluster number ", cl, " has ", mod[[cl]]$R["Rd"], " discrete responses (must be ", Rd, ")", sep=""))
    keepMCMC <- c(keepMCMC, mod[[cl]]$nMCMC["keep"])
  }  

  
########## ========== Design matrices ============ ##########
########## ======================================= ##########
  if (!xz.common){
    if (!is.list(x)) stop("x must be a list when xz.common is FALSE")
    if (length(x) != nClust) stop(paste("x must be a list of length ", nClust, sep=""))
    if (!is.list(z)) stop("z must be a list when xz.common is FALSE")
    if (length(z) != nClust) stop(paste("z must be a list of length ", nClust, sep=""))    
  }  


########## ========== Work out the data ============ ##########
########## ========================================= ##########
  Cp <- Cq <- CfixedIntcpt <- CrandomIntcpt <- numeric()
  CX <- CZ <- numeric()
  ## CXtX <- CZitZi <- numeric()                              ### CODE REMOVED ON 02/11/2009
  CshiftScale_b <- numeric()
  Kmax_b <- numeric()
  chK_b <- chw_b <- chmu_b <- chLi_b <- numeric()
  chsigma_eps <- numeric()
  chalpha <- numeric()
  
  for (cl in 1:nClust){     ### Loop over clusters

    ##### Design stuff
    ##### ---------------
    if (xz.common){
      dd <- GLMM_MCMCdata(y=y, dist=mod[[cl]]$dist, id=id, time=time, x=x, z=z, random.intercept=mod[[cl]]$random.intercept)
    }else{
      dd <- GLMM_MCMCdata(y=y, dist=mod[[cl]]$dist, id=id, time=time, x=x[[cl]], z=z[[cl]], random.intercept=mod[[cl]]$random.intercept)
    }

    ifit <- GLMM_MCMCifit(do.init=FALSE, na.complete=TRUE,
                          y=dd$y, dist=dd$dist, id=dd$id, time=dd$time, x=dd$x, z=dd$z, random.intercept=dd$random.intercept,
                          xempty=dd$xempty, zempty=dd$zempty, Rc=dd$Rc, Rd=dd$Rd,
                          p=dd$p, p_fi=dd$p_fi, q=dd$q, q_ri=dd$q_ri, lalpha=dd$lalpha, dimb=dd$dimb)
    ## ifit$x[[s]], ifit$z[[s]] now contains also intercept columns if these should be included
    ## * also rows corresponding to missing values are removed
    ## * there is the same number of observations for each cluster
    
    ##### Variables common to all clusters
    ##### ------------------------------------
    if (cl == 1){
      dist1 <- dd$dist
      Cdist <- dd$ndist

      Cy_c  <- ifit$Cy_c
      Cy_d  <- ifit$Cy_d
      I     <- ifit$I
      Cn    <- ifit$Cn
      sumCn <- ifit$sumCn

      ID <- ifit$ID[[1]]
      TIME <- ifit$time

      nrow_pi <- sum(ifit$n[[1]])
    }  
    
    ##### Check whether supplied y is compatible with these used to fit mod
    ##### -------------------------------------------------------------------
    if (ncol(dd$y) != R) stop(paste("y must have ", R, " columns", sep=""))

    ##### Check whether common variables (across clusters) are really common
    ##### --------------------------------------------------------------------
    if (any(dd$dist != dist1)) stop(paste("dist of cluster ", cl, " is incompatible with that of cluster 1", sep=""))

    if (length(ifit$Cy_c) != length(Cy_c)) stop(paste("number of NA's for cluster ", cl, " leads to incompatibility in Cy_c", sep=""))
    if (length(ifit$Cy_d) != length(Cy_d)) stop(paste("number of NA's for cluster ", cl, " leads to incompatibility in Cy_d", sep=""))    
    if (ifit$I != I) stop(paste("I for cluster ", cl, " is incompatible with that of clulster 1", sep=""))
    if (length(ifit$Cn) != length(Cn)) stop(paste("Cn for cluster ", cl, " is incompatible with that of cluster 1", sep=""))
    if (any(ifit$Cn != Cn)) stop(paste("Cn for cluster ", cl, " is incompatible with that of cluster 1", sep=""))    
    
    ##### Check whether supplied x and z is compatible with these used to fit mod
    ##### Double check that there is the same number of observations for each response
    ##### ------------------------------------------------------------------------------
    for (s in 1:R){      
      if (dd$xempty[s]){
        if (mod[[cl]]$p[[s]] != 0) stop(paste("x[[", s, "]] should not be empty for cluster ", cl, sep=""))
      }else{
        if (ncol(dd$x[[s]]) != mod[[cl]]$p[s]) stop(paste("x[[", s, "]] matrix for cluster ", cl, " must have ", mod[[cl]]$p[s], " columns", sep=""))
      }

      if (dd$zempty[s]){
        if (dd$z[[s]] != "empty") stop(paste("z[[", s, "]] must be empty for cluster ", cl, sep=""))
      }else{
        if (ncol(dd$z[[s]]) != mod[[cl]]$q[s]) stop(paste("z[[", s, "]] matrix for cluster ", cl, " must have ", mod[[cl]]$q[s], " columns", sep=""))
      }

      if (any(ifit$n[[s]] != ifit$n[[1]])) stop("BUG in the function, contact AK")
    }

    ### ===== CODE REMOVED ON 02/11/2009 =====
    ##### Calculate CZitZi to be used here (it is different from that needed by GLMM_MCMC function)
    ##### and now included in ifit$CZitZi
    ##### REMARK:  Due to a loop, this is relatively slow for larger data, do it in C++ in a future???
    ##ZitZi <- numeric(0)
    ##cumn <- c(0, cumsum(ifit$n[[1]]))
    ##for (i in 1:I){                       ## loop over longitudinal profiles
    ##  for (j in 1:ifit$n[[1]][i]){        ## loop over observations within longitudinal profiles
    ##    for (s in 1:R){
    ##      if (dd$q_ri[s]){
    ##        tmpZ <- matrix(ifit$z[[s]][(cumn[i]+1):(cumn[i]+j),], ncol=ncol(ifit$z[[s]]))
    ##        tmpZtZ <- t(tmpZ) %*% tmpZ
    ##        ZitZi <- c(ZitZi, tmpZtZ[lower.tri(tmpZtZ, diag=TRUE)])
    ##      }  
    ##    }          
    ##  }  
    ##}
    ##cat("length ZitZi[", cl, "] = ", length(ZitZi), "\n", sep="")
    ### ===== END OF CODE REMOVED ON 02/11/2009 =====
    
    ##### Vectors to be supplied to C++
    ##### ---------------------------------
    Cp <- c(Cp, dd$p)
    Cq <- c(Cq, dd$q)    
    CfixedIntcpt  <- c(CfixedIntcpt, dd$CfixedIntcpt)
    CrandomIntcpt <- c(CrandomIntcpt, dd$CrandomIntcpt)
    CX     <- c(CX, ifit$CX)
    ##CXtX   <- c(CXtX, ifit$CXtX)           ## CODE REMOVED ON 02/11/2009
    CZ     <- c(CZ, ifit$CZ)
    ##CZitZi <- c(CZitZi, ZitZi)             ## CODE REMOVED ON 02/11/2009

    CshiftScale_b <- c(CshiftScale_b, mod[[cl]]$scale.b$shift, mod[[cl]]$scale.b$scale)
    
    Kmax_b <- c(Kmax_b, mod[[cl]]$prior.b$Kmax)
    if (mod[[cl]]$prior.b$priorK == "fixed"){
      chK_b  <- c(chK_b, mod[[cl]]$K_b)
      chw_b  <- c(chw_b, t(mod[[cl]]$w_b))
      chmu_b <- c(chmu_b, t(mod[[cl]]$mu_b))    
      chLi_b <- c(chLi_b, t(mod[[cl]]$Li_b))
    }else{
      stop("not implemented for variable K")
    }  
    
    if (mod[[cl]]$lalpha)  chalpha <- c(chalpha, t(mod[[cl]]$alpha))
    if (mod[[cl]]$R["Rc"]) chsigma_eps <- c(chsigma_eps, t(mod[[cl]]$sigma_eps))
  }     ### End of loop over clusters

  if (!length(chalpha))     chalpha <- 0
  if (!length(chsigma_eps)) chsigma_eps <- 0  
  
  ### !!! CX, CXtX, CZ, CZitZi contain one 0 when there is no such matrix
  ###     in a model for specific cluster

  if (missing(info)) info <- min(keepMCMC)
  if (info <= 0) info <- 1

  ##### ===============================================================
  #####                                                           #####
  ##### Validation of C++ code of GLMM_longitDA function          #####
  ##### may follow here.                                          #####
  ##### Corresponding code is available in                        #####
  ##### ~/Rlib/mixAK/Rtemp/GLMM_longitDA/GLMM_longitDA_validC.R   #####
  #####                                                           #####
  ##### ===============================================================

  if (FALSE){
    cat("Cy_c:\n")
    print(Cy_c)

    cat("Cy_d:\n")
    print(Cy_d)

    cat("CX:\n")
    print(CX)

    cat("CZ:\n")
    print(CZ)

    ##cat("CZitZi:\n")
    ##print(CZitZi)

    cat("R_c = ", Rc, ", R_d = ", Rd, ", nClust = ", nClust, ", I = ", I, "\n", sep="")
    cat("Cdist:\n")
    print(Cdist)

    cat("Cn:\n")
    print(Cn)

    cat("Cp:\n")
    print(Cp)
    
    cat("CfixedIntcpt:\n")
    print(CfixedIntcpt)

    cat("Cq:\n")
    print(Cq)
    
    cat("CrandomIntcpt:\n")
    print(CrandomIntcpt)

    cat("CshiftScale_b:\n")
    print(CshiftScale_b)
        
    cat("keepMCMC:\n")
    print(keepMCMC)        
  }  
  
  if (any(Cdist != 0)){
      
    stop("Not (yet) implemented if any of the response variables is not continuous.")
    
  }else{
      
    fit <- .C(C_GLMM_longitDA,
              Y_c          = as.double(Cy_c),
              R_c          = as.integer(Rc),
              Y_d          = as.integer(Cy_d),
              R_d          = as.integer(Rd),
              dist         = as.integer(Cdist),
              nClust       = as.integer(nClust),
              I            = as.integer(I),
              n            = as.integer(Cn),
              X            = as.double(CX),
              p            = as.integer(Cp),
              fixedIntcpt  = as.integer(CfixedIntcpt),
              Z            = as.double(CZ),
              #SZitZiS      = as.double(CZitZi),                  ### REMOVED ON 02/11/2009
              q            = as.integer(Cq),
              randIntcpt   = as.integer(CrandomIntcpt),
              shiftScale_b = as.double(CshiftScale_b),
              keepMCMC     = as.integer(keepMCMC),
              info         = as.integer(info),
              Kmax_b       = as.integer(Kmax_b),
              chsigma_eps  = as.double(chsigma_eps),
              chK_b        = as.integer(chK_b),
              chw_b        = as.double(chw_b),
              chmu_b       = as.double(chmu_b),
              chLi_b       = as.double(chLi_b),
              chalpha      = as.double(chalpha),
              pi_marg      = double(nrow_pi * nClust),
              pi_cond      = double(nrow_pi * nClust),
              pi_ranef     = double(nrow_pi * nClust),
              err          = as.integer(0),
              PACKAGE = thispackage)
  }  
  
  if (nrow_pi == 1){
    fit$pi_ranef <- w.prior * fit$pi_ranef
    fit$pi_ranef <- fit$pi_ranef / sum(fit$pi_ranef)

    fit$pi_cond <- w.prior * fit$pi_cond
    fit$pi_cond <- fit$pi_cond / sum(fit$pi_cond)

    fit$pi_marg <- w.prior * fit$pi_marg
    fit$pi_marg <- fit$pi_marg / sum(fit$pi_marg)    
  }else{     
    w.priorMat <- matrix(rep(w.prior, nrow_pi), ncol = nClust, byrow = TRUE)
    #
    fit$pi_ranef <- w.priorMat * matrix(fit$pi_ranef, ncol = nClust)
    fit$pi_ranef <- fit$pi_ranef / apply(fit$pi_ranef, 1, sum)
    #
    fit$pi_cond <- w.priorMat * matrix(fit$pi_cond, ncol = nClust)
    fit$pi_cond <- fit$pi_cond / apply(fit$pi_cond, 1, sum)
    #
    fit$pi_marg <- w.priorMat * matrix(fit$pi_marg, ncol = nClust)
    fit$pi_marg <- fit$pi_marg / apply(fit$pi_marg, 1, sum)
  }  

  RET <- list(ident = data.frame(id = ID, time = TIME),
              marg  = fit$pi_marg,
              cond  = fit$pi_cond,
              ranef = fit$pi_ranef)
  
  return(RET)  
}

Any scripts or data that you put into this service are public.

mixAK documentation built on Sept. 17, 2024, 1:06 a.m.

rdrr.io home R language documentation Run R code online

CRAN packages Bioconductor packages R-Forge packages GitHub packages

Note that we can't provide technical support on individual packages. You should contact the package authors for that.

mixAK
Multivariate Normal Mixture Models and Mixtures of Generalized Linear Mixed Models Including Model Based Clustering

R/GLMM_longitDA.R
In mixAK: Multivariate Normal Mixture Models and Mixtures of Generalized Linear Mixed Models Including Model Based Clustering

Defines functions GLMM_longitDA

Documented in GLMM_longitDA

Try the mixAK package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

mixAK Multivariate Normal Mixture Models and Mixtures of Generalized Linear Mixed Models Including Model Based Clustering

R/GLMM_longitDA.R In mixAK: Multivariate Normal Mixture Models and Mixtures of Generalized Linear Mixed Models Including Model Based Clustering

Defines functions GLMM_longitDA

Documented in GLMM_longitDA

Try the mixAK package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

mixAK
Multivariate Normal Mixture Models and Mixtures of Generalized Linear Mixed Models Including Model Based Clustering

R/GLMM_longitDA.R
In mixAK: Multivariate Normal Mixture Models and Mixtures of Generalized Linear Mixed Models Including Model Based Clustering