R/combi.R

#' Perform model-based data integration
#'
#' @param data A list of data objects with the same number of samples. See details.
#' @param M the required dimension of the fit, an integer of at least 2
#' @param covariates a dataframe of n samples with sample-specific variables.
#' @param distributions a character vector describing which distributional
#' assumption should be used. See details.
#' @param compositional A logical vector with the same length as "data",
#'  indicating if the datasets should be treated as compositional
#' @param maxIt an integer, the maximum number of iterations
#' @param tol A small scalar, the convergence tolerance
#' @param verbose Logical. Should verbose output be printed to the console?
#' @param confounders A dataframe or a list of dataframes with the same
#' length as data.
#'  In the former case the same dataframe is used for conditioning all views,
#'  in the latter case each view has its own conditioning variables (or NULL).
#' @param compositionalConf A logical vector with the same length as "data",
#'  indicating if the datasets should be treated as compositional when
#'  correcting for confounders. Numerical problems may occur when set to TRUE
#' @param minFraction a scalar, each taxon's total abundance
#' should equal at least the number of samples n times minFraction,
#'   otherwise it is trimmed.
#' @param prevCutOff a scalar, the prevalence cutoff for the trimming.
#' @param record A boolean, should intermediate estimates be stored?
#' Can be useful to check convergence
#' @param fTol The tolerance for solving the estimating equations
#' @param logTransformGaussian A boolean, should the Gaussian data be
#' log-transformed, i.e. are they log-normal?
#' @param nleq.control A list of arguments to the nleqslv function
#' @param weights A character string, either 'marginal' or 'uniform', indicating
#' how the feature parameters should be weighted in the normalization
#' @param meanVarFit The type of mean variance fit, see details
#' @param maxFeats The maximal number of features for a Newton-Raphson procedure
#'  to be feasible
#' @param dispFreq An integer, the period after which the variances should be
#' reestimated
#' @param allowMissingness A boolean, should NA values be allowed?
#' @param biasReduction A boolean, should bias reduction be applied to allow for
#' confounder correction in groups with all zeroes? Not guaranteed to work
#' @param maxItFeat An integer, the maximum allowed number of iterations
#' in the estimation of the feature parameters
#' @param initPower The power to be applied to the residual matrix used to
#' calculate the starting value. Must be positive; can be tweaked in case of
#' numerical problems (e.g. infinite values returned by nleqslv)
#'
#' @return An object of the "combi" class, containing all information on the
#' data integration and fitting procedure
#'
#' @details Data can be provided as raw matrices with features in the columns,
#' or as phyloseq, SummarizedExperiment or ExpressionSet objects.
#' Estimation of the independence models and view-wise parameters can be
#' parallelised. See ?BiocParallel::bplapply and ?BiocParallel::register.
#' meanVarFit = "spline" yields a cubic spline fit for the abundance-variance
#'  trend, "cubic" gives a third degree polynomial. Both converge to the
#'  diagonal line with slope 1 for small means.
#'  The distributions can be either "quasi" for quasi-likelihood or "gaussian"
#'  for Gaussian data. Quasi-likelihood is currently only implemented for
#'  compositional views, and Gaussian estimation only for non-compositional views.
#' @importFrom limma squeezeVar
#' @importFrom vegan rda
#' @importFrom stats sd
#' @export
#' @examples
#' data(Zhang)
#' #The method works on several datasets at once, and is simply not very fast.
#' #Hence the "Not run" statement
#' \dontrun{
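#' #Optional: the independence models can be fitted in parallel (see details).
#' #A sketch, assuming the BiocParallel package is installed: register a
#' #backend before the call, e.g.
#' #BiocParallel::register(BiocParallel::SnowParam(2))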
#' #Unconstrained
#' microMetaboInt = combi(
#' list("microbiome" = zhangMicrobio, "metabolomics" = zhangMetabo),
#' distributions = c("quasi", "gaussian"), compositional = c(TRUE, FALSE),
#' logTransformGaussian = FALSE, verbose = TRUE)
#' #Constrained
#' microMetaboIntConstr = combi(
#'     list("microbiome" = zhangMicrobio, "metabolomics" = zhangMetabo),
#'     distributions = c("quasi", "gaussian"), compositional = c(TRUE, FALSE),
#'     logTransformGaussian = FALSE, covariates = zhangMetavars, verbose = TRUE)
#'     }
combi = function(data, M = 2L, covariates = NULL, distributions,
                   compositional, maxIt = 3e2L, tol = 1e-3, verbose = FALSE,
                   prevCutOff = 0.95, minFraction = 0.1, logTransformGaussian = TRUE,
                   confounders = NULL, compositionalConf = rep(FALSE, length(data)),
                   nleq.control = list(maxit = 1e3L, cndtol = 1e-16),
                   record = TRUE, weights = NULL, fTol = 1e-5,
                   meanVarFit = "spline", maxFeats = 2e3, dispFreq = 10L,
                   allowMissingness = FALSE, biasReduction = TRUE,
                maxItFeat = 2e1L, initPower = 1){
    #Perform checks
    stopifnot(is.numeric(M), is.logical(compositional),
              is.character(distributions), is.numeric(maxIt), is.numeric(tol),
              is.numeric(prevCutOff), is.numeric(minFraction),
              is.logical(logTransformGaussian), is.logical(compositionalConf),
              is.list(nleq.control), is.logical(record), is.numeric(fTol),
              is.character(meanVarFit), is.numeric(maxFeats),
              is.numeric(dispFreq), is.logical(allowMissingness),
              is.logical(biasReduction), is.numeric(maxItFeat))
    if(M %in% c(0L,1L) | (as.integer(M)!=M)){
        stop("Please supply an integer dimension of at least 2!")
    }
    if(initPower<=0){
      stop("Please provide strictly positive initPower for starting value calculation")
    }
    if(!all(vapply(FUN.VALUE = integer(1),
                   c(length(data), length(distributions), length(compositionalConf)),
                   identical, length(compositional)))){
        stop("Make sure data, distribution, links and compositional have the same length")
    }
    if(!all(compositional[distributions=="quasi"]) ||
       any(compositional[distributions == "gaussian"])){
      stop("Quasi likelihood currently only implemented for compositional data,
           and gaussian estimation only for non compositional data")
    }
    if(!meanVarFit %in% c("cubic", "spline"))
        stop("Mean-variance trend must be either 'cubic' or 'spline'!\n")
    if(length(data)==1){
        warning("Please provide at least two views for data integration!
                Now fitting an ordination model.",
                immediate. = TRUE)}
    if(is.null(names(data))) names(data) = paste0("View", seq_along(data))
    namesData = names(compositional) = names(distributions) = names(data)
    DimNames = paste0("Dim", seq_len(M))
    #Extract otu table from phyloseq objects
    data = extractData(data, logTransformGaussian = logTransformGaussian)
    if(any(vapply(FUN.VALUE = TRUE, data, function(x){is.null(rownames(x))}))){
        stop("Make sure to provide sample names for all views!")
    }
    if(any(vapply(FUN.VALUE = TRUE, data, anyNA)) & !allowMissingness){
        stop("Missing data present. To allow fit with missing data, set allowMissingness to TRUE")
    }
    rowNames = lapply(data, function(x){sort(rownames(x))})
    if(!all(vapply(FUN.VALUE = logical(1), rowNames[-1], FUN = identical,
                   rowNames[[1]]))){
        if(allowMissingness){
        # Make sure at least some samples are shared
            if(length(Reduce(rowNames, f = intersect))==0){
                stop("No samples common to all views have been found!
                     Data integration is questionable in this case. Check rownames of objects provided.")
            } else {
                #Fill in NA's for missing samples. These observations do not contribute to the estimating equations
                allRownames = Reduce(rowNames, f = union)
                data  = lapply(data, function(dat){
                    rownamesMissing = allRownames[!allRownames %in% rownames(dat)]
                    naMatrix = matrix(NA, length(rownamesMissing), ncol(dat))
                    rownames(naMatrix) = rownamesMissing
                    dat = rbind(dat, naMatrix)
                    dat[allRownames,]
                })
            }
        } else {
            stop("Rownames do not match!
                 Fix rownames first or set 'allowMissingness' to TRUE to allow for missing data")
            }
    }
    n = nrow(data[[1]]) #Total number of samples
    zeroRows = apply(vapply(data, FUN.VALUE = logical(nrow(data[[1]])),
                            function(x){rowSums(x, na.rm = TRUE)==0}), 1, any)
    if(any(zeroRows)){
        warning("Zero rows\n", paste(which(zeroRows), collapse = " ") ,
                "\nfiltered out prior to fit", immediate. = TRUE)
        data = lapply(data, function(x){x[!zeroRows,]})
        covariates = covariates[!zeroRows,]
        confounders = confounders[!zeroRows,]
    }
    if(!is.null(confounders) && !length(confounders) %in% c(1,length(data))){
        stop("Please provide a single confounder matrix or as many as you provide views!\n")
    }
    #Assign link functions
    links = lapply(distributions, function(distr){
        switch(distr, "gaussian" = "identity", "quasi" = "log",
        "binomial" = "logit")
    })
    #Get the inverse link functions
    invLinks = lapply(links, function(link){
        if(is.function(link)){link = deparse(substitute(link))}
        #Convert to string if needed
        switch(link, "identity" = identity, "log" = exp,
               "logit" = function(x){exp(x)/(1+exp(x))})
    })
    links = lapply(links, match.fun) #Match the link functions
    #Assign weighting schemes
    if(is.null(weights)){
        weights = ifelse(distributions == "quasi", "marginal", "uniform")
    }
    #Define some handy quantities
    numSets = length(data) #Number of views
    seqSets = seq_len(numSets) #A sequence along the views
    names(seqSets) = namesData
    #Build confounder matrices
    oneConfMat = length(confounders) == 1 #Single confounder matrix for all views?
    confounders = lapply(seqSets, function(i){if(is.null(confounders[[i]])) NULL else
        data.frame(confounders[[i]])[!zeroRows,,drop = FALSE]}) #Coerce to data frames
    confMats = lapply(confounders, buildConfMat)
    #Build covariate matrix
    constrained = !is.null(covariates)
    # Mean variance trends
    meanVarFit = if(length(meanVarFit)==1){
        rep(meanVarFit, numSets)
    } else if(length(meanVarFit)==numSets){
        meanVarFit
    } else {
        stop("Provide mean-variance trend vector of length one or equal to number of datasets!\n")
    }
    # Prune count data, also using the confounders if needed
    data = lapply(seq_along(data), function(i){
        if(distributions[[i]] == "quasi"){
            data[[i]] = data[[i]][, colMeans(data[[i]]==0, na.rm = TRUE) <= prevCutOff &
                                      colSums(data[[i]], na.rm = TRUE) >= (n * minFraction)]
            if(!oneConfMat && !is.null(confounders[[i]]) && !biasReduction){
                data[[i]] = trimOnConfounders(data = data[[i]], prevCutOff = prevCutOff,
                                              minFraction = minFraction, n = n,
                                              confounders = confMats[[if(length(confounders)>1) i else 1]]$confModelMatTrim)
            }
        }
        return(data[[i]])
    })
    #All zero rows, but not all NAs
    n = nrow(data[[1]])
    zeroRowsIndep = apply(
        vapply(data, FUN.VALUE = logical(n),
               function(x){rowSums(x, na.rm = TRUE)==0 &
                       !apply(x,1, function(x) all(is.na(x)))}), 1, any)
    if(any(zeroRowsIndep)){
        message("Zero rows\n", paste(which(zeroRowsIndep), collapse = " ") ,
                "\nfiltered out after filtering features")
    }
    data = lapply(data, function(x){x[!zeroRowsIndep,]})
    rowNames = lapply(data, function(x){sort(rownames(x))})
    if (constrained) {
        if(!is.data.frame(covariates)){
            stop("Please provide covariate matrix as a dataframe")
        }
        covariates = droplevels(covariates[!zeroRowsIndep,])#Drop unused levels
        tmp = buildCovMat(covariates)
        covMat = tmp$covModelMat
        numCov = ncol(covMat)
        # Already prepare the matrix that defines the
        # equations for centering the coefficients of the
        # dummy variables
        tmp = buildCentMat(tmp$datFrame)
        centMat = tmp$centMat
        covariates = tmp$datFrame
        rm(tmp)
        # Remove rows with NA's, we might want to find
        # something better or leave it to the end user
        if (anyNA(covariates)) {
            NArows = apply(covariates, 1, anyNA)
            if (all(NArows)) {
                stop("All samples have missing covariates")
            }
            data = lapply(data, function(X) X[!NArows, ])
            covariates = covariates[!NArows, ]
            if (!is.null(confounders)) {
                confMats = lapply(confMats,function(confMat){
                    confMat$confModelMatTrim = confMat$confModelMatTrim[!NArows,]
                    confMat$confModelMat = confMat$confModelMat[!NArows,]
                    confMat
                })
            }
            warning(paste("Some covariates contain missing values. We removed samples\n",
                          paste(which(NArows), collapse = ", "),
                          "\n prior to analysis. Imputation of missing values in predictors is left to the user"),
                    immediate. = TRUE)
        }
    } else {
        covModelMat = centMat = NULL
    }
    nLambda1s = if(constrained) nrow(centMat) else 1
    names(data) = namesData
    n = nrow(data[[1]])
    numVars = vapply(FUN.VALUE = integer(1), data, ncol) #Number of variables per view

    IDs = lapply(seqSets, function(i){
        (sum(numVars[seq_len(i-1)])+1):sum(numVars[seq_len(i)])
    }) # The indices of the features of each view
    newtonRaphson = numVars <= maxFeats #TRUE if few enough features for Newton-Raphson to be feasible

    #### MARGINAL INDEPENDENCE MODELS ####
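    # Per view, fit a baseline model containing only sample-wise offsets
    # (rowOff, e.g. library sizes) and feature-wise offsets (colOff) on the
    # link scale; the low-dimensional interaction is estimated on top of this
    # independence model.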
    if(verbose){cat("Estimating independence models...\n")}
    marginModels = lapply(seqSets, function(i){
        estIndepModel(data = data[[i]], distribution = distributions[[i]],
                       link = links[[i]], compositional = compositional[[i]],
                       invLink = invLinks[[i]], tol = tol, maxIt = maxIt,
                      meanVarFit = meanVarFit[[i]], newtonRaphson = newtonRaphson[[i]],
                      control = nleq.control, dispFreq = dispFreq)
        })
    #Corresponding offsets
    offsetsMargin = lapply(seqSets, function(i){buildMarginalOffset(indepModel = marginModels[[i]],
                                                      invLink = invLinks[[i]])})
    #Assign weights
    weightsList = lapply(seqSets, function(i){
        switch(weights[[i]],
               "uniform" = rep(1, numVars[[i]]), #/numVars[[i]]
               "marginal" = invLinks[[i]](marginModels[[i]]$colOff))
    })
    #Estimate the mean-variance trends for conditioning and starting value calculation
    meanVarTrends = lapply(seqSets, function(i){
        if(distributions[[i]] == "quasi"){
            estMeanVarTrend(data = data[[i]], meanMat = offsetsMargin[[i]],
                            meanVarFit = meanVarFit[[i]],
                            libSizes = invLinks[[i]](marginModels[[i]]$rowOff),
                            baseAbundances = invLinks[[i]](marginModels[[i]]$colOff))
        } else {NULL}
    })

    #### CONDITIONING ####
    if(verbose && !all(vapply(FUN.VALUE = logical(1), confounders, is.null))){
        cat("Conditioning on known confounders ...\n")}
    confVars = lapply(seqSets, function(i){
        if(!oneConfMat && is.null(confounders[[i]])){return(NULL)
            } else {filterConfounders(offSet = offsetsMargin[[i]], data = data[[i]], #marginal offsets ('offsets' is only built after conditioning)
                              distribution = distributions[[i]], link = links[[i]],
                              compositional = compositionalConf[[i]],
                              invLink = invLinks[[i]],
                              confMat = confMats[[if(length(confounders)>1) i else 1]]$confModelMat,
                              meanVarTrend = meanVarTrends[[i]], numVar = numVars[[i]],
                              control = nleq.control, marginModel = marginModels[[i]],
                              allowMissingness = allowMissingness,
                              biasReduction = biasReduction)
            }
        })
    #Prepare a list of independence models
    indepModels = lapply(seqSets, function(i){
        libSizes = invLinks[[i]](marginModels[[i]]$rowOff)
        colMat = matrix(marginModels[[i]]$colOff, n, numVars[[i]], byrow = TRUE) +
                    if(!is.null(confounders[[i]])){confMats[[i]]$confModelMat %*% confVars[[i]]} else 0
        return(list(colMat = colMat, libSizes = libSizes))
            })
    offsets = lapply(seqSets, function(i){
        if(distributions[[i]]=="quasi"){
            expColMat = exp(indepModels[[i]]$colMat)
            return(indepModels[[i]]$libSizes *
                       if(compositional[[i]]) expColMat/rowSums(expColMat) else expColMat)
        } else if(distributions[[i]]=="gaussian"){
            return(indepModels[[i]]$colMat + indepModels[[i]]$libSizes)
        }
    })
    numVars = lapply(offsets, ncol)
    n = nrow(offsets[[1]])
    #### MAIN MODEL ####
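    # The fit is built dimension per dimension: within each dimension, feature
    # parameters and latent variables (or covariate coefficients alphas when
    # constrained) are updated in turn, with Lagrange multipliers enforcing the
    # centering, normalisation and orthogonality restrictions, until the
    # relative change drops below tol or maxIt is reached.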
    # Pre-allocate some quantities
    iter = rep.int(1L, M)
    converged = logical(M)
    # Lagrange multipliers
    lambdasLatent = if(constrained) numeric((nLambda1s+1)*M + M*(M-1)/2) else
        numeric(M + M*(M-1)/2)
    lambdasParams = lapply(compositional, function(comp){
        integer(M*(2 + (M-1)/2))})
    #M more restrictions for the normalisation
    #Prepare to record the iterations if necessary
    if(record){
        if(constrained){
            alphaRec = array(0, dim = c(numCov ,M, maxIt), dimnames = list(
                colnames(covMat), DimNames, NULL
            ))
            } else{
            latentRec = array(0, dim = c(n, M, maxIt), dimnames = list(
                rowNames[[1]], DimNames, NULL
            ))
            }
        paramRec = lapply(numVars, function(numVar){
            array(0, dim = c(M, numVar, maxIt), dimnames = list(
                DimNames, NULL, NULL
            ))
        })
        #Convergence
        latentConv = matrix(nrow = maxIt, ncol =  M, dimnames = list(
            NULL, DimNames
        ))
        paramConv = array(dim = c(maxIt, numSets, M), dimnames = list(
            NULL, namesData, DimNames
        ))
    } else {
        latentConv = paramConv = alphaRec = paramRec = latentRec = NULL
    }
    if(allowMissingness){
        naIdList = lapply(data, is.na)
    }

    ### STARTING VALUES ###
    # Find starting values through svd on concatenated datasets,
    #normalized as well as possible
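    # Raw residuals from the independence models are scaled (by the feature
    # standard deviations for gaussian views, by the fitted mean-variance trend
    # for quasi views), concatenated across views, and decomposed by SVD
    # (or by redundancy analysis when covariates are supplied).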
    concat = Reduce(lapply(seqSets, function(i){
        rawResiduals = data[[i]]-offsets[[i]]
        out = switch(distributions[[i]],
                     "gaussian" = rowMultiply(rawResiduals,
                                              1/apply(rawResiduals, 2, sd,
                                                      na.rm = TRUE)),
                     "quasi" = rawResiduals/sqrt(meanVarTrends[[i]](
                         invLinks[[i]](marginModels[[i]]$colOff),
                         invLinks[[i]](marginModels[[i]]$rowOff))))
        if(allowMissingness){
            out[naIdList[[i]]] = 0
        }
        out
    }), f = cbind)
    concat = abs(concat)^initPower*sign(concat)#shrink or increase residuals
    if(constrained){
        CCA = rda(X = concat, Y = covMat)$CCA
        # Redundancy analysis for starting values
        if (sum(!colnames(covMat) %in% CCA$alias) < M) {
            M = sum(!colnames(covMat) %in% CCA$alias)
            warning(immediate. = TRUE, "Can only fit an ordination with ",
                    M, " dimensions with so few covariates!")
        }
        alphas = matrix(0, numCov, M)
        alphas[!colnames(covMat) %in% CCA$alias,
              ] = CCA$biplot[, seq_len(M)]
        alphas = t(t(alphas) - colMeans(alphas))
        #alphas = t(t(alphas)/sqrt(colSums(alphas^2)))
        latentVars = covMat %*% alphas
        paramEsts = lapply(seqSets, function(i){
            t(CCA$v[IDs[[i]],seq_len(M)])
        })
    } else {
    Svd = svd(concat)
    #Re-center parameter estimates
    tmpMats = lapply(seqSets, function(i){
        tmpMat = Svd$v[IDs[[i]], seq_len(M)]
        tmpMat = t(tmpMat) - colMeans(tmpMat*weightsList[[i]])
        tmpMat
      })
    #Transfer some weight to fit the constraints
    tmpWeights = vapply(seqSets, FUN.VALUE = double(M), function(i){
        sqrt(colSums(t(tmpMats[[i]])^2*weightsList[[i]]))
    })
    latentVars = scale((Svd$u)[,seq_len(M)], scale = FALSE,
                       center = TRUE)
    # %*% diag(Svd$d)
    #latentVars = t(t(latentVars)*exp(rowMeans(log(tmpWeights))))
    paramEsts =  lapply(seqSets, function(i){
        tmpMats[[i]]/tmpWeights[,i]
    })
    rm(Svd)
    }
    rm(concat)
    meanVarTrends = vector("list", M)
    #Estimate the mean-variance trend once, given the complete independence model

    for(m in seq_len(M)){
        if(verbose) cat("Estimating dimension", m, "\n")
        #Modify the offset if needed
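        # For m > 1 the previously fitted dimensions are absorbed into the
        # offsets, so that dimension m is estimated on top of them.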
        if(m>1){
            offsets = lapply(seqSets, function(i){
                if(compositional[[i]]){
                  indepModels[[i]]$libSizes*
                        buildCompMat(indepModels[[i]]$colMat,
                                     paramEsts[[i]], latentVars, m-1)
                } else {
                buildMu(offSet = offsets[[i]], latentVar = latentVars[,m-1],
                        paramEsts = paramEsts[[i]][m-1,],
                        distribution = distributions[[i]])
                }
            })
        }
        #Prepare the jacobians
                ## Latent variables
        emptyJacLatent = buildEmptyJac(n = if(constrained) numCov else n, m = m,
                          lower = if(constrained) alphas else latentVars,
                          nLambda1s = nLambda1s,
                          normal = constrained, centMat = centMat)
                ## Feature parameters
        emptyFeatureJacs = lapply(seqSets, function(i){
            buildEmptyJac(numVars[[i]], m = m, lower = t(paramEsts[[i]]),
                          normal = compositional[[i]],
                          weights = weightsList[[i]],
                          distribution = distributions[[i]])})
        while(iter[[m]] <= maxIt && !converged[[m]]){

            #Estimate mean-variance trend
            if(((iter[[m]]-1) %% dispFreq)==0){
                if(verbose) cat("Estimating mean variance trends\n")
            meanVarTrends[[m]] = lapply(seqSets, function(i){
                if(distributions[[i]] == "quasi"){
                    estMeanVarTrend(data = data[[i]], meanMat = offsetsMargin[[i]],
                                    meanVarFit = meanVarFit[[i]],
                                    libSizes = indepModels[[i]]$libSizes,
                                    baseAbundances = invLinks[[i]](marginModels[[i]]$colOff))
                } else {NULL}
            })
            }
            #Store old estimates
            if(constrained){
                alphaOld = alphas[,m]
            } else {latentVarsOld = latentVars[,m]}
        paramEstsOld = lapply(paramEsts, function(x){x[m,]})

         #Estimate parameters
        if(verbose) cat("Estimating feature parameters of dimension", m,
                        ": iteration", iter[[m]], "\n")
        paramEstsTmp = estFeatureParameters(data = data, distributions = distributions,
                                            offsets = offsets, paramEsts = paramEsts,
                                            latentVars = latentVars, m = m,
                                            numVars = numVars,
                                            meanVarTrends = meanVarTrends[[m]],
                                            lambdasParams = lambdasParams,
                                            JacFeatures = emptyFeatureJacs,
                                            seqSets = seqSets,
                                            control = nleq.control,
                                            weights = weightsList, compositional = compositional,
                                            indepModels = indepModels, fTol = fTol,
                                            newtonRaphson = newtonRaphson,
                                            allowMissingness = allowMissingness,
                                            maxItFeat = maxItFeat)
        for (i in seqSets){
            #Enforce restrictions to stabilize algorithm
            tmp = paramEstsTmp[[i]]$x[seq_len(numVars[[i]])]
            tmp = tmp - sum(tmp*weightsList[[i]]) #Center
            if(m>1){
                paramEsts[[i]][m,] = gramSchmidtOrth(paramEsts[[i]][m,],
                                                     paramEsts[[i]][m-1,],
                                                     weights = weightsList[[i]],
                                                     norm = compositional[[i]])
            } #Orthogonalize
            paramEsts[[i]][m,] = tmp/sqrt(sum(tmp^2*weightsList[[i]])) #Scale
            lambdasParams[[i]][seqM(m, nLambda1s = 1, normal = compositional[[i]] )] =
                paramEstsTmp[[i]]$x[-seq_len(numVars[[i]])]
            #Record convergence
            if(record) paramConv[iter[[m]],i,m] = paramEstsTmp[[i]]$conv
        };rm(tmp)
        ### Estimate nuisance parameters ###
        #if(verbose) cat("Estimating nuisance parameters ...\n")
        #For each dimension, re-estimate the posterior standard deviation
        #using limma, if applicable
        varPosts = lapply(seqSets, function(i){
            if(distributions[[i]] == "gaussian"){
                varEst = colSums(na.rm = TRUE, (data[[i]] - offsets[[i]] -
                                      outer(latentVars[,m], paramEsts[[i]][m,]))^2)/
                    (n-m-1)
                squeezeVar(var = varEst, df = n-m-1)$var.post
            } else {NULL}
        })
        ### Estimate latent variables ###
        if(verbose) cat("Estimating latent variables of dimension", m,
                        ": iteration", iter[[m]], "\n")
        #Prefab the parameter matrices
        paramMats = lapply(seqSets, function(i){
            matrix(paramEsts[[i]][m,], byrow = TRUE, n, numVars[[i]])
            })
        #nleq.control$trace = TRUE
        latentVarsTmp = estLatentVars(data = data,
                            distributions = distributions,
                            offsets = offsets, paramEsts = paramEsts,
                            paramMats = paramMats,
                            latentVars = (if(constrained) alphas else latentVars)[, m],
                            latentVarsLower = (if(constrained) alphas else latentVars)[,
                                                seq_len(m-1), drop = FALSE],
                            n = n, m = m, numSets = numSets, numVars = numVars,
                            meanVarTrends = meanVarTrends[[m]], links = links,
                            lambdasLatent = lambdasLatent[
                                          seqM(m, normal = constrained,
                                                nLambda1s = nLambda1s)],
                            Jac = as.matrix(emptyJacLatent),
                            control = nleq.control, covMat = covMat,
                            numCov = numCov, centMat = centMat, nLambda1s = nLambda1s,
                            varPosts = varPosts, constrained = constrained,
                            compositional = compositional, indepModels = indepModels,
                            fTol = fTol, allowMissingness = allowMissingness)
        #nleq.control$trace = FALSE
        lambdasLatent[seqM(m, normal = constrained, nLambda1s = nLambda1s)] =
            latentVarsTmp$x[-seq_len(if(constrained) numCov else n)]
        if(constrained){
            alphas[,m] = latentVarsTmp$x[seq_len(if(constrained) numCov else n)]
            if(m>1){
            alphas[,m] = gramSchmidtOrth(alphas[,m], alphas[,m-1], norm = TRUE)
            }
            latentVars[,m] = covMat %*% alphas[,m]
        } else {
            latentVars[, m] = latentVarsTmp$x[seq_len(n)] -
                mean(latentVarsTmp$x[seq_len(n)]) #Enforce centering for stability
            if(m>1){
                latentVars[, m] = gramSchmidtOrth(latentVars[, m],
                                                  latentVars[, m-1], norm = FALSE)
            } #Orthogonalize
        }
        if(record) latentConv[iter[[m]],m] = latentVarsTmp$conv #Store convergence
        # Store intermediates if necessary
        if(record){
            if(constrained){
        alphaRec[,m,iter[[m]]] = alphas[,m]
            } else {
        latentRec[,m,iter[[m]]] = latentVars[,m]
            }
        for (i in seqSets){
            paramRec[[i]][m,,iter[[m]]] = paramEsts[[i]][m,]
        }
        }
        # Change iterator
        iter[[m]] = iter[[m]] + 1
        # Check for convergence
        converged[[m]] = all(vapply(seqSets, FUN.VALUE = logical(1),
                                    function(i){
            sqrt(mean((1-paramEsts[[i]][m,]/paramEstsOld[[i]])^2)) < tol})) &&
            (if(constrained) sqrt(mean((1-alphas[,m]/alphaOld)^2)) else
                sqrt(mean((1-latentVars[,m]/latentVarsOld)^2))) < tol
        }
    iter[[m]] = iter[[m]]-1
    }
    if(!all(converged)){
        warning("Some dimensions did not converge, please investigate cause!")
    }
    ### Assign names
    rownames(latentVars) =  rownames(data[[1]])
    colnames(latentVars) = DimNames
    if(constrained) {
        colnames(alphas) = colnames(latentVars)
        rownames(alphas) = colnames(covMat)
    }
    for(i in seqSets){
        colnames(paramEsts[[i]]) = colnames(data[[i]])
        rownames(paramEsts[[i]]) = colnames(latentVars)
    }
    #### RETURN RESULT ####
    out = list(latentVars = latentVars, paramEsts = paramEsts,
               indepModels = indepModels, lambdasLatent = lambdasLatent,
               lambdasParams = lambdasParams, iter = iter, converged = converged,
               alphas = if(constrained) alphas else NULL,
               covMat = if(constrained) covMat else NULL, covariates = covariates,
               weights = weightsList, varPosts = varPosts, latentConv = latentConv,
               paramConv = paramConv, data = data, compositional = compositional,
               meanVarTrends = meanVarTrends, distributions = distributions,
               confVars = confVars, confMats = confMats, marginModels = marginModels,
               newtonRaphson = newtonRaphson, allowMissingness = allowMissingness,
               centMat = centMat)
    if(record){
        out$paramRec = lapply(seqSets, function(i){
            tmp = paramRec[[i]][,,seq_len(max(iter))]
            colnames(tmp) = colnames(paramEsts[[i]])
            tmp})
        out$paramConv = out$paramConv[seq_len(max(iter)),,]
        out$latentConv = out$latentConv[seq_len(max(iter)),]
        if(constrained){
        out$alphaRec = alphaRec[,,seq_len(max(iter))]
        rownames(out$alphaRec) = rownames(alphas)
        } else {
        out$latentRec = latentRec[,,seq_len(max(iter))]
        rownames(out$latentRec) = rownames(latentVars)
        }
    }
    class(out) = "combi"
    return(out)
}