R/source.icc.R

Defines functions QGmvicc QGicc qg.negbin.log.icc qg.Poisson.log.icc qg.binomN.probit.icc qg.binom1.probit.icc qg.Gaussian.icc

Documented in QGicc QGmvicc

#----------------------------------------------------------------
#	QGglmm R package (functions for ICC estimates)
#	Functions to estimate QG parameters from GLMM estimates
#----------------------------------------------------------------
#	Pierre de Villemereuil (2015)
#----------------------------------------------------------------

#    This program is free software: you can redistribute it and/or modify
#    it under the terms of the GNU General Public License as published by
#    the Free Software Foundation, either version 3 of the License, or
#    (at your option) any later version.
#
#    This program is distributed in the hope that it will be useful,
#    but WITHOUT ANY WARRANTY; without even the implied warranty of
#    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
#    GNU General Public License for more details.

#    You should have received a copy of the GNU General Public License
#    along with this program.  If not, see <http://www.gnu.org/licenses/>.

## ----------- Special functions for known analytical solutions ---------- ##

qg.Gaussian.icc <- function(mu = NULL,
                            var.comp,
                            var.p,
                            predict = NULL)
{
    if(length(mu) > 1 | length(var.comp) != 1 | length(var.p) != 1) {
        stop("The parameters mu, var.comp and var.p must be of length 1,
             please check your input.")
    }
    
    if (is.null(predict)) { 
        if(is.null(mu)) {
            stop("Please provide either mu or predict.")
        } else {
            predict <- mu
        }
    }
    
    # Nothing to be done, except averaging over predict
    if (length(predict) == 1) {
        var_fixed <- 0
    } else {
        var_fixed <- var(predict)
    }
    
    data.frame(mean.obs     = mean(predict),
               var.obs      = var.p + var_fixed, 
               var.comp.obs = var.comp, 
               icc.obs      = var.comp/(var.p + var_fixed))
}

qg.binom1.probit.icc <- function(mu = NULL,
                                 var.comp,
                                 var.p,
                                 predict = NULL,
                                 width = 10)
{
    if(length(mu) > 1 | length(var.comp) != 1 | length(var.p) != 1) {
        stop("The parameters mu, var.comp and var.p must be of length 1,
             please check your input.")
    }
    if (is.null(predict)) { 
        if(is.null(mu)) {
            stop("Please provide either mu or predict.")
        } else {
            predict <- mu
        }
    }
    
    # Observed mean
    p <- mean(1 - pnorm(0, predict, sqrt(var.p + 1)))
    
    # Observed variance
    var_obs <- p * (1 - p)
    
    # Component variance
    var_comp_obs <-
        integrate(f = function(x){
                        sapply(x, function(x_i) {
                            (mean(pnorm(x_i,
                                        -predict, 
                                        sqrt(var.p - var.comp + 1)))^2) * 
                                dnorm(x_i, 0,sqrt(var.comp))
                        })
                       }, 
                  lower = - width * sqrt(var.comp),
                  upper = width * sqrt(var.comp))$value - (p^2)
    data.frame(mean.obs     = p,
               var.obs      = var_obs,
               var.comp.obs = var_comp_obs,
               icc.obs      = var_comp_obs/var_obs)
}

qg.binomN.probit.icc <- function(mu = NULL,
                                 var.comp, var.p,
                                 n.obs,
                                 predict = NULL,
                                 width = 10)
{
    if(length(mu) > 1 | length(var.comp) != 1 | length(var.p) != 1) {
        stop("The parameters mu, var.comp and var.p must be of length 1,
             please check your input.")
    }
    if (is.null(predict)) {
        if(is.null(mu)) {
            stop("Please provide either mu or predict.")
        } else {
            predict <- mu
        }
    }
    
    # Observed mean
    p <- n.obs * mean(1 - pnorm(0, predict, sqrt(var.p + 1)))
    
    # Observed variance
    prob.sq.int <-
        mean(
            sapply(predict, 
                   function(pred_i) {
                       integrate(f = function(x){
                                         (pnorm(x)^2) * 
                                             dnorm(x, pred_i, sqrt(var.p))
                                     }, 
                                 lower = pred_i - width * sqrt(var.p),
                                 upper = pred_i + width * sqrt(var.p)
                       )$value
                   }
            )
        )
    
    var_obs <- ((n.obs^2) - n.obs) * prob.sq.int - p^2 +p
    
    # Component variance
    var_comp_obs <- 
        integrate(f = function(x){
                        sapply(x, 
                            function(x_i){
                                (mean(n.obs * 
                                      pnorm(x_i,
                                            -predict,
                                            sqrt(var.p - var.comp + 1)))^2) * 
                                    dnorm(x_i, 0,sqrt(var.comp))
                            })
                        }, 
                  lower = -width * sqrt(var.comp),
                  upper = width * sqrt(var.comp)
        )$value - (p^2)
    data.frame(mean.obs     = p,
               var.obs      = var_obs,
               var.comp.obs = var_comp_obs,
               icc.obs      = var_comp_obs/var_obs)
}

qg.Poisson.log.icc <- function(mu = NULL,
                               var.comp,
                               var.p,
                               predict = NULL)
{
    if(length(mu) > 1 | length(var.comp) != 1 | length(var.p) != 1) {
        stop("The parameters mu, var.comp and var.p must be of length 1,
             please check your input.")
    }
    if (is.null(predict)) {
        if(is.null(mu)) {
            stop("Please provide either mu or predict.")
        } else {
            predict <- mu
        }
    }
    
    # Observed mean
    lambda <- mean(exp(predict + (var.p/2)))
    # Mean of lambda square, needed for the following
    lambda_sq <- mean(exp(2 * (predict + var.p/2)))
    # Observed variance
    var_obs <- lambda_sq * exp(var.p) - lambda^2 + lambda
    # Component variance
    var_comp_obs <- exp(var.comp + var.p) * (mean(exp(predict))^2) - (lambda^2)
    data.frame(mean.obs     = lambda,
               var.obs      = var_obs,
               var.comp.obs = var_comp_obs,
               icc.obs      = var_comp_obs/var_obs)
}

qg.negbin.log.icc <- function(mu = NULL,
                              var.comp,
                              var.p,
                              theta,
                              predict = NULL)
{
    if(length(mu) > 1 | length(var.comp) != 1 | length(var.p) != 1) {
        stop("The parameters mu, var.comp and var.p must be of length 1,
             please check your input.")
    }
    if (is.null(predict)) {
        if(is.null(mu)) {
            stop("Please provide either mu or predict.")
        } else {
            predict <- mu
        }
    }
    
    # Observed mean
    lambda <- mean(exp(predict + (var.p/2)))
    
    # Mean of lambda square, needed for the following
    lambda_sq <- mean(exp(2 * (predict + var.p/2)))
    
    # Observed variance
    var_obs <- lambda_sq * exp(var.p) - 
               lambda^2 + 
               lambda + 
               (mean(exp(2 * (predict + var.p)))/theta)
    
    # Component variance
    var_comp_obs <- exp(var.comp + var.p) * (mean(exp(predict))^2) - (lambda^2)
    data.frame(mean.obs     = lambda,
               var.obs      = var_obs,
               var.comp.obs = var_comp_obs,
               icc.obs      = var_comp_obs/var_obs)
}

##  ---------------------- General functions ------------------------------ ##

QGicc <- function(mu = NULL,
                  var.comp,
                  var.p,
                  model = "",
                  width = 10,
                  predict = NULL,
                  closed.form = TRUE,
                  custom.model = NULL,
                  n.obs = NULL,
                  theta = NULL,
                  verbose = TRUE)
{
    # Error if ordinal is used (multivariate code not available yet)
    if ("ordinal" %in% model) {
        stop("ICC computations are not able to address ordinal traits (yet?).")
    }
    if (length(mu) > 1 | length(var) >1) {
        stop("The parameters mu and var must be of length 1,
             please check your input.")
    }
    
    if (is.null(predict)) {
        if(is.null(mu)) {
            stop("Please provide either mu or predict.")
        } else {
            predict <- mu
        }
    }
    
    # Using analytical solutions if possible (and asked for, see closed.form arg)
    if (model == "Gaussian" & closed.form) {
        if (verbose) {
            print("Using the closed forms for a Gaussian model
                  with identity link (e.g. LMM).")
        }
        
        qg.Gaussian.icc(mu      = mu,
                        var.comp = var.comp,
                        var.p   = var.p,
                        predict = predict)
        
    } else if (model == "binom1.probit" & closed.form) {
        if (verbose) {
            print("Using a semi-closed form for a binom1.probit model.")
        }
        warning("Some component (var.comp.obs) are not totally
                from a closed form solution: an integral is computed")
        
        qg.binom1.probit.icc(mu         = mu,
                             var.comp   = var.comp,
                             var.p      = var.p,
                             predict    = predict,
                             width      = width)
        
    } else if (model == "binomN.probit" & closed.form) {
        # Binomial - not - binary model
        if (is.null(n.obs)) {
            stop("binomN.probit model used, 
                 but no observation number (n.obs) defined.")
        }
        
        if (verbose) {
            print("Using a semi-closed form for a BinomialN-probit model.")
        }
        
        warning("Some component (var.obs and var.comp.obs) are not totally from
                a closed form solution: an integral is computed")
        
        qg.binomN.probit.icc(mu         = mu,
                             var.comp   = var.comp,
                             var.p      = var.p,
                             predict    = predict,
                             n.obs      = n.obs,
                             width      = width)
        
    } else if (model == "Poisson.log" & closed.form) {
        if (verbose) {
            print("Using the closed forms for a Poisson-log model.")
        }
        
        qg.Poisson.log.icc(mu       = mu,
                           var.comp = var.comp,
                           var.p    = var.p,
                           predict  = predict)
        
    } else if (model == "negbin.log" & closed.form){
        # NegBin - log model
        if (is.null(theta)) {
            stop("negbin model used, but theta not defined.")
        }
        if (verbose) {
            print("Using the closed forms for a NegativeBinomial-log model.")
        }
        qg.negbin.log.icc(mu        = mu,
                          var.comp  = var.comp,
                          var.p     = var.p,
                          predict   = predict,
                          theta     = theta)
        
    } else {
        
        # Use a custom model if defined, otherwise look into the "Dictionary"
        if (is.null(custom.model)) {
            if (model == "") {
                stop("The function requires either model or custom.model.")
            } else {
                funcs <- QGlink.funcs(model, n.obs = n.obs, theta = theta)
            }
        } else {
            funcs <- custom.model
        }
        
        # Observed mean computation
        if (verbose) {
            print("Computing observed mean...")
        }
        z_bar <- QGmean(mu          = mu,
                        var         = var.p,
                        link.inv    = funcs$inv.link,
                        width       = width,
                        predict     = predict)
        
        # Phenotypic variances computation
        if (verbose) {
            print("Computing phenotypic variance...")
        }
        var_exp <- QGvar.exp(mu         = mu,
                             var        = var.p,
                             link.inv   = funcs$inv.link,
                             obs.mean   = z_bar,
                             width      = width,
                             predict    = predict)
        var_dist <- QGvar.dist(mu       = mu,
                               var      = var.p,
                               var.func = funcs$var.func,
                               width    = width,
                               predict  = predict)
        var_obs <- var_exp + var_dist
        
        # Computing conditional variance (i.e. without var.comp)
        var.cond <- var.p - var.comp
        
        # Function giving the conditional expectancy
        cond_exp <- function(t) {
            sapply(t, function(t_i) {
                mean(sapply(predict, function(pred_i) {
                    integrate(f = function(u) {
                                    funcs$inv.link(u) * 
                                        dnorm(u, pred_i + t_i, sqrt(var.cond))
                                  },
                              lower = pred_i + t_i - width * sqrt(var.cond),
                              upper = pred_i + t_i + width * sqrt(var.cond)
                    )$value
                }))
            })
        }
        if (var.cond == 0) {
            cond_exp <- function(t) {
                sapply(t, function(t_i) {
                    mean(funcs$inv.link(predict + t_i))
                })
            }
        }
        
        # Computing variance of component on the observed data scale
        # Note: Using Koenig's formula
        if (verbose) {
            print("Computing component variance...")
        }
        var_comp_obs <- integrate(f = function(x) {
                                        ((cond_exp(x) - z_bar)^2) * 
                                            dnorm(x, 0,sqrt(var.comp))
                                      },
                                  lower = -width * sqrt(var.comp),
                                  upper = width * sqrt(var.comp)
                        )$value
        
        # Return a data.frame with the calculated components
        data.frame(mean.obs     = z_bar,
                   var.obs      = var_obs,
                   var.comp.obs = var_comp_obs,
                   icc.obs      = var_comp_obs/var_obs)
    }
}


QGmvicc <- function(mu = NULL,
                    vcv.comp,
                    vcv.P,
                    models,
                    predict = NULL,
                    rel.acc = 0.001,
                    width = 10, 
                    n.obs = NULL,
                    theta = NULL,
                    verbose = TRUE,
                    mask = NULL) 
{
    # Error if ordinal is used (multivariate code not available yet)
    if ("ordinal" %in% models) {
        stop("Multivariate functions of QGglmm are 
             not able to address ordinal traits (yet?).")
    }
    
    # Setting the integral width according to vcov 
    #(lower mean - w, upper mean + w)
    w1 <- sqrt(diag(vcv.P - vcv.comp)) * width
    w2 <- sqrt(diag(vcv.comp)) * width
    
    # Number of dimensions
    d <- length(w1)
    
    # If no fixed effects were included in the model
    if(is.null(predict)) {
        if(is.null(mu)) {
            stop("Please provide either mu or predict.")
        } else {
            predict <- matrix(mu, nrow = 1)
        }
    }
    
    # Defining the link/distribution functions
    # If a vector of names were given
    if (!(is.list(models))) {
        if (!is.character(models)) {
            stop("models should be either a list or a vector of characters")
        }
        
        # Need to account for n.obs
        binN <- grepl("binomN", models)
        if (any(binN) & length(n.obs) != sum(binN)) {
            stop("Please provide a number of n.obs equal to the number binomN models.")
        }
        list.n.obs <- rep(list(NULL), length(models))
        list.n.obs[binN] <- n.obs
        
        # Need to account for theta
        negbin <- grepl("negbin", models)
        if (any(negbin) & length(theta) != sum(negbin)) {
            stop("Please provide a number of theta equal to the number binomN models.")
        }
        list.theta <- rep(list(NULL), length(models))
        list.theta[negbin] <- theta
        
        models <- mapply(function(name, n.obs, theta) {
            QGlink.funcs(name = name, n.obs = n.obs, theta = theta)
        },
                         name       = models,
                         n.obs      = list.n.obs,
                         theta      = list.theta,
                         SIMPLIFY   = FALSE)
    }
    
    # Now we can compute the needed functions
    inv.links <- function(mat) {
        res <- mat
        for (i in 1:d) {
            res[i, ] <- models[[i]]$inv.link(mat[i, ])
        }
        res
    }
    var.funcs <- function(mat) {
        res <- mat
        for (i in 1:d) {
            res[i, ] <- models[[i]]$var.func(mat[i, ])
        }
        res
    }
    d.inv.links <- function(mat) {
        res <- mat
        for (i in 1:d) {
            res[i, ] <- models[[i]]$d.inv.link(mat[i, ])
        }
        res
    }
    
    # Computing the observed mean
    if (verbose) {
        print("Computing observed mean...")
    }
    
    z_bar <- QGmvmean(mu        = mu,
                      vcov      = vcv.P,
                      link.inv  = inv.links,
                      predict   = predict,
                      rel.acc   = rel.acc,
                      width     = width,
                      mask      = mask)
    
    # Computing the variance-covariance matrix
    if (verbose) {
        print("Computing phenotypic variance-covariance matrix...")
    }
    vcv.P.obs <- QGvcov(mu          = mu,
                        vcov        = vcv.P,
                        link.inv    = inv.links,
                        var.func    = var.funcs,
                        mvmean.obs  = z_bar, 
                        predict     = predict,
                        rel.acc     = rel.acc,
                        width       = width,
                        exp.scale   = FALSE,
                        mask        = mask)
    
    # Computing the logdet of vcv.P - vcv.comp
    logdet.cond <- calc_logdet(vcv.P - vcv.comp)
    
    # Function giving the conditional expectancy
    cond_exp <- function(t) {
        apply(t, 2, function(col) {
            apply(
                apply(predict, 1,
                      function(pred_i) {
                          cubature::hcubature(
                              f  = function(x) {
                                  inv.links(x) *
                                      matrix(rep(vec_mvnorm(x, 
                                                            pred_i + col, 
                                                            vcv.P - vcv.comp, 
                                                            logdet.cond), 
                                                 d),
                                             nrow = d,
                                             byrow = TRUE)
                              },
                              lowerLimit = pred_i + col - w1,
                              upperLimit = pred_i + col + w1,
                              fDim       = d,
                              tol        = rel.acc,
                              absError   = 0.0001,
                              vectorInterface = TRUE
                          )$integral
                      }
                      ), 1, mean)
        })
    }
    
    if (verbose) {
        print("Computing component variance-covariance matrix...")
    }
    
    # Computing the logdet of vcv.comp
    logdet.comp <- calc_logdet(vcv.comp)
    
    # Computing the upper - triangle matrix of "expectancy of the square"
    v <- cubature::hcubature(
        f  = function(x) {
            vec_sq_uptri(cond_exp(x)) *
            matrix(rep(vec_mvnorm(x, rep(0, d), vcv.comp, logdet.comp), (d^2 + d)/2),
                   nrow = (d^2 + d)/2,
                   byrow = TRUE)
        },
        lowerLimit = -w2,
        upperLimit = w2,
        fDim       = (d^2 + d)/2,
        tol        = rel.acc,
        absError   = 0.0001,
        vectorInterface = TRUE
    )$integral
    
    # Applying the mask if provided
    if(!is.null(mask)) {
        mask2 <- matrix(FALSE, ncol = ncol(v), nrow = nrow(v))
        mask2[((1:d) * ((1:d) + 1)) / 2, ] <- t(mask)
        v[mask2] <- NA
    }
    
    # Creating the VCV matrix
    vcv <- matrix(NA, d, d)
    vcv[upper.tri(vcv, diag = TRUE)] <- v
    vcv[lower.tri(vcv)] <- vcv[upper.tri(vcv)]
    
    # Computing the VCV matrix using Keonig's formuka
    vcv <- vcv - z_bar %*% t(z_bar)
      
    # Return a list of QG parameters on the observed scale
    list(mean.obs       = z_bar,
         vcv.P.obs      = vcv.P.obs,
         vcv.comp.obs   = vcv)
} 
devillemereuil/QGglmm documentation built on Feb. 3, 2023, 6:49 a.m.