R/calculate_cov.R

Defines functions calculate_cov

Documented in calculate_cov

#' @title Calculate covariance matrix of liabilities
#'
#' @description Compute a sample covariance matrix from \code{"phenotypes.txt"}
#' as generated by simulation functions.
#'
#' @details
#' The outputted matrix follows the order described in
#' \code{vignette("liability-distribution")}.The output can be used to validate
#' that covariances between the liabilities of family members are as
#' expected.\cr
#' The \code{sibs} parameter can be used to adjust how many siblings to
#' calculate the covariance between.
#'
#' @param pheno dataset with phenotypes, as loaded by \code{load_phenotypes()}.
#' @param sibs \emph{optional:} number of siblings to be included in covariance
#' matrix. Defaults to using all of an individual's siblings.
#'
#' @return A covariance matrix using the data from \code{pheno}.
#'
#' @import stats
#'
#' @export
calculate_cov <- function(pheno, sibs) {
  stopifnot("sibs needs to be a non-negative integer" =
            (missing(sibs) ||
               (sibs >= 0 && is.numeric(sibs)
                && length(sibs) == 1 && round(sibs) == sibs)),
            "sibs need to be at most the number of siblings in pheno" =
              (missing(sibs) || sibs <= n_sibs(pheno)))

  # If sibs isn't specified we just assign the max number of siblings in data
  if (missing(sibs)) {
    sibs <- n_sibs(pheno)
  }

  pheno[pheno == -9] <- NA
  indexes <- c(c(4:5, 8), c(seq(11, 11 + sibs * 3, by = 3)))

  # This informs the user about the distribution of the siblings
  message(noquote("Attention"))
  for (i in 0:sibs) {
    message(noquote(
      paste0("number of families with at least ", i, " siblings: ",
             sum(complete.cases(pheno[, c(seq(11, 11 + i * 3, by = 3))])))))
  }
  return(round(cov(pheno[, indexes], use = "pairwise.complete.obs"), 2))
}
FireGutter/geneference documentation built on Dec. 17, 2021, 8:27 p.m.