#' @title Calculate posterior mean genetic liabilities
#'
#' @description Determines the configuration for each individuals based on the
#' family history and assigns a posterior mean genetic liability to each
#' individual.
#'
#' @details
#' The posterior mean genetic liabilities (PMGL) are estimated using Gibbs
#' sampling. For further explanation on the process, as well as parameters used
#' in the algorithm, see \code{vignette("gibbs")}.\cr
#' Calculation of PMGL is based on which configuration an individual belongs
#' to. Configurations are determined by phenotype status in the family. Two
#' different configurations can be equivalent from a theoretical standpoint,
#' and will belong to the same configuration class, denoted by the `conf_class`
#' column. Equivalent configurations are assigned the same PMGL.\cr
#' \emph{Note:} The `sibs` argument can be used for limiting how many siblings
#' are taken into consideration when creating configurations. This allows for
#' different analyses with the same simulation.
#'
#' @param pheno_file path to file with information on liabilities, i.e.
#' the file "phenotypes.txt" generated by simulation functions.
#' @param output_file path of output file, including file extension ".txt".
#' @param alpha significance level used for deciding phenotype status based on
#' liabilities.
#' @param sibs \emph{optional:} number of siblings in the families to be used
#' for calculating posterior mean genetic liabilities. Defaults to using all
#' of an individual's siblings.
#'
#' @return Does not return anything, but writes an \code{output_file} to disk,
#' where configuration, configuration class and posterior mean genetic
#' liability is appended to the end of \code{pheno_file}.\cr
#' For a more detailed guide of how to read the configuration and configuration
#' class strings, see \code{vignette("liability-distribution")}.
#'
#' @import utils
#'
#' @export
assign_ltfh_phenotype <- function(pheno_file,
output_file = pheno_file,
alpha,
sibs) {
stopifnot("pheno_file needs to be a valid file" = file.exists(pheno_file),
"output_file needs to be a valid file path ending with '.txt'" =
(tools::file_ext(output_file) == "txt"),
"alpha needs to be a number between 0 and 1" =
(is.numeric(alpha) && length(alpha) == 1
&& 0 < alpha && alpha < 1),
"sibs needs to be a non-negative integer" =
(missing(sibs) || (is.numeric(sibs) && length(sibs) == 1
&& sibs == round(sibs) && 0 <= sibs)),
"sibs needs to be at most the number of siblings in pheno_file" =
(missing(sibs) || sibs <= n_sibs(load_phenotypes(pheno_file))))
# import phenotypes.
pheno <- load_phenotypes(pheno_file)
# if sibs isn't specified we just assign the max number of siblings in data
if (missing(sibs)) {
sibs <- n_sibs(pheno)
}
crit <- qnorm(1 - alpha)
# helper function for determining phenotype status based on liabilities
onetwo <- function(x) {
dplyr::if_else(x != -9, dplyr::if_else(x >= crit, "2", "1"), "")
}
# helper function for finding equivalent configurations
create_conf_class <- function(LTFH_key) {
key_split <- strsplit(LTFH_key, "")
child <- key_split[[1]][1]
parent_sum <- sum(as.numeric(key_split[[1]][2:3]))
n_sibs <- nchar(LTFH_key) - 3
sibs_sum <- sum(as.numeric(tail(key_split[[1]], -3)))
conf_class <- paste(child, parent_sum, n_sibs, sibs_sum, sep = "x")
return(conf_class)
}
# Liability for child, par1 and par2
liability_columns <- c(5, 8, 11)
# Liability for siblings
if (sibs > 0) {
liability_columns <- c(liability_columns, 11 + (1:sibs * 3))
}
# Create configurations (LTFH_key)
pheno <- dplyr::bind_cols(pheno,
tidyr::unite(data = tibble::as_tibble(
apply(pheno[, liability_columns], 2, onetwo)),
col = LTFH_key, sep = "", remove = F)[, 1])
# Table of configurations and which configuration class they belong to
table_1 <- dplyr::distinct(pheno, LTFH_key) %>%
dplyr::rowwise() %>%
dplyr::mutate(conf_class = create_conf_class(LTFH_key))
# Table of configuration classes and their posterior mean genetic liability
table_2 <- dplyr::distinct(table_1, conf_class, .keep_all = TRUE) %>%
dplyr::mutate(LTFH_pheno = generate_pmgl(conf = LTFH_key,
burn_in = 200,
start_value = 3,
alpha = alpha)) %>%
dplyr::select(conf_class, LTFH_pheno)
# Joining table_1 and table_2 by conf_class
table_3 <- dplyr::left_join(table_1, table_2, by = "conf_class")
# Joining such that new columns are appended to the tibble imported from
# phenofile
pheno <- dplyr::left_join(pheno, table_3, by = "LTFH_key")
#write to file
data.table::fwrite(x = pheno, file = output_file,
quote = F,
sep = " ",
col.names = T,
append = F)
}
#' @title Create phenotype status by proxy
#'
#' @description Assigns a new phenotype status to individuals by looking at
#' family members, for use in Genome Wide Association Studies by proxy (GWAX).
#'
#' @details
#' An individual, who is registered as a control, will be assigned case, if any
#' of its family members are cases. Otherwise the phenotype status is the
#' same.\cr
#' \emph{Note:} The `sibs` argument can be used for limiting how many siblings
#' are taken into consideration when creating the new phenotype This allows for
#' different analyses with the same simulation, see for instance
#' \code{vignette("geneference")}.
#'
#' @param pheno_file path to file with information on phenotype status, i.e.
#' the file "phenotypes.txt" generated by simulation functions.
#' @param output_file path of output file, including file extension ".txt".
#' @param sibs \emph{optional:} specify number of siblings to use for proxy.
#' Defaults to using all of an individual's siblings.
#'
#' @return Does not return anything, but writes \code{output_file} to disk,
#' where the GWAX phenotype column is appended to the end of \code{pheno_file}.
#'
#' @export
assign_GWAX_phenotype <- function(pheno_file,
output_file = pheno_file,
sibs) {
stopifnot("pheno_file needs to be a valid file" = file.exists(pheno_file),
"output_file needs to be a valid file path ending with '.txt'" =
(tools::file_ext(output_file) == "txt"),
"sibs needs to be a non-negative integer" =
(missing(sibs) || (is.numeric(sibs) && length(sibs) == 1
&& sibs == round(sibs) && 0 <= sibs)),
"sibs needs to be at most the number of siblings in pheno_file" =
(missing(sibs) || sibs <= n_sibs(load_phenotypes(pheno_file))))
# Read the file
pheno <- load_phenotypes(pheno_file)
# if sibs isn't specified we just assign the max number of siblings in data
if (missing(sibs)) {
sibs <- n_sibs(pheno)
}
# Columns with phenotypes for child, parent 1 and parent 2
pheno_cols <- seq(3, 10, 3)
# Columns with phenotypes for siblings
if (sibs > 0) {
pheno_cols <- c(pheno_cols, 9 + (1:sibs * 3))
}
# Set GWAX phenotype to maximum of the phenotypes in the family
pheno[, "GWAX_pheno"] <- apply(pheno[, pheno_cols], 1, max, na.rm = TRUE)
# Write to file
data.table::fwrite(x = pheno, file = output_file,
quote = F,
sep = " ",
col.names = T,
append = F)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.