inst/doc/LTFGRS_workflow_prediction.R

## ----include=FALSE, results = 'hide'------------------------------------------
knitr::opts_chunk$set(
  collapse = TRUE,
  comment = "#>"
)

## ----setup, results = 'hide',warning = FALSE, message = FALSE-----------------
library(LTFGRS)
library(dplyr)
library(lubridate)
library(rmarkdown)

## -----------------------------------------------------------------------------
# population parameters and seed
set.seed(555)
h2 = .5 # heritability
K = .3 # population prevalence

## -----------------------------------------------------------------------------
# assuming we have been provided a CIP object of the following style:
CIP = expand.grid(list(age = 1:100,
                       birth_year = 1900:2024,
                       sex = 0:1)) %>%
  group_by(sex, birth_year) %>%
  mutate(cip = (1:n() - 1)/n() * K) %>%
  ungroup() %>% 
  print(n = 10)

## -----------------------------------------------------------------------------
# hand curated trio information, taken from LTFHPlus vignette:
# https://emilmip.github.io/LTFHPlus/articles/FromTrioToFamilies.html
family = tribble(
  ~id, ~momcol, ~dadcol,
  "pid", "mom", "dad",
  "sib", "mom", "dad",
  "mhs", "mom", "dad2",
  "phs", "mom2", "dad",
  "mom", "mgm", "mgf",
  "dad", "pgm", "pgf",
  "dad2", "pgm2", "pgf2",
  "paunt", "pgm", "pgf",
  "pacousin", "paunt", "pauntH",
  "hspaunt", "pgm", "newpgf",
  "hspacousin", "hspaunt", "hspauntH",
  "puncle", "pgm", "pgf",
  "pucousin", "puncleW", "puncle",
  "maunt", "mgm", "mgf",
  "macousin", "maunt", "mauntH",
  "hsmuncle", "newmgm", "mgf",
  "hsmucousin", "hsmuncleW", "hsmuncle"
)

## -----------------------------------------------------------------------------
# creating a graph for the family
graph = prepare_graph(.tbl = family, icol = "id", mcol = "momcol", fcol = "dadcol")
# calculating the kinship matrix based on the graph
cov_mat = get_covmat(fam_graph = graph, h2 = h2, index_id = "pid")
# creating a phenotype for the family
liabs = MASS::mvrnorm(n = 1, mu = rep(0, nrow(cov_mat)), Sigma = cov_mat)

## -----------------------------------------------------------------------------
# these values are simulated only for illustrative purposes and not to make sense(!)
pheno = tibble(
  id = names(liabs),
  status = liabs > qnorm(K, lower.tail = F),
  # no consideration for generation etc in sex, fdato or birth_year:
  sex = sample(0:1, size = length(liabs), replace = TRUE),
  fdato = dmy(paste0(sample(1:28, length(liabs), replace = TRUE), "/", sample(1:12, length(liabs), replace = T), "/", sample(1940:2000, length(liabs), replace = TRUE))),
  birth_year = year(fdato),
  # age of onset only after fdato
  adhd = purrr::map2_chr(.x = status, .y = birth_year,
      ~ if(.x) paste0(sample(1:28, 1), "/", sample(1:12, 1), "/", sample((.y + 1):2010, 1)) else NA),
  # end of follow up assigned here
  indiv_eof = dmy("31/12/2010")) %>% # blanket time stop, meant to simulate end of registers
  mutate(
    # converting to date format
    adhd = dmy(adhd),
    # eof either blanket time stop or event date
    indiv_eof = pmin(indiv_eof, adhd, na.rm = TRUE)) %>% 
  filter(id != "pid_g") %>% # remove the genetic liability of the proband
  # status is not required here. Only used it to generate the age of diagnosis.
  select(-status) 
paged_table(pheno)

## -----------------------------------------------------------------------------
graph = prepare_graph(.tbl = family, 
                      icol = "id",
                      mcol = "momcol", 
                      fcol = "dadcol",
                      node_attributes = select(pheno, id, sex))

## -----------------------------------------------------------------------------
# Identify family members of degree n
family_graphs = get_family_graphs(pop_graph = graph,
                                  ndegree = 1,
                                  proband_vec = pheno$id,
                                  fid = "fid",
                                  fam_graph_col = "fam_graph")
family_graphs %>% print(n = 4)

## -----------------------------------------------------------------------------
family_graphs$fam_graph[[1]]

## -----------------------------------------------------------------------------
# calculate family specific thresholds
info = familywise_censoring(
  family_graphs = family_graphs,
  tbl = pheno,
  start = "fdato",
  end = "indiv_eof",
  event = "adhd",
  merge_by = setNames(c("id"), c("pid")))

paged_table(info)

## ----results = 'hide'---------------------------------------------------------
fam_thrs = prepare_thresholds(
  .tbl = info,
  CIP = CIP,
  age_col = "age" ,
  lower_equal_upper = FALSE,
  personal_thr = TRUE, 
  fid_col = "fid",
  personal_id_col = "pid",
  interpolation = "xgboost"
)

## -----------------------------------------------------------------------------
paged_table(fam_thrs)

## -----------------------------------------------------------------------------
# attach family specific thresholds
ltfgrs_input = familywise_attach_attributes(family_graphs = family_graphs,
                                            fam_attr = fam_thrs,
                                            fam_graph_col = "fam_graph",
                                            attached_fam_graph_col = "masked_fam_graph",
                                            cols_to_attach = c("lower", "upper"),
                                            censor_proband_thrs = TRUE)
ltfgrs_input %>% print(n = 4)

## -----------------------------------------------------------------------------
ltfgrs_input$masked_fam_graph[[1]]

## -----------------------------------------------------------------------------
ltfgrs_pa = estimate_liability(family_graphs = ltfgrs_input %>% rename(pid = fid),
                               h2 = h2, 
                               fid = "fid",
                               pid = "pid",
                               family_graphs_col = "masked_fam_graph",
                               method = "PA", # <- METHOD
                               useMixture = F)
paged_table(ltfgrs_pa)

## -----------------------------------------------------------------------------
ltfgrs_gibbs = estimate_liability(family_graphs = ltfgrs_input %>% rename(pid = fid), 
                                  h2 = h2,
                                  fid = "fid",
                                  pid = "pid",
                                  family_graphs_col = "masked_fam_graph",
                                  method = "Gibbs", # <- METHOD
                                  useMixture = F)
paged_table(ltfgrs_gibbs)

Try the LTFGRS package in your browser

Any scripts or data that you put into this service are public.

LTFGRS documentation built on Aug. 8, 2025, 7:17 p.m.