inst/doc/full_data_workflow.R

## ----include = FALSE----------------------------------------------------------
knitr::opts_chunk$set(
  collapse = TRUE,
  comment = "#>"
)
options(rmarkdown.html_vignette.check_title = FALSE)

# if local
# source("../data-raw/pretty_regression_tables.R")

## ----discord-setup, message = FALSE-------------------------------------------
# For easy data manipulation
library(dplyr)
library(tidyr)

# For kinship linkages
library(NlsyLinks)
# For discordant-kinship regression
library(discord)
# To clean data frames
library(janitor)
library(broom)
# For pipe
library(magrittr)
# For pedigree data manipulation
library(BGmisc)
# For pedigree plotting
library(ggpedigree)
library(ggplot2)

## -----------------------------------------------------------------------------
df_wide <- data.frame(
  pid = 1:5,
  id_s1 = c(101, 201, 301, 401, 501),
  id_s2 = c(102, 202, 302, 402, 502),
  age_s1 = c(30, 27, 40, 36, 30),
  age_s2 = c(28, 25, 38, 35, 28),
  height_s1 = c(175, 160, 180, 170, 165),
  height_s2 = c(170, 162, 178, 172, 168),
  weight_s1 = c(70, 60, 80, 75, 65),
  weight_s2 = c(68, 62, 78, 74, 66)
)

df_wide %>%
  slice(1:5) %>%
  knitr::kable()

## -----------------------------------------------------------------------------
df_long <- df_wide %>%
  tidyr::pivot_longer(
    cols = -pid, # keep the dyad identifier intact
    names_to = c(".value", "sibling"), # split base names and the sibling marker
    names_sep = "_s" # original suffix delimiter in column names
  )

df_long %>%
  slice(1:10) %>%
  knitr::kable()

## -----------------------------------------------------------------------------
df_long2wide <- df_long %>%
  tidyr::pivot_wider(
    names_from = sibling, # the column that indicates the sibling number
    values_from = c(id, age, height, weight), # variables to spread into paired columns
    names_sep = "_s" # ensures id_s1, id_s2,etc
  )

df_long2wide %>%
  slice(1:5) %>%
  knitr::kable()

## -----------------------------------------------------------------------------
library(NlsyLinks)
data(Links79PairExpanded)

Links79PairExpanded %>%
  arrange(ExtendedID) %>%
  filter(RelationshipPath == "Gen1Housemates" & RFull == 0.5) %>%
  select(
    ExtendedID,
    SubjectTag_S1, SubjectTag_S2,
    RelationshipPath, RFull, IsMz,
    EverSharedHouse
  ) %>% # used to make the table
  slice_head(n = 5) %>%
  knitr::kable()

## ----echo=TRUE, fig.height=3, fig.width=5, message=FALSE, warning=FALSE-------
data(potter)
ggpedigree(potter, config = list(
  label_method = "geom_text",
  label_nudge_y = .25,
  focal_fill_personID = 7,
  focal_fill_include = TRUE,
  focal_fill_force_zero = TRUE,
  focal_fill_na_value = "grey50",
  focal_fill_low_color = "darkred",
  focal_fill_high_color = "gold",
  focal_fill_mid_color = "orange",
  focal_fill_scale_midpoint = .65,
  focal_fill_component = "additive",
  focal_fill_method = "steps", #
  # focal_fill_method = "viridis_c",
  focal_fill_use_log = FALSE,
  focal_fill_n_breaks = 10,
  sex_color_include = F,
  focal_fill_legend_title = "Genetic Relatives \nof Harry Potter"
)) +
  labs(title = "Potter Pedigree Plot") +
  theme(legend.position = "right")

## -----------------------------------------------------------------------------
data(potter)
df_ped <- potter %>%
  as.data.frame() %>%
  select(
    personID, sex, famID, momID, dadID, spouseID,
    twinID, zygosity
  ) %>%
  mutate(x_var = round(rnorm(nrow(.), mean = 0, sd = 1), digits = 2))

df_ped %>%
  slice(1:5) %>%
  knitr::kable(digits = 2)

## -----------------------------------------------------------------------------
add <- ped2add(df_ped)
cn <- ped2cn(df_ped)

## -----------------------------------------------------------------------------
df_links <- com2links(
  writetodisk = FALSE,
  ad_ped_matrix = add,
  cn_ped_matrix = cn,
  drop_upper_triangular = TRUE
) %>%
  filter(ID1 != ID2)

df_links %>%
  slice(1:5) %>%
  knitr::kable(digits = 3)

## -----------------------------------------------------------------------------
df_links %>%
  group_by(addRel, cnuRel) %>%
  tally() %>%
  knitr::kable()

## -----------------------------------------------------------------------------
df_cousin <- df_links %>%
  filter(addRel == .125) %>% # only cousins %>%
  filter(cnuRel == 0) # only kin raised in separate homes

## -----------------------------------------------------------------------------
set.seed(2024)

df_synthetic <- discord::kinsim(
  mu_all = c(1, 1), # means
  cov_a = .5,
  cov_c = .1, #
  cov_e = .3,
  c_vector = rep(df_cousin$cnuRel, 3),
  r_vector = rep(df_cousin$addRel, 3)
)

## -----------------------------------------------------------------------------
df_synthetic <- df_synthetic %>%
  select(
    pid = id,
    r,
    weight_s1 = y1_1,
    weight_s2 = y1_2,
    height_s1 = y2_1,
    height_s2 = y2_2
  ) %>%
  mutate( # simulates age such that the 2nd sibling is between 1 and 5 years younger.
    age_s1 = round(rnorm(nrow(.), mean = 30, sd = 5), digits = 0),
    age_s2 = age_s1 - sample(1:5, nrow(.), replace = TRUE)
  )

df_synthetic %>%
  slice(1:5) %>%
  knitr::kable(digits = 2)

## -----------------------------------------------------------------------------
# CHOOSE ONE based on your path
# source_wide <- df_wide
# source_wide <- df_long2wide
source_wide <- df_synthetic # if you followed the pedigree path

## -----------------------------------------------------------------------------
df_discord_weight <- discord::discord_data(
  data = source_wide,
  outcome = "weight",
  predictors = c("height", "age"),
  demographics = "none",
  pair_identifiers = c("_s1", "_s2"),
  id = "pid" # or "famID"
)

df_discord_weight %>%
  slice(1:5) %>%
  knitr::kable(digits = 2, caption = "Transformed data ready for discordant-kinship regression")

## ----examine-transformation---------------------------------------------------
# Show original data for first 3 pairs
source_wide %>%
  slice(1:3) %>%
  select(pid, weight_s1, weight_s2, height_s1, height_s2) %>%
  knitr::kable(
    digits = 2,
    caption = "Original data: siblings not yet ordered by outcome"
  )

df_discord_weight %>%
  select(
    id,
    weight_1, weight_2, weight_mean, weight_diff,
    height_1, height_2, height_mean, height_diff
  ) %>%
  slice(1:3) %>%
  knitr::kable(
    digits = 2,
    caption = "After discord_data(): siblings ordered so weight_1 >= weight_2"
  )

## ----select-for-ols-----------------------------------------------------------
df_for_ols <- df_synthetic %>%
  dplyr::select(
    id = pid,
    weight = weight_s1,
    height = height_s1,
    age = age_s1
  )


df_discord_age <- discord::discord_data(
  data = source_wide,
  outcome = "age",
  predictors = c("height", "weight"),
  demographics = "none",
  pair_identifiers = c("_s1", "_s2"),
  id = "pid" # or "famID" if you followed the pedigree path
)

## ----ols-sample-output--------------------------------------------------------
df_discord_age %>%
  select(
    id, age_1, age_2,
    age_mean, age_diff, weight_1, height_1
  ) %>%
  slice(1:5) %>%
  knitr::kable(caption = "One sibling per pair for OLS regression, selecting by age", digits = 2)

df_for_ols %>%
  select(id, age, weight, height) %>%
  slice(1:5) %>%
  knitr::kable(caption = "One sibling per pair for OLS regression using original wide data", digits = 2)

## ----ols-regression-----------------------------------------------------------
ols_model <- lm(weight ~ height + age, data = df_for_ols)

## ----ols-output, message=FALSE, warning=FALSE, eval = knitr::is_html_output(),results='asis'----
stargazer::stargazer(ols_model,
  type = "html", ci = TRUE,
  digits = 3, single.row = TRUE, title = "Standard OLS Regression Results"
)

## ----ols-output-latex, message=FALSE, warning=FALSE,  eval = knitr::is_latex_output(), error=FALSE----
# stargazer::stargazer(ols_model,
#   type = "latex", ci = TRUE,
#   digits = 3, single.row = TRUE, title = "Standard OLS Regression Results"
# )

## ----between-family-----------------------------------------------------------
between_model <- lm(
  weight_mean ~ height_mean + age_mean,
  data = df_discord_weight
)

## ----between-output, results='asis', message=FALSE, warning=FALSE, eval = knitr::is_html_output()----
stargazer::stargazer(between_model,
  type = "html", ci = TRUE,
  digits = 3, single.row = TRUE, title = "Between-Family Regression Results"
)

## ----between-output-latex, echo = FALSE, eval = knitr::is_latex_output(), error=FALSE----
# stargazer::stargazer(between_model,
#   type = "latex", ci = TRUE,
#   digits = 3, single.row = TRUE, title = "Between-Family Regression Results"
# )

## ----discord-manual-----------------------------------------------------------
discord_model_manual <- lm(
  weight_diff ~ weight_mean + height_mean + height_diff + age_mean + age_diff,
  data = df_discord_weight
)

tidy(discord_model_manual, conf.int = TRUE) %>%
  knitr::kable(digits = 3, caption = "Discordant Regression (Manual)")

## ----discord-manual-output, message=FALSE, warning=FALSE, eval = knitr::is_html_output(),results='asis'----
stargazer::stargazer(between_model, discord_model_manual,
  type = "html", ci = TRUE,
  digits = 3, single.row = TRUE, title = "Between-Family and Discordant Regression Results"
)

## ----discord-manual-output-latex, echo = FALSE, eval = knitr::is_latex_output(), error=FALSE----
# stargazer::stargazer(between_model, discord_model_manual,
#   type = "latex", ci = TRUE,
#   digits = 3, single.row = TRUE, title = "Between-Family and Discordant Regression Results"
# )

## ----discord-function---------------------------------------------------------
discord_model <- discord_regression(
  data = source_wide,
  outcome = "weight",
  predictors = c("height", "age"),
  demographics = "none",
  sex = NULL,
  race = NULL,
  pair_identifiers = c("_s1", "_s2"),
  id = "pid"
)

tidy(discord_model, conf.int = TRUE) %>%
  knitr::kable(digits = 3, caption = "Discordant Regression Results")

glance(discord_model) %>%
  select(r.squared, adj.r.squared, sigma, p.value, nobs) %>%
  knitr::kable(digits = 3)

## ----discord-compare, message=FALSE, warning=FALSE, eval = knitr::is_html_output(),results='asis'----
stargazer::stargazer(discord_model,
  discord_model_manual,
  type = "html", ci = TRUE,
  digits = 3, single.row = TRUE, title = "Discordant Regression Results Comparison"
)

## ----discord-compare-latex, echo = FALSE, eval = knitr::is_latex_output(), error=FALSE----
# stargazer::stargazer(discord_model,
#   discord_model_manual,
#   type = "latex",
#   digits = 3, single.row = TRUE, title = "Discordant Regression Results Comparison"
# )

## ----all-models-comparison, message=FALSE, warning=FALSE, eval = knitr::is_html_output(),results='asis'----
stargazer::stargazer(
  ols_model,
  between_model,
  discord_model_manual,
  type = "html", ci = TRUE,
  digits = 3,
  single.row = TRUE,
  title = "Comparison of OLS, Between-Family, and Discordant Regression Models",
  column.labels = c("Standard OLS", "Between-Family", "Discordant"),
  model.names = FALSE
)

## ----all-models-comparison-latex, echo = FALSE, eval = knitr::is_latex_output(), error=FALSE----
# stargazer::stargazer(
#   ols_model,
#   between_model,
#   discord_model_manual,
#   type = "latex", ci = TRUE,
#   digits = 3,
#   single.row = TRUE,
#   title = "Comparison of OLS, Between-Family, and Discordant Regression Models",
#   column.labels = c("Standard OLS", "Between-Family", "Discordant"),
#   model.names = FALSE
# )

## -----------------------------------------------------------------------------
sessioninfo::session_info()

Try the discord package in your browser

Any scripts or data that you put into this service are public.

discord documentation built on Jan. 19, 2026, 1:07 a.m.