inst/doc/a01_overview.R

## ----include = FALSE----------------------------------------------------------
knitr::opts_chunk$set(
  collapse = TRUE,
  comment = "#>"
)

## -----------------------------------------------------------------------------
library(tidypopgen)
example_indiv_meta <- data.frame(
  id = c("a", "b", "c", "d", "e"),
  population = c("pop1", "pop1", "pop2", "pop2", "pop2")
)
example_genotypes <- rbind(
  c(1, 1, 0, 1, 1, 0),
  c(2, 0, 0, 0, NA, 0),
  c(1, 2, 0, 0, 1, 1),
  c(0, 2, 0, 1, 2, 1),
  c(1, 1, NA, 2, 1, 0)
)
example_loci <- data.frame(
  name = c("rs1", "rs2", "rs3", "rs4", "x1", "x2"),
  chromosome = c(1, 1, 1, 1, 2, 2),
  position = c(3, 5, 65, 343, 23, 456),
  genetic_dist = c(0, 0, 0, 0, 0, 0),
  allele_ref = c("A", "T", "C", "G", "C", "T"),
  allele_alt = c("T", "C", NA, "C", "G", "A")
)

## -----------------------------------------------------------------------------
example_gt <- gen_tibble(example_genotypes,
  indiv_meta = example_indiv_meta,
  loci = example_loci,
  backingfile = tempfile()
)

## -----------------------------------------------------------------------------
example_gt

## -----------------------------------------------------------------------------
example_gt %>% show_genotypes()

## -----------------------------------------------------------------------------
example_gt %>% show_loci()

## -----------------------------------------------------------------------------
example_gt %>% indiv_het_obs()

## -----------------------------------------------------------------------------
example_gt %>% mutate(het_obs = indiv_het_obs(.data$genotypes))

## -----------------------------------------------------------------------------
example_gt %>% mutate(het_obs = indiv_het_obs(genotypes))

## -----------------------------------------------------------------------------
example_pop2 <- example_gt %>% filter(population == "pop2")
example_pop2

## -----------------------------------------------------------------------------
example_gt %>% indiv_het_obs()

## -----------------------------------------------------------------------------
example_gt %>%
  filter(population == "pop2") %>%
  indiv_het_obs()

## -----------------------------------------------------------------------------
example_gt %>% mutate(het_obs = indiv_het_obs(genotypes))

## -----------------------------------------------------------------------------
loci_names(example_gt)

## -----------------------------------------------------------------------------
example_sub <- example_gt %>% select_loci(starts_with("rs"))
example_sub

## -----------------------------------------------------------------------------
loci_names(example_sub)

## -----------------------------------------------------------------------------
example_sub %>% indiv_het_obs()

## -----------------------------------------------------------------------------
example_gt %>%
  select_loci(c(2, 6, 1)) %>%
  show_loci()

## -----------------------------------------------------------------------------
example_gt %>%
  select_loci(c(2, 6, 1)) %>%
  show_genotypes()

## -----------------------------------------------------------------------------
example_gt %>% loci_maf()

## -----------------------------------------------------------------------------
sel_indices <- which((example_gt %>% loci_maf()) > 0.2)
example_gt %>%
  select_loci(all_of(sel_indices)) %>%
  show_loci()

## -----------------------------------------------------------------------------
example_gt_sub <- example_gt %>% select_loci_if(loci_maf(genotypes) > 0.2)
example_gt_sub %>% show_genotypes()

## -----------------------------------------------------------------------------
example_gt %>%
  select_loci_if(
    loci_chromosomes(genotypes) == 2 &
      loci_maf(genotypes) > 0.2
  ) %>%
  show_loci()

## -----------------------------------------------------------------------------
example_gt %>%
  group_by(population) %>%
  tally()

## -----------------------------------------------------------------------------
example_gt %>%
  group_by(population) %>%
  summarise(n = n(), mean_het = mean(indiv_het_obs(genotypes)))

## -----------------------------------------------------------------------------
example_gt %>%
  mutate(het_obs = indiv_het_obs(genotypes)) %>%
  group_by(population) %>%
  summarise(n = n(), mean_het = mean(het_obs))

## -----------------------------------------------------------------------------
example_gt %>%
  group_by(population) %>%
  reframe(loci_hwe = loci_hwe(genotypes))

## -----------------------------------------------------------------------------
example_gt %>%
  group_by(population) %>%
  loci_maf()

## -----------------------------------------------------------------------------
example_gt %>%
  group_by(population) %>%
  loci_maf(type = "list")

## -----------------------------------------------------------------------------
example_gt %>%
  group_by(population) %>%
  loci_maf(type = "matrix")

## -----------------------------------------------------------------------------
example_gt %>%
  group_by(population) %>%
  pop_fst()

## -----------------------------------------------------------------------------
example_gt %>%
  pairwise_ibs()

## -----------------------------------------------------------------------------
gt_file_name <- gt_save(example_gt)
gt_file_name

## -----------------------------------------------------------------------------
gt_get_file_names(example_gt)

## -----------------------------------------------------------------------------
new_example_gt <- gt_load(gt_file_name[1])
new_example_gt %>% show_genotypes()

## -----------------------------------------------------------------------------
bed_path_pop_a <- system.file("extdata/pop_a.bed", package = "tidypopgen")
pop_a_gt <- gen_tibble(bed_path_pop_a, backingfile = tempfile("pop_a_"))

## -----------------------------------------------------------------------------
gt_as_plink(example_gt, file = tempfile("new_bed_"))

## -----------------------------------------------------------------------------
bed_path_pop_a <- system.file("extdata/pop_a.bed", package = "tidypopgen")
bigsnp_path_a <-
  bigsnpr::snp_readBed(bed_path_pop_a, backingfile = tempfile("pop_a_"))
pop_a_gt <- gen_tibble(bigsnp_path_a)
bed_path_pop_b <- system.file("extdata/pop_b.bed", package = "tidypopgen")
bigsnp_path_b <-
  bigsnpr::snp_readBed(bed_path_pop_b, backingfile = tempfile("pop_b_"))
pop_b_gt <- gen_tibble(bigsnp_path_b)

## -----------------------------------------------------------------------------
pop_a_gt

## -----------------------------------------------------------------------------
pop_b_gt

## -----------------------------------------------------------------------------
report <- rbind_dry_run(pop_a_gt, pop_b_gt, flip_strand = TRUE)

## -----------------------------------------------------------------------------
# #create merge
merged_gt <- rbind(pop_a_gt, pop_b_gt,
  flip_strand = TRUE,
  backingfile = file.path(tempdir(), "gt_merged")
)

## -----------------------------------------------------------------------------
merged_gt

## -----------------------------------------------------------------------------
merged_gt %>% show_loci()

## -----------------------------------------------------------------------------
bed_file <- system.file("extdata", "example-missing.bed", package = "bigsnpr")
missing_gt <- gen_tibble(bed_file, backingfile = tempfile("missing_"))
missing_gt

## ----fig.alt = "Histogram of loci missingness, showing that most loci have no missing data, while some have a small proportion of missing data"----
missing_gt %>%
  loci_missingness() %>%
  hist()

## ----error = TRUE-------------------------------------------------------------
try({
missing_pca <- missing_gt %>% gt_pca_autoSVD()
})

## -----------------------------------------------------------------------------
missing_gt <- gt_impute_simple(missing_gt, method = "mode")

## -----------------------------------------------------------------------------
gt_has_imputed(missing_gt)

## -----------------------------------------------------------------------------
gt_uses_imputed(missing_gt)

## ----fig.alt = "Histogram of loci missingness as above, showing that the use of imputed data is not automatic"----
missing_gt %>%
  loci_missingness() %>%
  hist()

## ----fig.alt = "Histogram of loci missingness after setting use of imputed data to true, showing that there is no missingness"----
gt_set_imputed(missing_gt, set = TRUE)
missing_gt %>%
  loci_missingness() %>%
  hist()

## ----fig.alt = "Histogram of loci missingness after setting use of imputed data to false again"----
gt_set_imputed(missing_gt, set = FALSE)
missing_gt %>%
  loci_missingness() %>%
  hist()

## -----------------------------------------------------------------------------
missing_pca <- missing_gt %>% gt_pca_partialSVD()
missing_pca

## ----fig.alt = "Histogram of loci missingness, showing that use of imputed data is not automatically set to true, after using a PCA function"----
gt_uses_imputed(missing_gt)
missing_gt %>%
  loci_missingness() %>%
  hist()

Try the tidypopgen package in your browser

Any scripts or data that you put into this service are public.

tidypopgen documentation built on Aug. 28, 2025, 1:08 a.m.