Nothing
## ----setup, include = FALSE---------------------------------------------------
knitr::opts_chunk$set(
collapse = TRUE,
comment = "#>"
)
## -----------------------------------------------------------------------------
# this package
library(bnpsd)
# for nice colors
library(RColorBrewer)
# for visualizing coancestry matrix with plot_popkin, and other handy functions
library(popkin)
# dimensions of data/model
# number of individuals (NOTE this is 10x less than in publication!)
n_ind <- 100
# number of intermediate subpops
k_subpops <- 10
# define population structure
# subpopulation FST vector, up to a scalar
inbr_subpops <- 1 : k_subpops
# desired bias coefficient
bias_coeff <- 0.5
# desired FST for the admixed individuals
Fst <- 0.1
# admixture proportions from 1D geography
obj <- admix_prop_1d_linear(
n_ind = n_ind,
k_subpops = k_subpops,
bias_coeff = bias_coeff,
coanc_subpops = inbr_subpops,
fst = Fst
)
admix_proportions <- obj$admix_proportions
# rescaled inbreeding vector for intermediate subpopulations
inbr_subpops <- obj$coanc_subpops
# get pop structure parameters of the admixed individuals
coancestry <- coanc_admix(admix_proportions, inbr_subpops)
# verify that we got the desired FST!
fst_admix(admix_proportions, inbr_subpops)
# the mean of the diagonal of the coancestry matrix also equals FST
inbr <- diag(coancestry)
fst(inbr) # `fst` is a function in the popkin package
# verify that we got the desired `bias_coeff` too!
mean(coancestry) / Fst
## ---- fig.width = 4, fig.height = 2, fig.align = 'center'---------------------
# visualize the per-subpopulation inbreeding coefficients (FSTs)
# shrink default margins
par(mar = c(4, 4, 0, 0) + 0.2)
# colors for independent subpopulations
col_subpops <- brewer.pal(k_subpops, "Paired")
barplot(inbr_subpops, col = col_subpops, names.arg = 1 : k_subpops, ylim = c(0, 1),
xlab = 'Subpopulation', ylab = 'Inbreeding coeff.')
## ---- fig.width = 6, fig.height = 1.5, fig.align = 'center'-------------------
# visualize the admixture proportions
# shrink default margins
par(mar = c(1, 4, 0, 0) + 0.2)
barplot(
t(admix_proportions),
col = col_subpops,
border = NA,
space = 0,
ylab = 'Admixture prop.'
)
mtext('Individuals', 1)
## ---- fig.width = 4.2, fig.height = 3, fig.align = 'center'-------------------
# Visualize the coancestry matrix using "popkin"!
plot_popkin(
coancestry,
# zero inner margin (plus padding) because we have no labels
mar = 0,
leg_title = 'Coancestry'
)
## -----------------------------------------------------------------------------
# number of loci in simulation (NOTE this is 30x less than in publication!)
m_loci <- 10000
# draw all random Allele Freqs (AFs) and genotypes
# reuse the previous inbr_subpops, admix_proportions
out <- draw_all_admix(
admix_proportions = admix_proportions,
inbr_subpops = inbr_subpops,
m_loci = m_loci,
# NOTE by default p_subpops and p_ind are not returned, but here we will ask for them
want_p_subpops = TRUE,
# NOTE: asking for `p_ind` increases memory usage substantially,
# so don't ask for it unless you're sure you want it!
want_p_ind = TRUE
)
# genotypes
X <- out$X
# ancestral AFs
p_anc <- out$p_anc
# intermediate independent subpopulation AFs
p_subpops <- out$p_subpops
# individual-specific AFs
p_ind <- out$p_ind
## ---- fig.width = 4, fig.height = 2, fig.align = 'center'---------------------
# inspect distribution of ancestral AFs (~ Uniform(0.01, 0.5))
# shrink default margins for these figures
par(mar = c(4, 4, 0, 0) + 0.2)
hist(p_anc, xlab = 'Ancestral AF', main = '', xlim = c(0, 1))
# distribution of intermediate population AFs
# (all subpopulations combined)
# (will be more dispersed toward 0 and 1 than ancestral AFs)
hist(p_subpops, xlab = 'Intermediate Subpopulation AF', main = '', xlim = c(0, 1))
# distribution of individual-specific AFs (admixed individuals)
# (admixture reduces differentiation, so these resemble ancestral AFs a bit more)
hist(p_ind, xlab = 'Individual-specific AF', main = '', xlim = c(0, 1))
# genotype distribution of admixed individuals
barplot(table(X), xlab = 'Genotypes', ylab = 'Frequency', col = 'white')
## ---- fig.width = 6, fig.height = 2.8, fig.align = 'center'-------------------
# for best estimates, group individuals into subpopulations using the geography
# this averages more individuals in estimating the minimum kinship
subpops <- ceiling( ( 1 : n_ind ) / n_ind * k_subpops )
table(subpops) # got k_subpops = 10 with 100 individuals each
# now estimate kinship using popkin
kinship_estimate <- popkin(X, subpops)
# replace diagonal with inbreeding coeffs. to match coancestry matrix
coancestry_estimate <- inbr_diag(kinship_estimate)
# Visualize the coancestry matrix using "popkin"!
plot_popkin(
list( coancestry, coancestry_estimate ),
titles = c('Truth', 'Estimate'),
# second value is top margin, for panel titles
mar = c(0, 2.5),
leg_title = 'Coancestry'
)
## -----------------------------------------------------------------------------
coancestry_subpops_estimate <- popkin_af( p_subpops )
coancestry_ind_estimate <- popkin_af( p_ind )
## ---- fig.width = 6, fig.height = 2.8, fig.align = 'center'-------------------
plot_popkin(
list( diag( inbr_subpops ), coancestry_subpops_estimate ),
titles = c('Truth', 'Estimate'),
mar = c(0, 2.5),
leg_title = 'Coancestry'
)
## ---- fig.width = 6, fig.height = 2.8, fig.align = 'center'-------------------
plot_popkin(
list( coancestry, coancestry_ind_estimate ),
titles = c('Truth', 'Estimate'),
mar = c(0, 2.5),
leg_title = 'Coancestry'
)
## -----------------------------------------------------------------------------
# reuse the previous m_loci, inbr_subpops, admix_proportions
# draw ancestral AFs
p_anc <- draw_p_anc(m_loci)
# draw intermediate independent subpopulations AFs
p_subpops <- draw_p_subpops(p_anc, inbr_subpops)
# draw individual-specific AFs
p_ind <- make_p_ind_admix(p_subpops, admix_proportions)
# draw genotypes
X <- draw_genotypes_admix(p_ind)
## ---- fig.width = 4, fig.height = 2, fig.align = 'center'---------------------
# this increases the proportion of rare alleles
p_anc_beta <- draw_p_anc(m_loci, beta = 0.1)
# shrink default margins for these figures
par(mar = c(4, 4, 0, 0) + 0.2)
hist(p_anc_beta, xlab = 'Ancestral AF', main = '', xlim = c(0, 1))
## ---- fig.width = 4, fig.height = 2, fig.align = 'center'---------------------
# this increases the proportion of rare alleles
p_anc_beta <- rbeta(m_loci, shape1 = 0.1, shape2 = 1)
# shrink default margins for these figures
par(mar = c(4, 4, 0, 0) + 0.2)
hist(p_anc_beta, xlab = 'Ancestral AF', main = '', xlim = c(0, 1))
## -----------------------------------------------------------------------------
# draw genotypes for one individual from the ancestral population
# use "cbind" to turn the vector p_anc into a column matrix
# ("draw_genotypes_admix" expects a matrix)
X_anc <- draw_genotypes_admix( cbind( p_anc ) )
# returns a column matrix:
dim(X_anc)
# draw genotypes from intermediate populations
# draws one individual per intermediate population
X_subpops <- draw_genotypes_admix(p_subpops)
## -----------------------------------------------------------------------------
# data dimensions
# number of individuals
n_ind <- 100
# number of intermediate subpops
k_subpops <- 10
# define population structure
# subpopulation FST vector, up to a scalar
inbr_subpops <- 1 : k_subpops
# desired bias coefficient
bias_coeff <- 0.5
# desired FST for the admixed individuals
Fst <- 0.1
# admixture proportions from *circular* 1D geography
obj <- admix_prop_1d_circular(
n_ind = n_ind,
k_subpops = k_subpops,
bias_coeff = bias_coeff,
coanc_subpops = inbr_subpops,
fst = Fst
)
admix_proportions <- obj$admix_proportions
inbr_subpops <- obj$coanc_subpops
# get pop structure parameters of the admixed individuals
coancestry <- coanc_admix(admix_proportions, inbr_subpops)
# verify that we got the desired FST!
fst_admix(admix_proportions, inbr_subpops)
# verify that we got the desired bias_coeff too!
mean(coancestry) / Fst
## ---- fig.width = 4, fig.height = 1.2, fig.align = 'center'-------------------
# visualize the per-subpopulation inbreeding coefficients (FSTs)
# tweak margins/etc
par(mar = c(2.5, 2.5, 0.3, 0) + 0.2, lab = c(2, 1, 7), mgp = c(1.5, 0.5, 0))
# colors for independent subpopulations
col_subpops <- brewer.pal(k_subpops, "Paired")
barplot(inbr_subpops, col = col_subpops, names.arg = colnames(admix_proportions), xlab = 'Subpopulation', ylab = 'Inbr')
## ---- fig.width = 4, fig.height = 1, fig.align = 'center'---------------------
# visualize the admixture proportions
# tweak margins/etc
par(mar = c(1, 4, 0.4, 0) + 0.2, lab = c(2, 2, 7))
barplot(
t(admix_proportions),
col = col_subpops,
border = NA,
space = 0,
ylab = 'Admix prop'
)
mtext('Individuals', 1)
## ---- fig.width = 4.2, fig.height = 3, fig.align = 'center'-------------------
# Visualize the coancestry matrix using "popkin"!
plot_popkin(
coancestry,
leg_n = 3,
mar = c(0, 0.4),
leg_title = 'Coancestry'
)
## -----------------------------------------------------------------------------
# define population structure
# we'll have k_subpops = 3, each subpopulation with these sizes:
n1 <- 100
n2 <- 50
n3 <- 20
# here's the labels (for simplicity, list all individuals of S1 first, then S2, then S3)
labs <- c(
rep.int('S1', n1),
rep.int('S2', n2),
rep.int('S3', n3)
)
# data dimensions infered from labs:
# number of individuals "n_ind"
length(labs)
# number of subpopulations "k_subpops"
k_subpops <- length(unique(labs))
k_subpops
# desired admixture matrix
admix_proportions <- admix_prop_indep_subpops(labs)
# got a numeric matrix with a single 1 value per row
# (denoting the sole subpopulation from which each individual draws its ancestry)
head(admix_proportions, 2)
# construct the intermediate subpopulation FST vector
# the desired final FST
Fst <- 0.2
# subpopulation FST vector, unnormalized so far
inbr_subpops <- 1 : k_subpops
# normalized to have the desired FST
# NOTE fst is a function in the `popkin` package
inbr_subpops <- inbr_subpops / fst(inbr_subpops) * Fst
# verify FST for the intermediate subpopulations
fst(inbr_subpops)
# get coancestry of the admixed individuals
coancestry <- coanc_admix(admix_proportions, inbr_subpops)
# before getting FST for individuals, weigh then inversely proportional to subpop sizes
weights <- weights_subpops(labs) # function from `popkin` package
# verify FST for individuals (same as for intermediate subpops for this pop structure)
fst_admix(admix_proportions, inbr_subpops, weights)
## ---- fig.width = 4, fig.height = 1.2, fig.align = 'center'-------------------
# visualize the per-subpopulation inbreeding coefficients (FSTs)
# tweak margins/etc
par(mar = c(2.5, 2.5, 0, 0) + 0.2, lab = c(2, 1, 7), mgp = c(1.5, 0.5, 0))
# colors for independent subpopulations
col_subpops <- brewer.pal(k_subpops, "Paired")
barplot(inbr_subpops, col = col_subpops, names.arg = colnames(admix_proportions), xlab = 'Subpopulation', ylab = 'Inbr')
## ---- fig.width = 4, fig.height = 1, fig.align = 'center'---------------------
# visualize the admixture proportions
# tweak margins/etc
par(mar = c(1, 4, 0.4, 0) + 0.2, lab = c(2, 2, 7))
barplot(
t(admix_proportions),
col = col_subpops,
border = NA,
space = 0,
ylab = 'Admix prop'
)
mtext('Individuals', 1)
## ---- fig.width = 4.2, fig.height = 3, fig.align = 'center'-------------------
# Visualize the coancestry matrix using "popkin"!
plot_popkin(
coancestry,
leg_n = 3,
mar = c(0, 0.4),
leg_title = 'Coancestry'
)
## -----------------------------------------------------------------------------
tree_str <- '(S1:0.1,(S2:0.1,(S3:0.1,(S4:0.1,S5:0.1)N3:0.1)N2:0.1)N1:0.1)T;'
## ---- fig.width = 4, fig.height = 3.5, fig.align = 'center'-------------------
# base package for phylogenetics
library(ape)
# parses tree, creates object of class "phylo"
tree_subpops <- read.tree( text = tree_str )
# plot tree
par( mar = c(3,0,0,0) + 0.2 )
plot( tree_subpops, show.node.label = TRUE )
# add axis and label
axisPhylo( backward = FALSE )
mtext( 'Coancestry', side = 1, line = 2 )
## -----------------------------------------------------------------------------
p_subpops_tree <- draw_p_subpops_tree(
p_anc = p_anc,
tree_subpops = tree_subpops
)
## ---- fig.width = 6, fig.height = 2.8, fig.align = 'center'-------------------
# estimate coancestry
coancestry_subpops_tree_estimate <- popkin_af( p_subpops_tree )
# expected coancestry according to model
coancestry_subpops_tree <- coanc_tree( tree_subpops )
# plot
plot_popkin(
list( coancestry_subpops_tree, coancestry_subpops_tree_estimate ),
titles = c('Truth', 'Estimate'),
mar = c(2, 2.5),
leg_title = 'Coancestry',
names = TRUE
)
## -----------------------------------------------------------------------------
1 - (1 - 0.1)^4
## -----------------------------------------------------------------------------
# number of admixed individuals
n_ind <- 100
# number of subpopulations, extracted from true coancestry
k_subpops <- nrow( coancestry_subpops_tree )
# define population structure
# for simplicity here we set spread parameter `sigma` directly
# admixture proportions from 1D geography
admix_proportions <- admix_prop_1d_linear(
n_ind = n_ind,
k_subpops = k_subpops,
sigma = 0.5
)
# get pop structure parameters of the admixed individuals
coancestry <- coanc_admix(admix_proportions, coancestry_subpops_tree)
# calculate true FST of simulation
fst_admix(admix_proportions, coancestry_subpops_tree)
## ---- fig.width = 6, fig.height = 1.5, fig.align = 'center'-------------------
# visualize the admixture proportions
# colors for these independent subpopulations
col_subpops <- brewer.pal(k_subpops, "Paired")
# shrink default margins
par(mar = c(1, 4, 0, 0) + 0.2)
barplot(
t(admix_proportions),
col = col_subpops,
border = NA,
space = 0,
ylab = 'Admixture prop.'
)
mtext('Individuals', 1)
## ---- fig.width = 4.2, fig.height = 3, fig.align = 'center'-------------------
# Visualize the coancestry matrix using "popkin"!
plot_popkin(
coancestry,
# zero inner margin (plus padding) because we have no labels
mar = 0,
leg_title = 'Coancestry'
)
## -----------------------------------------------------------------------------
# number of loci in simulation (NOTE this is 30x less than in publication!)
m_loci <- 10000
# draw all random Allele Freqs (AFs) and genotypes
# reuse the previous inbr_subpops, admix_proportions
out <- draw_all_admix(
admix_proportions = admix_proportions,
tree_subpops = tree_subpops,
m_loci = m_loci
)
# genotypes
X <- out$X
## ---- fig.width = 6, fig.height = 2.8, fig.align = 'center'-------------------
# estimate kinship using popkin
# here we didn't use subpopulation labels for simplicity
kinship_estimate <- popkin(X)
# replace diagonal with inbreeding coeffs. to match coancestry matrix
coancestry_estimate <- inbr_diag(kinship_estimate)
# Visualize the coancestry matrix using "popkin"!
plot_popkin(
list( coancestry, coancestry_estimate ),
titles = c('Truth', 'Estimate'),
# second value is top margin, for panel titles
mar = c(0, 2.5),
leg_title = 'Coancestry'
)
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.