Nothing
## ----setup, include = FALSE---------------------------------------------------
knitr::opts_chunk$set(
collapse = TRUE,
comment = "#>"
)
## -----------------------------------------------------------------------------
library(popkin)
# rename for simplicity
X <- hgdp_subset
dim(X)
## -----------------------------------------------------------------------------
# shorten subpopulation labels
colnames(X)[colnames(X) == 'AFRICA'] <- 'AFR'
colnames(X)[colnames(X) == 'MIDDLE_EAST'] <- 'MDE'
colnames(X)[colnames(X) == 'EUROPE'] <- 'EUR'
colnames(X)[colnames(X) == 'CENTRAL_SOUTH_ASIA'] <- 'SAS'
colnames(X)[colnames(X) == 'EAST_ASIA'] <- 'EAS'
colnames(X)[colnames(X) == 'OCEANIA'] <- 'OCE'
colnames(X)[colnames(X) == 'AMERICA'] <- 'AMR'
# order roughly by distance from Africa
# (without crossing the Atlantic Ocean)
pop_order <- c('AFR', 'MDE', 'EUR', 'SAS', 'EAS', 'OCE', 'AMR')
# applies reordering
X <- X[, order(match(colnames(X), pop_order))]
# extract subpopulations vector
subpops <- colnames(X)
## -----------------------------------------------------------------------------
X[1:10,1:15]
## -----------------------------------------------------------------------------
kinship <- popkin(X, subpops)
## ---- fig.width = 4.2, fig.height = 3, fig.align = 'center'-------------------
plot_popkin(
kinship,
labs = subpops,
# shared bottom and left margin value, to make space for labels
mar = 1
)
## ---- fig.width = 4.2, fig.height = 3, fig.align = 'center'-------------------
plot_popkin(
inbr_diag( kinship ),
labs = subpops,
mar = 1
)
## ---- fig.width = 4.2, fig.height = 3, fig.align = 'center'-------------------
plot_popkin(
inbr_diag(kinship),
labs = subpops,
labs_even = TRUE,
labs_line = 1,
labs_cex = 0.7,
mar = 2
)
## -----------------------------------------------------------------------------
# get weights
w <- weights_subpops(subpops)
# compute FST!
# Note: don't use the output to inbr_diag(kinship) or FST will be wrong!
fst(kinship, w)
## ---- fig.width = 4, fig.height = 2, fig.align = 'center'---------------------
# vector of inbreeding coefficients
inbrs <- inbr(kinship)
# quick plot
# adjust margins
par(mar = c(4, 4, 0, 0.2) + 0.2)
# see their distribution
plot(density(inbrs), xlab = 'inbreeding coefficient', main = '')
## ---- fig.width = 4.2, fig.height = 3, fig.align = 'center'-------------------
# compute pairwise FST matrix from kinship matrix
pairwise_fst <- pwfst(kinship)
# fancy legend label
leg_title <- expression(paste('Pairwise ', F[ST]))
# NOTE no need for inbr_diag() here!
plot_popkin(
pairwise_fst,
labs = subpops,
labs_even = TRUE,
labs_line = 1,
labs_cex = 0.7,
leg_title = leg_title,
mar = c(2, 0.2)
)
## ---- fig.width = 4.2, fig.height = 3, fig.align = 'center'-------------------
indexes_AFR <- subpops == 'AFR'
# AFR subset of the kinship matrix
kinship_AFR <- kinship[indexes_AFR, indexes_AFR]
# kinship matrix plot
plot_popkin(
inbr_diag( kinship_AFR ),
mar = 0
)
# estimate FST before rescaling (this value will be wrong, too high!)
fst( kinship_AFR )
## ---- fig.width = 4.2, fig.height = 3, fig.align = 'center'-------------------
# rescale kinship_AFR
# since subpops is missing, minimum kinship value is set to zero
# (no averaging between subpopulations)
kinship_AFR <- rescale_popkin( kinship_AFR )
# kinship matrix plot
plot_popkin(
inbr_diag( kinship_AFR ),
mar = c(0, 0.3)
)
# FST is now correct, relative to the MRCA of AFR individuals
fst( kinship_AFR )
## ---- fig.width = 6, fig.height = 2.8, fig.align = 'center'-------------------
# dummy labels to have lines in second panel
subpops_AFR <- subpops[ indexes_AFR ]
plot_popkin(
# inbr_diag also works on a list of matrices
inbr_diag( list( kinship, kinship_AFR ) ),
# title of each panel
titles = c('All', 'AFR only, rescaled'),
# pass per-panel labels using a list
labs = list( subpops, subpops_AFR ),
# scalar options are shared across panels
labs_even = TRUE,
labs_line = 1,
labs_cex = 0.5,
# second value is top margin (to make space for titles)
mar = c(2, 2)
)
## ---- fig.width = 4.2, fig.height = 3, fig.align = 'center'-------------------
# create second level of labels
# first copy first-level labels
blocks <- subpops
# first block is AFR
blocks[blocks == 'AFR'] <- 'B1'
# second block is West Eurasians, broadly defined
blocks[blocks == 'MDE'] <- 'B2'
blocks[blocks == 'EUR'] <- 'B2'
blocks[blocks == 'SAS'] <- 'B2'
# third block is East Eurasians, broadly defined
blocks[blocks == 'EAS'] <- 'B3'
blocks[blocks == 'OCE'] <- 'B3'
blocks[blocks == 'AMR'] <- 'B3'
# plotting with different options per level is more complicated...
plot_popkin(
inbr_diag( kinship ),
labs = cbind( subpops, blocks ), # ... labs is now a matrix with levels on columns
labs_even = c(TRUE, FALSE), # ... even spacing for first level only
labs_line = c(1, 2), # ... put second level further out
labs_cex = c(0.7, 1), # ... don't shrink second level
labs_sep = c(FALSE, TRUE), # ... draw lines inside heatmap for second level only
ylab_adj = 0.65, # push up outer margin ylab "Individuals"
mar = 3 # increase margins again
)
## ---- fig.width = 6, fig.height = 2.8, fig.align = 'center'-------------------
plot_popkin(
inbr_diag(list(kinship, kinship_AFR)),
titles = c('All', 'AFR only, rescaled'),
# list of matrices
labs = list( cbind(subpops, blocks), subpops_AFR ),
# non-list: values are reused for both panels
labs_even = c(TRUE, FALSE),
labs_line = c(1, 2),
# make label bigger in second panel (custom per-panel values)
# list of vectors
labs_cex = list(c(0.5, 0.7), 1),
# add lines for first level of second panel (custom per-panel values)
# list of vectors
labs_sep = list(c(FALSE, TRUE), TRUE),
mar = c(3, 2)
)
## ---- fig.width = 6.5, fig.height = 2.8, fig.align = 'center'-----------------
plot_popkin(
inbr_diag(list(kinship, kinship_AFR)),
titles = c('All', 'AFR only, rescaled'),
labs = list( cbind(subpops, blocks), subpops_AFR ),
labs_even = c(TRUE, FALSE),
labs_line = c(1, 2),
labs_cex = list(c(0.5, 0.7), 1),
labs_sep = list(c(FALSE, TRUE), TRUE),
mar = c(3, 2),
# this option adds a legend per panel
leg_per_panel = TRUE
)
## -----------------------------------------------------------------------------
# number of loci (rows)
m_loci <- nrow( X )
# number of subpopulations (columns)
k_subpops <- length( pop_order )
# initialize matrix
P <- matrix( NA, nrow = m_loci, ncol = k_subpops )
# copy names from data
colnames( P ) <- pop_order
rownames( P ) <- rownames( X )
# navigate subpopulations
for ( u in 1 : k_subpops ) {
# subpopulation label name
pop <- pop_order[ u ]
# columns of interest
js <- subpops == pop
# now average genotypes into allele frequency estimates and store
P[ , u ] <- rowMeans( X[ , js ], na.rm = TRUE ) / 2
}
## -----------------------------------------------------------------------------
coancestry <- popkin_af( P )
## ---- fig.width = 4.2, fig.height = 3, fig.align = 'center'-------------------
# coancestry matrix plot
# NOTE: inbr_diag() is not needed for coancestry!
plot_popkin(
coancestry,
mar = 3,
names = TRUE,
ylab = 'Subpopulations'
)
## -----------------------------------------------------------------------------
# some subpopulation sizes
n1 <- 10
n2 <- n1
n3 <- 20
# construct unadmixed individuals
Q_pop1 <- matrix( c( 1, 0 ), ncol = 2, nrow = n1, byrow = TRUE )
Q_pop2 <- Q_pop1[ , 2:1 ] # reverse columns
# random admixture proportions
Q_admix <- rev( sort( runif( n3 ) ) )
Q_admix <- cbind( Q_admix, 1 - Q_admix )
# combine data for all populations
# add some informative row/col names too
rownames( Q_pop1 ) <- paste0( 'S1-ind', 1 : n1 )
rownames( Q_pop2 ) <- paste0( 'S2-ind', 1 : n2 )
rownames( Q_admix ) <- paste0( 'S3-ind', 1 : n3 )
Q <- rbind( Q_pop1, Q_pop2, Q_admix )
colnames( Q ) <- c('A1', 'A2')
# create subpopulation labels for later
labs1 <- c( rep.int( 'S1', n1 ), rep.int( 'S2', n2 ), rep.int( 'S3', n3 ) )
labs2 <- c( rep.int( 'Unadmixed', n1+n2 ), rep.int( 'Admixed', n3 ) )
## ---- fig.width = 6.5, fig.height = 2, fig.align = 'center'-------------------
# adjust margins
par(mar = c(2, 3, 1, 0.2) + 0.2)
# actual plot
plot_admix( Q )
## ---- fig.width = 6.5, fig.height = 3, fig.align = 'center'-------------------
# adjust margins
par(mar = c(6, 3, 1, 0.2) + 0.2)
# actual plot
plot_admix( Q, names = TRUE, xlab_line = 5 )
## ---- fig.width = 6.5, fig.height = 2.2, fig.align = 'center'-----------------
# adjust margins
par(mar = c(2, 3, 1, 0.2) + 0.2)
# actual plot
plot_admix( Q, labs = labs1 )
## ---- fig.width = 6.5, fig.height = 2.3, fig.align = 'center'-----------------
# adjust margins
par(mar = c(3, 3, 1, 0.2) + 0.2)
# actual plot
plot_admix( Q, labs = cbind( labs1, labs2 ), labs_line = c(0, 1), xlab_line = 2 )
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.