popkin.R
In popkin: Estimate Kinship and FST under Arbitrary Population Structure

## ----setup, include = FALSE---------------------------------------------------
knitr::opts_chunk$set(
  collapse = TRUE,
  comment = "#>"
)

## -----------------------------------------------------------------------------
library(popkin)
# rename for simplicity
X <- hgdp_subset
dim(X)

## -----------------------------------------------------------------------------
# shorten subpopulation labels
colnames(X)[colnames(X) == 'AFRICA'] <- 'AFR'
colnames(X)[colnames(X) == 'MIDDLE_EAST'] <- 'MDE'
colnames(X)[colnames(X) == 'EUROPE'] <- 'EUR'
colnames(X)[colnames(X) == 'CENTRAL_SOUTH_ASIA'] <- 'SAS'
colnames(X)[colnames(X) == 'EAST_ASIA'] <- 'EAS'
colnames(X)[colnames(X) == 'OCEANIA'] <- 'OCE'
colnames(X)[colnames(X) == 'AMERICA'] <- 'AMR'
# order roughly by distance from Africa
# (without crossing the Atlantic Ocean)
pop_order <- c('AFR', 'MDE', 'EUR', 'SAS', 'EAS', 'OCE', 'AMR')
# applies reordering
X <- X[, order(match(colnames(X), pop_order))]
# extract subpopulations vector
subpops <- colnames(X)

## -----------------------------------------------------------------------------
X[1:10,1:15]

## -----------------------------------------------------------------------------
kinship <- popkin(X, subpops)

## ---- fig.width = 4.2, fig.height = 3, fig.align = 'center'-------------------
plot_popkin(
    kinship,
    labs = subpops,
    # shared bottom and left margin value, to make space for labels
    mar = 1
)

## ---- fig.width = 4.2, fig.height = 3, fig.align = 'center'-------------------
plot_popkin(
    inbr_diag( kinship ),
    labs = subpops,
    mar = 1
)

## ---- fig.width = 4.2, fig.height = 3, fig.align = 'center'-------------------
plot_popkin(
    inbr_diag(kinship),
    labs = subpops,
    labs_even = TRUE,
    labs_line = 1,
    labs_cex = 0.7,
    mar = 2
)

## -----------------------------------------------------------------------------
# get weights
w <- weights_subpops(subpops)
# compute FST!
# Note: don't use the output to inbr_diag(kinship) or FST will be wrong!
fst(kinship, w)

## ---- fig.width = 4, fig.height = 2, fig.align = 'center'---------------------
# vector of inbreeding coefficients
inbrs <- inbr(kinship)
# quick plot
# adjust margins
par(mar = c(4, 4, 0, 0.2) + 0.2)
# see their distribution
plot(density(inbrs), xlab = 'inbreeding coefficient', main = '')

## ---- fig.width = 4.2, fig.height = 3, fig.align = 'center'-------------------
# compute pairwise FST matrix from kinship matrix
pairwise_fst <- pwfst(kinship)
# fancy legend label
leg_title <- expression(paste('Pairwise ', F[ST]))
# NOTE no need for inbr_diag() here!
plot_popkin(
    pairwise_fst,
    labs = subpops,
    labs_even = TRUE,
    labs_line = 1,
    labs_cex = 0.7,
    leg_title = leg_title,
    mar = c(2, 0.2)
)

## ---- fig.width = 4.2, fig.height = 3, fig.align = 'center'-------------------
indexes_AFR <- subpops == 'AFR'
# AFR subset of the kinship matrix
kinship_AFR <- kinship[indexes_AFR, indexes_AFR]

# kinship matrix plot
plot_popkin(
    inbr_diag( kinship_AFR ),
    mar = 0
)

# estimate FST before rescaling (this value will be wrong, too high!)
fst( kinship_AFR )

## ---- fig.width = 4.2, fig.height = 3, fig.align = 'center'-------------------
# rescale kinship_AFR
# since subpops is missing, minimum kinship value is set to zero
# (no averaging between subpopulations)
kinship_AFR <- rescale_popkin( kinship_AFR )

# kinship matrix plot
plot_popkin(
    inbr_diag( kinship_AFR ),
    mar = c(0, 0.3)
)

# FST is now correct, relative to the MRCA of AFR individuals
fst( kinship_AFR )

## ---- fig.width = 6, fig.height = 2.8, fig.align = 'center'-------------------
# dummy labels to have lines in second panel
subpops_AFR <- subpops[ indexes_AFR ]
plot_popkin(
    # inbr_diag also works on a list of matrices
    inbr_diag( list( kinship, kinship_AFR ) ),
    # title of each panel
    titles = c('All', 'AFR only, rescaled'),
    # pass per-panel labels using a list
    labs = list( subpops, subpops_AFR ),
    # scalar options are shared across panels
    labs_even = TRUE,
    labs_line = 1,
    labs_cex = 0.5,
    # second value is top margin (to make space for titles)
    mar = c(2, 2)
)

## ---- fig.width = 4.2, fig.height = 3, fig.align = 'center'-------------------
# create second level of labels
# first copy first-level labels
blocks <- subpops
# first block is AFR
blocks[blocks == 'AFR'] <- 'B1'
# second block is West Eurasians, broadly defined
blocks[blocks == 'MDE'] <- 'B2'
blocks[blocks == 'EUR'] <- 'B2'
blocks[blocks == 'SAS'] <- 'B2'
# third block is East Eurasians, broadly defined
blocks[blocks == 'EAS'] <- 'B3'
blocks[blocks == 'OCE'] <- 'B3'
blocks[blocks == 'AMR'] <- 'B3'

# plotting with different options per level is more complicated...
plot_popkin(
	inbr_diag( kinship ),
	labs = cbind( subpops, blocks ), # ... labs is now a matrix with levels on columns
	labs_even = c(TRUE, FALSE),      # ... even spacing for first level only
	labs_line = c(1, 2),             # ... put second level further out
	labs_cex = c(0.7, 1),            # ... don't shrink second level
	labs_sep = c(FALSE, TRUE),       # ... draw lines inside heatmap for second level only
	ylab_adj = 0.65,                 # push up outer margin ylab "Individuals"
    mar = 3                          # increase margins again
	)

## ---- fig.width = 6, fig.height = 2.8, fig.align = 'center'-------------------
plot_popkin(
    inbr_diag(list(kinship, kinship_AFR)),
	titles = c('All', 'AFR only, rescaled'),
    # list of matrices
	labs = list( cbind(subpops, blocks), subpops_AFR ),
    # non-list: values are reused for both panels
	labs_even = c(TRUE, FALSE),
	labs_line = c(1, 2),
	# make label bigger in second panel (custom per-panel values)
    # list of vectors
	labs_cex = list(c(0.5, 0.7), 1),
	# add lines for first level of second panel (custom per-panel values)
    # list of vectors
	labs_sep = list(c(FALSE, TRUE), TRUE),
    mar = c(3, 2)
	)

## ---- fig.width = 6.5, fig.height = 2.8, fig.align = 'center'-----------------
plot_popkin(
    inbr_diag(list(kinship, kinship_AFR)),
	titles = c('All', 'AFR only, rescaled'),
	labs = list( cbind(subpops, blocks), subpops_AFR ),
	labs_even = c(TRUE, FALSE),
	labs_line = c(1, 2),
	labs_cex = list(c(0.5, 0.7), 1),
	labs_sep = list(c(FALSE, TRUE), TRUE),
    mar = c(3, 2),
    # this option adds a legend per panel
    leg_per_panel = TRUE
	)

## -----------------------------------------------------------------------------
# number of loci (rows)
m_loci <- nrow( X )
# number of subpopulations (columns)
k_subpops <- length( pop_order )
# initialize matrix
P <- matrix( NA, nrow = m_loci, ncol = k_subpops )
# copy names from data
colnames( P ) <- pop_order
rownames( P ) <- rownames( X )
# navigate subpopulations
for ( u in 1 : k_subpops ) {
    # subpopulation label name
    pop <- pop_order[ u ]
    # columns of interest
    js <- subpops == pop
    # now average genotypes into allele frequency estimates and store
    P[ , u ] <- rowMeans( X[ , js ], na.rm = TRUE ) / 2
}

## -----------------------------------------------------------------------------
coancestry <- popkin_af( P )

## ---- fig.width = 4.2, fig.height = 3, fig.align = 'center'-------------------
# coancestry matrix plot
# NOTE: inbr_diag() is not needed for coancestry!
plot_popkin(
    coancestry,
    mar = 3,
    names = TRUE,
    ylab = 'Subpopulations'
)

## -----------------------------------------------------------------------------
# some subpopulation sizes
n1 <- 10
n2 <- n1
n3 <- 20
# construct unadmixed individuals
Q_pop1 <- matrix( c( 1, 0 ), ncol = 2, nrow = n1, byrow = TRUE )
Q_pop2 <- Q_pop1[ , 2:1 ] # reverse columns
# random admixture proportions
Q_admix <- rev( sort( runif( n3 ) ) )
Q_admix <- cbind( Q_admix, 1 - Q_admix )
# combine data for all populations
# add some informative row/col names too
rownames( Q_pop1 ) <- paste0( 'S1-ind', 1 : n1 )
rownames( Q_pop2 ) <- paste0( 'S2-ind', 1 : n2 )
rownames( Q_admix ) <- paste0( 'S3-ind', 1 : n3 )
Q <- rbind( Q_pop1, Q_pop2, Q_admix )
colnames( Q ) <- c('A1', 'A2')
# create subpopulation labels for later
labs1 <- c( rep.int( 'S1', n1 ), rep.int( 'S2', n2 ), rep.int( 'S3', n3 ) )
labs2 <- c( rep.int( 'Unadmixed', n1+n2 ), rep.int( 'Admixed', n3 ) )

## ---- fig.width = 6.5, fig.height = 2, fig.align = 'center'-------------------
# adjust margins
par(mar = c(2, 3, 1, 0.2) + 0.2)
# actual plot
plot_admix( Q )

## ---- fig.width = 6.5, fig.height = 3, fig.align = 'center'-------------------
# adjust margins
par(mar = c(6, 3, 1, 0.2) + 0.2)
# actual plot
plot_admix( Q, names = TRUE, xlab_line = 5 )

## ---- fig.width = 6.5, fig.height = 2.2, fig.align = 'center'-----------------
# adjust margins
par(mar = c(2, 3, 1, 0.2) + 0.2)
# actual plot
plot_admix( Q, labs = labs1 )

## ---- fig.width = 6.5, fig.height = 2.3, fig.align = 'center'-----------------
# adjust margins
par(mar = c(3, 3, 1, 0.2) + 0.2)
# actual plot
plot_admix( Q, labs = cbind( labs1, labs2 ), labs_line = c(0, 1), xlab_line = 2 )

Any scripts or data that you put into this service are public.

popkin documentation built on Jan. 7, 2023, 1:26 a.m.

rdrr.io home R language documentation Run R code online

CRAN packages Bioconductor packages R-Forge packages GitHub packages

Note that we can't provide technical support on individual packages. You should contact the package authors for that.

popkin
Estimate Kinship and FST under Arbitrary Population Structure

inst/doc/popkin.R
In popkin: Estimate Kinship and FST under Arbitrary Population Structure

Try the popkin package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

popkin Estimate Kinship and FST under Arbitrary Population Structure

inst/doc/popkin.R In popkin: Estimate Kinship and FST under Arbitrary Population Structure

Try the popkin package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

popkin
Estimate Kinship and FST under Arbitrary Population Structure

inst/doc/popkin.R
In popkin: Estimate Kinship and FST under Arbitrary Population Structure