Developements of the fpga
packages were heavily inspired by gstudio
. While gstudio
provides all of the functions and much more use friendliness, we were faced with the challenge to analyse several thousands of indivudals from hunderts of populations and gstudio
was to slow. This vignette severs as a comparison between fpga
and gstudio
.
We use the arapat
data from gstudio
:
library(abind) library(fpga) library(gstudio) # We have # - 1000 Individuals # - with 3 loci # - 200 possible allels # - in 10 populations set.seed(111) dat0 <- data.frame( pop = factor(as.character(rep(1:9, each = 100))), loc1 = do.call(c, lapply(split(sample(c(1:100, NA), 900 * 2, TRUE), rep(1:900, each = 2)), locus)), loc2 = do.call(c, lapply(split(sample(c(200:250, NA), 900 * 2, TRUE), rep(1:900, each = 2)), locus)), loc3 = do.call(c, lapply(split(sample(c(1:200), 900 * 2, TRUE), rep(1:900, each = 2)), locus)) ) ## With NA dat0 <- data.frame( pop = factor(as.character(rep(1:3, each = 10))), loc1 = do.call(c, lapply(split(sample(c(1:10, NA), 30 * 2, TRUE), rep(1:30, each = 2)), locus)), # loc2 = do.call(c, lapply(split(sample(c(200:205, NA), 30 * 2, TRUE), rep(1:30, each = 2)), locus)), loc3 = do.call(c, lapply(split(sample(c(1:4), 30 * 2, TRUE), rep(1:30, each = 2)), locus)) ) # No NA dat0 <- data.frame( pop = factor(as.character(rep(1:3, each = 10))), loc1 = do.call(c, lapply(split(sample(c(1:10), 30 * 2, TRUE), rep(1:30, each = 2)), locus)), # loc2 = do.call(c, lapply(split(sample(c(200:205, NA), 30 * 2, TRUE), rep(1:30, each = 2)), locus)), loc3 = do.call(c, lapply(split(sample(c(1:4), 30 * 2, TRUE), rep(1:30, each = 2)), locus)) ) head(dat0) dat2 <- gstudio2fpga(dat0) identical(dat1, dat2) dat1 dat2 dat00 <- fpga2gstudio(dat1) identical(dat0, dat00) # data for fpga x <- lapply(column_class(dat0, class = "locus"), function(i) apply(alleles(dat0[[i]]), 2, as.integer)) dat1 <- abind(list(sapply(x, function(y) y[, 1]), sapply(x, function(y) y[, 2])), along = 3)
Frequencies
gstudio::frequencies(dat0) freq(dat1) mean(frequencies(dat0)$Frequency) == mean(freq(dat1)$freq) sum(frequencies(dat0)$Frequency) sum(freq(dat1)$freq)
Number of alleles:
system.time(na0 <- A(dat0)) system.time(na1 <- n_alls(dat1)) na0$A == na1
Observed heterozygosity:
system.time(ho0 <- Ho(dat0)) system.time(ho1 <- het_obs(dat1)) ho0$Ho == ho1
Expected heterozygosity:
system.time(he0 <- He(dat0)) system.time(he1 <- het_exp(dat1)) he0$He == he1
Effective number of allels:
system.time(na0 <- Ae(dat0)) system.time(na1 <- n_eff_alls(dat1)) na0$Ae == na1
All of the above function can be stratified by population, e.g.,
system.time(na0 <- genetic_diversity(dat0, stratum = "pop", mode = "A")) system.time(na1 <- n_alls(dat1, dat0[, 1])) sum(na0$A) == sum(na1)
gst
dest
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.