Nothing
#' @name gl.fdsim
#' @title Estimates the rate of false positives in a fixed difference analysis
#' @family distance
#' @description
#' This function takes two populations and generates allele frequency profiles
#' for them. It then samples an allele frequency for each, at random, and
#' estimates a sampling distribution for those two allele frequencies. Drawing
#' two samples from those sampling distributions, it calculates whether or not
#' they represent a fixed difference. This is applied to all loci, and the
#' number of fixed differences so generated are counted, as an expectation. The
#' script distinguished between true fixed differences (with a tolerance of
#' delta), and false positives. The simulation is repeated a given number of
#' times (default=1000) to provide an expectation of the number of false
#' positives, given the observed allele frequency profiles and the sample sizes.
#' The probability of the observed count of fixed differences is greater than
#' the expected number of false positives is calculated.
#'
#' @param x Name of the genlight containing the SNP genotypes [required].
#' @param poppair Labels of two populations for comparison in the form
#' c(popA,popB) [required].
#' @param obs Observed number of fixed differences between the two populations
#' [default NULL].
#' @param sympatric If TRUE, the two populations are sympatric, if FALSE then
#' allopatric [default FALSE].
#' @param reps Number of replications to undertake in the simulation
#' [default 1000].
#' @param delta The threshold value for the minor allele frequency to regard the
#' difference between two populations to be fixed [default 0.02].
#' @param verbose Verbosity: 0, silent, fatal errors only; 1, flag function
#' begin and end; 2, progress log; 3, progress and results summary; 5, full
#' report [default 2 or as specified using gl.set.verbosity].
#'
#' @author Custodian: Arthur Georges (Post to
#' \url{https://groups.google.com/d/forum/dartr})
#'
#' @examples
#' fd <- gl.fdsim(testset.gl[,1:100],poppair=c('EmsubRopeMata','EmmacBurnBara'),
#' sympatric=TRUE,verbose=3)
#'
#' @importFrom stats pnorm rbinom
#' @export
#' @return A list containing the following square matrices
#' [[1]] observed fixed differences;
#' [[2]] mean expected number of false positives for each comparison;
#' [[3]] standard deviation of the no. of false positives for each
#' comparison;
#' [[4]] probability the observed fixed differences arose by chance for
#' each comparison.
gl.fdsim <- function(x,
poppair,
obs = NULL,
sympatric = FALSE,
reps = 1000,
delta = 0.02,
verbose = NULL) {
# SET VERBOSITY
verbose <- gl.check.verbosity(verbose)
# FLAG SCRIPT START
funname <- match.call()[[1]]
utils.flag.start(func = funname,
build = "v.2023.3",
verbose = verbose)
# CHECK DATATYPE
datatype <- utils.check.datatype(x, verbose = verbose)
# SCRIPT SPECIFIC CHECKS
if (!(poppair[1] %in% levels(pop(x)))) {
stop(error(" Fatal Error: Population A mislabelled\n"))
}
if (!(poppair[2] %in% levels(pop(x)))) {
stop(error(" Fatal Error: Population B mislabelled\n"))
}
# DO THE JOB
# Extract the data for the two nominated populations
if (length(poppair) == 2) {
pair <-
gl.keep.pop(
x,
pop.list = poppair,
recalc = FALSE,
mono.rm = TRUE,
verbose = 0
)
} else {
stop(
error(
" Fatal Error: Must specify two populations labels from the genlight object, e.g. poppair=c(popA,popB)\n"
)
)
}
if (verbose >= 2) {
if (sympatric) {
cat(" Populations",
poppair[1],
"vs",
poppair[2],
"[sympatric]\n")
} else {
cat(" Populations",
poppair[1],
"vs",
poppair[2],
"[allopatric]\n")
}
}
if (verbose >= 3) {
cat(" Sample sizes:", table(pop(pair)), "\n")
cat(" No. of loci:", nLoc(pair), "\n")
}
# Calculate the percentage frequencies
rf <- gl.allele.freq(pair, percent=TRUE, by='popxloc', verbose = 0)
# Disaggregate the data for the two populations
rfA <- rf[rf$popn == poppair[1], ]
rfB <- rf[rf$popn == poppair[2], ]
# Identify sampling populations
if (sympatric) {
rfA <- rf
rfB <- rf
}
# Initialize storage vectors
simA <- array(data = NA, dim = nLoc(pair))
simB <- array(data = NA, dim = nLoc(pair))
fd <- array(data = NA, dim = nLoc(pair))
falsepos <- array(data = NA, dim = reps)
# Caclulate the observed fixed differences
if (is.null(obs)) {
fdmat <- gl.fixed.diff(pair, verbose = 0)
obs <- fdmat$fd[1]
}
# cat(' No. of observed fixed differences:',obs,'\n')
# Calculate the rate of false positives, given delta and actual sample sizes
if (verbose > 1) {
cat(
report(
" Calculating false positive rate with",
reps,
"replications. Please be patient\n"
)
)
}
for (j in 1:reps) {
# Repeat reps times
for (i in 1:nLoc(pair)) {
# Eliminate from consideration loci for which either frequency is missing
if (is.na(rfA$frequency[i]) ||
is.na(rfB$frequency[i])) {
simA[i] <- NA
simB[i] <- NA
# Eliminate from consideration fixed differences or near fixed differences, for given delta
} else if ((((rfA$frequency[i] / 100) < delta) &&
(1 - (rfB$frequency[i] / 100)) < delta) ||
(((rfB$frequency[i] / 100) < delta) &&
(1 -
(rfA$frequency[i] / 100)) < delta)) {
simA[i] <- NA
simB[i] <- NA
# Calculate the number of fixed differences to arise by chance
} else {
szA <- rfA$nobs[i] * 2
szB <- rfB$nobs[i] * 2
pbA <- rfA$frequency[i] / 100
pbB <- rfB$frequency[i] / 100
# Sample an allele frequency from each population, expressed as a proportion
simA[i] <-
(stats::rbinom(
n = 1,
size = szA,
prob = pbA
)) / szA
simB[i] <-
(stats::rbinom(
n = 1,
size = szB,
prob = pbB
)) / szB
# Apply Equation 5
fd[i] <-
((1 - simA[i]) ^ szA) * ((simB[i]) ^ szB) + ((simA[i]) ^ szA) * ((1 - simB[i]) ^
szB)
}
}
# Count the number of false positives
falsepos[j] <- sum(fd, na.rm = TRUE)
}
# Calculate the mean and standard deviation for the false positive rate
mn <- mean(falsepos)
sdev <- sd(falsepos)
# Calculate the probability of the observed FD count, given the simulated result
nprob <-
stats::pnorm(obs,
mean = mn,
sd = sdev,
lower.tail = FALSE)
if (verbose > 2) {
cat(" Threshold minor allele frequency for generating a false positive:",
delta,
"\n")
cat(" Estimated mean count of false positives:",
round(mean(falsepos), 1),
"\n")
cat(" Estimated SD of the count of false positives:",
round(sd(falsepos), 2),
"\n")
cat(" Prob that observed count of",
obs,
"are false positives:",
round(nprob, 8),
"\n")
}
l <-
list(
observed = obs,
mnexpected = mn,
sdexpected = sdev,
prob = nprob
)
# FLAG SCRIPT END
if (verbose > 0) {
cat(report("Completed:", funname, "\n"))
}
return(l)
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.