Nothing
#' Creates a site frequency spectrum based on a dartR or genlight object
#'
#' @param x dartR/genlight object
#' @param minbinsize remove bins from the left of the sfs. For example to remove
#' singletons (alleles only occurring once among all individuals) set
#' minbinsize to 2. If set to zero, also monomorphic (d0) loci are returned.
#' @param folded if set to TRUE (default) a folded sfs (minor allele frequency
#' sfs) is returned. If set to FALSE then an unfolded (derived allele frequency
#' sfs) is returned. It is assumed that 0 is homozygote for the reference and
#' 2 is homozygote for the derived allele. So you need to make sure your
#' coding is correct.
#' @param singlepop switch to force to create a one-dimensional sfs, even
#' though the genlight/dartR object contains more than one population
#' @param plot.out Specify if plot is to be produced [default TRUE].
#' @param plot.dir Directory in which to save files [default = working directory]
#' @param plot.file Name for the RDS binary file to save (base name only, exclude extension) [default NULL]
#' @param verbose Verbosity: 0, silent or fatal errors; 1, begin and end; 2,
#' progress log ; 3, progress and results summary; 5, full report
#' [default 2, unless specified using gl.set.verbosity].
#' @return returns a site frequency spectrum, either a one dimensional vector
#' (only a single population in the dartR/genlight object or singlepop=TRUE) or
#' an n-dimensional array (n is the number of populations in the genlight/dartR
#' object). If the dartR/genlight object consists of several populations the
#' multidimensional site frequency spectrum for each population is returned
#' [=a multidimensional site frequency spectrum]. Be aware the
#' multidimensional spectrum works only for a limited number of population
#' and individuals [if too high the table command used internally will
#' through an error as the number of populations and individuals (and
#' therefore dimensions) are too large]. To get a single sfs for a
#' genlight/dartR object with multiple populations, you need to set
#' singlepop to TRUE. The returned sfs can be used to analyse demographics,
#' e.g. using fastsimcoal2.
#' @export
#' @examples
#' gl.sfs(bandicoot.gl, singlepop = TRUE)
#' gl.sfs(possums.gl[c(1:5, 31:33), ], minbinsize = 1)
#' @references Excoffier L., Dupanloup I., Huerta-Sánchez E., Sousa V. C. and
#' Foll M. (2013) Robust demographic inference from genomic and SNP data. PLoS
#' genetics 9(10)
#' @author Custodian: Bernd Gruber & Carlo Pacioni (Post to
#' \url{https://groups.google.com/d/forum/dartr})
gl.sfs <- function(x,
minbinsize = 0,
folded = TRUE,
singlepop = FALSE,
plot.out = TRUE,
plot.file = NULL,
plot.dir = NULL,
verbose = NULL) {
# SET VERBOSITY
verbose <- gl.check.verbosity(verbose)
# SET WORKING DIRECTORY
plot.dir <- gl.check.wd(plot.dir, verbose = 0)
# FLAG SCRIPT START
funname <- match.call()[[1]]
utils.flag.start(
func = funname,
build = "Jody",
verbose = verbose
)
# CHECK DATATYPE
datatype <- utils.check.datatype(x, verbose = verbose)
# FUNCTION SPECIFIC ERROR CHECKING
if (sum(is.na(as.matrix(x))) > 0 & verbose>1) {
cat(
warn(
"Your data contains missing data, better filter stronger or use gl.impute to fill those gaps meaningful!\n"
)
)
}
# only a single population....
if (nPop(x) == 0) {
cat(
warn(
"No population definition provided. I proceed, assuming your genlight/dartR object is a single population.\n"
)
)
pop(x) <- rep("A", nInd(x))
}
if (!singlepop & (prod(table(pop(x)) * 2 + 1)) > 2^30) {
cat(
error(
"Cannot create a multidimensional sfs, due to too high dimensions. Reduce the number of populations/individuals or use singlepop=TRUE."
)
)
stop()
}
# DO THE JOB
if (nPop(x) == 1 | singlepop == TRUE) {
mi <- nInd(x)
if (!folded) {
mi <- 2 * mi # double the number of slots...
}
cs <- colSums(as.matrix(x), na.rm = TRUE)
if (folded) {
sfs0 <- table(mi - (abs(mi - cs)))
} else {
sfs0 <- table(cs)
}
sfsf <- rep(0, mi + 1)
sfsf[as.numeric(names(sfs0)) + 1] <- sfs0
names(sfsf) <- paste0("d", 0:mi)
# delete minbinsize
if (minbinsize > 0) {
sfsf <- sfsf[-c(1:(minbinsize))]
}
sfs <- sfsf
# multidimensional
} else {
pp <- seppop(x)
cs <- list()
for (i in 1:length(pp)) {
mi <- nInd(pp[[i]])
if (!folded) {
mi <- 2 * mi # double the number of slots...
}
sfs0 <- colSums(as.matrix(pp[[i]]), na.rm = TRUE)
if (folded) {
cs[[i]] <- mi - (abs(mi - sfs0))
} else {
cs[[i]] <- sfs0
}
}
# add zeros and make sure they are consistent
msfs0 <- do.call(table, cs)
aa <- array(0, dim = table(pop(x)) * 2 + 1)
dimnames(aa) <-
sapply(dim(aa), function(x) {
paste0("d", 0:(x - 1))
}, simplify = F)
dn <- dimnames(msfs0)
do <- lapply(dn, function(x) {
1:length(x)
})
do1 <- expand.grid(do)
do2 <- apply(do1, 2, as.numeric)
dn1 <- expand.grid(dn)
dn2 <- apply(dn1, 2, as.numeric)
dn3 <- dn2 + 1
for (i in 1:nrow(dn3)) {
aa[t(dn3[i, ])] <- msfs0[t(do2[i, ])]
}
# delete minbinsize
if (minbinsize > 0) {
tt <- paste0("aa[", paste0(
rep(
"-c(1:minbinsize)",
length(dim(
aa
))
),
collapse = ","
), "]")
aa <- eval(parse(text = tt))
}
sfs <- aa
}
# needs to be saved and turned into a ggplot
if (plot.out) {
if (!is.array(sfs)) {
df <- data.frame(sfs)
df$names <- 1:length(sfs)
gp <-
ggplot(df, aes(x = names, y = sfs)) +
geom_bar(stat = "identity") +
xlab("bin") +
ylab("Frequency")
print(gp)
} else {
cat(report(
"The sfs has more than 2 dimensions, therefore no plot is returned\n"
))
}
}
# Optionally save the plot ---------------------
if (!is.null(plot.file)) {
tmp <- utils.plot.save(gp,
dir = plot.dir,
file = plot.file,
verbose = verbose
)
}
# FLAG SCRIPT END
if (verbose >= 1) {
cat(report("Completed:", funname, "\n"))
}
return(sfs)
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.