#' @title Distance of observed taxa versus sample distribution via ABC-rejection
#'
#' @param gradient A numeric vector representing the gradient along which the taxa has been observed (e.g. concentration of nutrients).
#' @param taxa A character vector with the names of all taxa needs to be the same length at the gradient.
#' @param dist Assumed distribution for the data generating model. This can either be a lognormal (lnrom; default) or gamma (gamma) distribution.
#' @param pr1dist An argument that indicates what prior the location parameter takes "lnorm", "gamma" or "norm".
#' for the gamma distribution this is not the scale but rate parameter, defaulting "lnorm" to scale 1.
#' @param par1 An numeric argument that indicates which indicates what the scale parameter should be.
#' @param pr2dist An argument that indicates what prior the scale parameter takes for "lnorm", "gamma" or "norm".
#' for the gamma distribution this is not the scale but rate parameter, defaulting "gamma" with rate 1.
#' @param par2 An numeric argument that indicates which indicates what the scale parameter should be.
#' @param qtol The value for the quantile tolerance. A lower value selects simulated parameters that fall closer to the observed parameters. Default is 0.01 and indicates that the closest 1 percent is accepted and the rest rejected.
#' @param nsim Number of simulations used.
#' @param xlab An argument representing the name of the gradient.
#' @param size Size of the points in plot "A". Default is 6 but might be to large.
#' @param print.progress Prints the progress of the number of simulations completed (length of the priors). By default is TRUE
#' @param timing Prints the time that was needed to complete the simulations. By default is TRUE.
#' @param penalty The argument indicates whether the intervals of D and SP need to be adjusted based on the observed shift in loc linear adjustment of the distributional parameters.
#'
#' @details The function returns a list of 4 objects: a data frame with the Taxa with D an S, a data frame with S for all taxa,
#' a plotted figure (ggplot2) and a data frame with the simulate, priors and distance of the accepted values of each taxa.
#'
#' @export Dfunabc
#'
#' @examples
#' \dontrun{
#' library(readr)
#' df <- read_csv(url("https://raw.githubusercontent.com/snwikaij/Data/main/Aquatic_Botany_Kaijser_et_al._2019.csv"))
#'
#' #Place Cl (Median) in a vector
#' gradient = df$Mediaan
#'
#' #Place taxon names in a vector
#' taxa = df$Soort
#'
#' #Simulate model (may take up to 5-10 min)
#' results <- Dfunabc(df$Mediaan, df$Soort,
#' dist = "lnorm", pr1dist = "lnorm", par1 = 0.5,
#' nsim = 200000, qtol= 0.005, size=4)
#'
#' #Display the results
#' results$Plot}
Dfunabc <- function(gradient, taxa,
dist ="lnorm",
pr1dist = "lnorm", par1 = 1,
pr2dist = "gamma", par2 = 1,
nsim = 200000, qtol= 0.005,
D_min = 0, xlab = "Gradient", size = 6,
print.progress = T, timing = T,
penalty = F){
#Start timing
if(timing == T){stime <- Sys.time()}
if(qtol <= 0){stop("Quantile tolerance cannot be <= 0")}
if(qtol >= 0.05){warning("Quantile tolerance >= 0.05 is considered `high`")}
if(nsim <= 0){stop("Number of simulation cannot be <=0")}
if(nsim <= 50000){warning("Number of simulations <= 50000 is considered 'low")}
if(D_min < 0 | D_min >= 1){stop("Minimal D cannot be negative or higher than 1")}
if(D_min > 0){warning(paste("The mimimal D included in calculation of the mean DP for positive and negative
is set to", D_min,".", "If this was intentional the this warning can be ignored."))}
#Combine environmental gradient and character vector
df <- data.frame(gradient=gradient, Taxa=as.character(taxa))
df <- na.omit(df)
df <- df[!df$Taxa %in% names(table(df$Taxa)[table(df$Taxa) == 1]),]
#ABC-rejection function with quantile tolerance qtol
spqtol <- function(gr, pr1dist, par1, pr2dist, par2, dist, nsim, qtol){
llists <- list()
priorpar1 <- rep(NA, nsim)
priorpar2 <- rep(NA, nsim)
simpar1 <- rep(NA, nsim)
simpar2 <- rep(NA, nsim)
n <- length(gr)
obs <- c(mean(gr), sd(gr))
lognorm <- function(mu, si){
meanlog <- log(mu)-0.5*log((si/mu)^2+1)
sdlog <- sqrt(log((si/mu)^2+1))
return(c(meanlog, sdlog))}
gamma <- function(mu, si){
scale <- si^2/mu
shape <- mu/scale
return(c(shape, scale))}
for (p in 1:nsim){
if(pr1dist == "lnorm"){
par <- lognorm(obs[1], obs[2])
prior1 <- rlnorm(1, par[1], par1)}
else if(pr1dist == "gamma"){
prior1 <- rgamma(1, shape = obs[1], rate = par1)}
else if(pr1dist == "norm"){
prior1 <- rnorm(1, obs[1], par1)}
else{stop("Select `lnorm`, `gamma` or `norm` for pr1dist")}
if(pr2dist == "lnorm"){
par <- lognorm(obs[1], obs[2])
prior2 <- rlnorm(1, par[2], par2)}
else if(pr2dist == "gamma"){
prior2 <- rgamma(1, shape = obs[1], rate = par2)}
else if(pr2dist == "norm"){
prior2 <- rnorm(1, obs[1], obs[2])}
else{stop("Select `lnorm`, `gamma` or `norm` for pr2dist")}
if(dist == "lnorm"){
par <- lognorm(prior1, prior2)
simsamp <- rlnorm(n, par[1], par[2])}
else if(dist == "gamma"){
par <- gamma(prior1, prior2)
simsamp <- rgamma(n, shape = par[1], scale = par[2])}
else if(dist == "norm"){
simsamp <- rnorm(n, prior1, prior2)}
else{stop("Select `lnorm` or `gamma` or `norm` for the data generating model")}
llists[[p]] <- simsamp
simpar1[p] <- mean(simsamp)
simpar2[p] <- sd(simsamp)
priorpar1[p] <- prior1
priorpar2[p] <- prior2
}
#Statistics
statistics <- data.frame(simulate1=simpar1, simulate2=simpar2, prior1=priorpar1, prior2=priorpar2)
#Distance (Epsilon)
statistics$distance <- rowSums(sqrt(sweep(statistics[,1:2], 2, obs)^2))
#Select closest quantile qtol
parameters <- statistics[order(statistics$distance),][1:c(nsim*qtol),]
parameters <- cbind.data.frame(as.data.frame(matrix(ncol=2, nrow=nrow(parameters))), parameters)
colnames(parameters)[1:2] <- paste0(rep("LMadjusted",2),1:2)
nr <- 1
for(c in 1:2){
si <- c+2
pr <- c+4
parameters[,nr] <- mean(parameters[,pr])+resid(lm(parameters[,pr]~parameters[,si]))
nr <-nr+1}
allsim <- list()
for(l in rownames(parameters)){
allsim[[l]] <- llists[[as.numeric(l)]]}
allgr <- do.call(cbind, allsim)
allgr <- tidyr::gather(as.data.frame(allgr))
allgr$key <- plyr::mapvalues(allgr$key, from=unique(allgr$key), to=1:length(unique(allgr$key)))
return(list(simulates=allgr, parameters=parameters))}
#Cumulative normalization function
cus_fun <- function(z){
z[,2] <- as.numeric(z[,2])
z <- z[order(z[,2]),]
z$V3 <- cumsum(1:nrow(z))/max(cumsum(1:nrow(z)))
return(z)}
looplist <- list()
#Split dataset of species
splitdf <- split(df[,1], df[,2])
w <- sapply(splitdf, as.numeric)
y <- list()
t <- list()
print("Task 1/2: Applying ABC to taxa")
print("")
#Start the ABC loop
for(i in names(w)){
if(print.progress == T){print(i)}
k <- spqtol(w[[i]], pr1dist=pr1dist, par1=par1, pr2dist=pr2dist, par2=par2, dist=dist, nsim=nsim, qtol=qtol)
y[[i]] <- cbind(i, k[[1]])
t[[i]] <- k[[2]]}
#Number of accepted simulates
acc <- round(nsim*qtol)
looplist <- list()
print("")
print(paste("Task 2/2: Comparing", acc, "curves"))
#Start comparison of curves
for(i in 1:acc){
if(print.progress == T){print(i)}
s <- lapply(y, function(y) y[y$key==i,])
z <- do.call(rbind, s)
z <- z[-2]
r <- cus_fun(z)
colnames(r) <- paste0(rep("V", 3),1:3)
splitdfspec <- split(r, r$V1)
q <- lapply(splitdfspec, function(x) cbind(x[,1:2], cumsum(1:dim(x)[1])/max(cumsum(1:dim(x)[1]))))
lpdf <- data.frame(Taxa=rep(NA, length(q)), Distance=rep(NA, length(q)), Split=rep(NA, length(q)))
count <- 0
for(l in names(q)){
count <- count+1
s <- q[[l]]
rownames(s) <-NULL
n <- r[r$V1==l,]
Dl <- n$V3-s[,3]
m <- abs(Dl)
D <- max(m)
lpdf[count,] <- cbind(l, D*sign(Dl[which.max(m)]), s$V2[which.max(m)])}
looplist[[i]] <- lpdf}
#Combine dataset and extract quantiles
f <- do.call(rbind, looplist)
res <- aggregate(data=f, .~Taxa, function(x) c(mean(as.numeric(x)), quantile(as.numeric(x), c(.025, 0.975))))
results <- cbind.data.frame(res$Taxa, res$Distance, res$Split)
colnames(results) <- c("Taxa", "D", "D1", "D2", "S", "S1", "S2")
results <- cbind.data.frame(results[c("Taxa")], apply(results[-1], 2, as.numeric))
results <- cbind.data.frame(results, do.call(rbind, tapply(df$gradient, df$Taxa, quantile)))
#Create pos and neg deviance points
if(D_min > 0){cutD <- D_min}else{cutD <- 0}
pos <- as.numeric(f$Split[f$Taxa %in% results$Taxa[results$D >= cutD]])
neg <- as.numeric(f$Split[f$Taxa %in% results$Taxa[results$D < -cutD]])
#Create data of grouping
pr <- df$gradient[df$Taxa %in% results$Taxa[results$D >= cutD]]
nr <- df$gradient[df$Taxa %in% results$Taxa[results$D < -cutD]]
grouping <- data.frame(response=c(rep("Negative", length(nr)), rep("Positive", length(pr))), values=c(nr, pr))
#Extract quantiles of all species and group
d <- rbind.data.frame(quantile(neg, c(.025, .975)), c(quantile(pos, c(.025, .975))))
d <- setNames(cbind(c("Negative", "Positive"), c(mean(neg), mean(pos)), d), c("Group", "S", "S1", "S2"))
rownames(d) <- NULL
d$Group <- factor(d$Group, levels = c("Positive", "Negative"))
#Apply penalty although unsubstantiated by evidence
if(penalty == T){
penalty1 <- as.data.frame(t(sapply(t, function(x) quantile(x$prior1, c(.025, 0.975))/quantile(x$LMadjusted1, c(.025, 0.975))))*t(sapply(t, function(x) quantile(x$prior2, c(.025, 0.975))/quantile(x$LMadjusted2, c(.025, 0.975)))))
results$S1 <- results$S1/penalty1[,1]
results$S2 <- results$S2*penalty1[,2]
results$D1 <- results$D1/penalty1[,1]
results$D2 <- results$D2*penalty1[,2]
d$S1 <- d$S1/mean(penalty1[,1])
d$S2 <- d$S2/mean(penalty1[,2])}
#Plot all species
pl1 <- ggplot(results, aes(x=S, y=reorder(Taxa, -S)))+
ylab("")+xlab("")+
geom_errorbarh(aes(xmin = S1, xmax = S2, height = 0))+
geom_point(aes(fill = results$D), fill = ifelse(results$D > 0, "white", "black"), pch=21, size = abs(results$D)*size)+
theme_classic()+
theme(axis.line = element_line(colour = "black"),
axis.text.y = element_text(colour = "black"),
axis.title.y = element_text(colour = "black"),
legend.position = "none")
#Plot only positive and negative response
pl2 <- ggplot(d, aes(x=S, y=Group))+
ylab("")+xlab(xlab)+
xlim(ggplot_build(pl1)$layout$panel_scales_x[[1]]$range$range)+
geom_errorbarh(aes(xmin = S1, xmax = S2, height = 0))+
geom_point(fill = c("black", "white"), pch=21, size=6)+
theme_classic()+
theme(axis.line = element_line(colour = "black"),
axis.text.y = element_text(colour = "black"),
axis.title.y = element_text(colour = "black"),
legend.position = "none")
plot <- cowplot::plot_grid(pl1,pl2,labels = "AUTO", align = "v", ncol=1,
rel_heights = c(1.6, 0.4))
#Stop timing
if(timing == T){etime <- Sys.time()
print(etime-stime)}
return(list(Taxa=results, Binary=d, Plot=plot, parameters=t, distance=do.call(rbind,t)[,7], grouping=grouping))}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.