Nothing
#' Sampling Simulated Data and Estimation of Multivariate Standard Errors
#'
#' For each simulated data set, this function performs repeated sampling across a range of effort levels
#' and estimates the corresponding MultSE (pseudo-multivariate standard error) using dissimilarity-based methods.
#'
#' @param dat.sim A list of simulated data sets generated by \code{\link{simdata}}.
#' @param Par A list of parameters estimated by \code{\link{assempar}}.
#' @param transformation Mathematical transformation to reduce the influence of dominant species: one of "square root", "fourth root", "Log (X+1)", "P/A", or "none".
#' @param method Dissimilarity metric to use, passed to \code{\link[vegan]{vegdist}} (e.g., "bray", "jaccard", "gower").
#' @param n Maximum number of sampling units per site (must be <= total units available).
#' @param m Maximum number of sites to sample per data set (must be <= total number of sites).
#' @param k Number of repetitions of each sampling configuration (samples × sites) for each data set.
#'
#' @details
#' For multi-site simulations, the function selects subsets of sites (from 2 to \code{m}) and then draws \code{n} samples per site
#' using a two-stage sampling method with inclusion probabilities (Tillé, 2006). For single-site simulations, repeated samples of size
#' 2 to \code{n} are taken without replacement.
#'
#' Each sample undergoes the selected transformation and a dissimilarity matrix is computed.
#' MultSE is estimated using:
#' \itemize{
#' \item \emph{Single site:} pseudo-variance, with \eqn{MultSE = \sqrt(V/n)}
#' \item \emph{Multiple sites:} mean squares from a PERMANOVA model (residual and site effects)
#' }
#' This procedure is computationally intensive, especially with large \code{k}. Start with low values for exploration.
#'
#' @return A matrix containing the estimated MultSE values for each simulated data set, sampling effort combination,
#' and repetition. This matrix is used by \code{\link{summary_ssp}}.
#'
#' @note
#' For quick exploratory analysis, use small \code{k}. Once optimal sampling effort is explored,
#' rerun with larger \code{k} (e.g. 100). Computation time will increase accordingly.
#'
#' @references
#' Anderson, M. J., & Santana-Garcon, J. (2015). Measures of precision for dissimilarity-based multivariate analysis of ecological communities. Ecology Letters, 18(1), 66–73.
#'
#' Guerra-Castro, E. J., Cajas, J. C., Simoes, N., Cruz-Motta, J. J., & Mascaro, M. (2021). SSP: An R package to estimate sampling effort in studies of ecological communities. Ecography, 44(4), 561–573. \doi{10.1111/ecog.05284}
#'
#' Tillé, Y. (2006). Sampling Algorithms. Springer, New York.
#'
#' @seealso \code{\link{assempar}}, \code{\link{simdata}}, \code{\link{summary_ssp}}, \code{\link[vegan]{vegdist}}
#'
#' @examples
#' ## Single site example
#' data(micromollusk)
#' par.mic <- assempar(data = micromollusk, type = "P/A", Sest.method = "average")
#' sim.mic <- simdata(par.mic, cases = 3, N = 20, sites = 1)
#' sam.mic <- sampsd(dat.sim = sim.mic, Par = par.mic, transformation = "P/A",
#' method = "jaccard", n = 10, m = 1, k = 3)
#'
#' ## Multiple site example
#' data(sponges)
#' par.spo <- assempar(data = sponges, type = "counts", Sest.method = "average")
#' sim.spo <- simdata(par.spo, cases = 3, N = 20, sites = 3)
#' sam.spo <- sampsd(dat.sim = sim.spo, Par = par.spo, transformation = "square root",
#' method = "bray", n = 10, m = 3, k = 3)
#'
#' @importFrom vegan vegdist
#' @importFrom stats model.matrix
#' @importFrom sampling balancedtwostage
#'
#' @export
sampsd <- function(dat.sim, Par, transformation, method, n, m, k) {
names(dat.sim) <- sprintf("%i", 1:length(dat.sim))
N<-max(dat.sim[[1]][,'N'])
sites<-max(as.numeric(dat.sim[[1]][,'sites']))
if (sites == 1) {
multi.site = FALSE
} else {multi.site = TRUE}
if (n > N){
stop("'n' must be equal or less than 'N'")}
if (m > sites){
stop("'m' must be equal or less than 'sites'")}
if (multi.site == TRUE) {
# Function to estimate the multivariate standard errors (multiple sites and sample replicates)
model.MSE <- function(D, y) {
D = as.matrix(D)
N = dim(D)[1]
g = length(levels(factor(y$sites)))
X = model.matrix(~factor(y$sites)) #model matrix
H = X %*% solve(t(X) %*% X) %*% t(X) #Hat matrix
I = diag(N) #Identity matrix
A = -0.5 * D^2
G = A - apply(A, 1, mean) %o% rep(1, N) - rep(1, N) %o% apply(A, 2, mean) + mean(A)
MS1 = sum(G * t(H))/(g - 1) #Mean square of sites
MS2 = sum(G * t(I - H))/(N - g) #Mean square of residuals
#Components of variation and MultSE for each source of variation
if (MS1 > MS2) {
CV1 = (MS1 - MS2)/(N/g)
} else {
CV1 = 0
}
MSE1 = sqrt(CV1/g)
MSE2 = sqrt(MS2/(N/g))
return(c(MSE1, MSE2))
}
#Function to sample and estimate MultSE
samp<-function (X = NULL){
#Matrix to store the results
n <- seq(2, n, by = 1)
m <- seq(2, m, by = 1)
mse.results <- matrix(nrow = length(n) * length(m) * k, ncol = 6)
mse.results[, 2] <- base::rep(1:k, times = length(n) * length(m))
mse.results[, 3] <- base::rep(m, times = length(n), each = k)
mse.results[, 4] <- base::rep(n, times = 1, each = length(m) * k)
colnames(mse.results) <- c("dat.sim", "k", "m", "n", "MSE.sites", "MSE.n")
#Objects needed for loop in samp
Y <- cbind(1:(N * sites))
YPU <- as.numeric(as.vector(gl(sites, N)))
NN<-nrow(mse.results)
mm<-mse.results[,3]
nn <- rep(NA, NN)
for (i in seq_len(NN)){
nn[i] <- mse.results[i, 3] * mse.results[i, 4]
}
Par<-Par
Sest<-Par[[3]]
#Sampling and estimation of MultSE for each sampling size
for (i in seq_len(NN)){
sel <- balancedtwostage(Y, selection = 1, m = mm[i], n = nn[i], PU = YPU, FALSE)
#replace incorrect probabilities (p < 0 and p > 1) produced by balancetwostage
sel[sel[,1]<= -1, 1] <- 0
sel[sel[,1]>= 2, 1] <- 1
#getting data
dat.df<-X
rownames(sel) <- c(1:nrow(dat.df))
m<-sel[, 1]
y <- dat.df[m==1,]
dat <- y[ , 1:Sest]
dat$dummy <- 1 #dummy constant
dat<-as.matrix(dat)
#Transformation of and estimatation of a dissimilarity matrix "D"
if (transformation == "square root") {
dat.t <- sqrt(dat)
rm(dat)
D <- vegdist(dat.t, method = method)
}
if (transformation == "fourth root") {
dat.t <- sqrt(sqrt(dat))
rm(dat)
D <- vegdist(dat.t, method = method)
}
if (transformation == "Log (X+1)") {
dat.t <- log(dat + 1)
rm(dat)
D <- vegdist(dat.t, method = method)
}
if (transformation == "P/A") {
dat.t <- 1 * (dat > 0)
rm(dat)
D <- vegdist(dat.t, method = method, binary = TRUE)
}
if (transformation == "none") {
dat.t <- dat
rm(dat)
D <- vegdist(dat.t, method = method)
}
#estimation of MultSE for each source of variation
mse <- tryCatch(model.MSE(D = D, y = y), error = function(e) {return(c(NA, NA))})
#Store results
mse.results[i,5] <- mse[1]
mse.results[i,6] <- mse[2]
}
return(mse.results)
}#end of samp
#Apply samp to simulated data
results<-lapply(dat.sim, samp)
results<-do.call(rbind, results)
#Identity of each simulated data
n <- seq(2, n, by = 1)
m <- seq(2, m, by = 1)
results[, 1] <- base::rep(1:length(dat.sim), each = length(n) * length(m) * k)
return(results)
} # end of loop for multisites
if (multi.site == FALSE) {
# Function to estimate the multivariate standard error for a single site
mSE <- function(D) {
n = dim(as.matrix(D))
ss = sum(D^2)/n
v = ss/(n - 1)
x = sqrt(v/n)
return(x[1])
}
#Function to sample and estimate MultSE
samp<-function (X = NULL){
#Matrix to store the results
n <- seq(2, n, by = 1)
mse.results <- matrix(nrow = length(n) * k, ncol = 4)
mse.results[, 2] <- rep(1:k, times = length(n))
mse.results[, 3] <- rep(n, times = 1, each = k)
colnames(mse.results) <- c("dat.sim", "k", "n", "mSE")
#Objects needed for loop in samp
NN<-nrow(mse.results)
Par<-Par
Sest<-Par[[3]]
#estimation of MultSE for each sampling size
for (i in seq_len(NN)) {
y <- sample(nrow(X), mse.results[i,3])
dat<-X[y,1:Sest]
dat$dummy <- 1 #dummy constant
dat<-as.matrix(dat)
if (transformation == "square root") {
dat.t <- dat^0.5
rm(dat)
D <- vegdist(dat.t, method = method)
}
if (transformation == "fourth root") {
dat.t <- dat^0.25
rm(dat)
D <- vegdist(dat.t, method = method)
}
if (transformation == "Log (X+1)") {
dat.t <- log(dat + 1)
rm(dat)
D <- vegdist(dat.t, method = method)
}
if (transformation == "P/A") {
dat.t <- 1 * (dat > 0)
rm(dat)
D <- vegdist(dat.t, method = method, binary = TRUE)
}
if (transformation == "none") {
D <- vegdist(dat, method = method)
}
#estimation of MultSE
mse <- tryCatch(mSE(D = D), error = function(e) {
return(NA)
})
#Store results
mse.results[i, 4] <- mse
}
return(mse.results)
}
#Apply samp to simulated data
results<-lapply(dat.sim, samp)
results<-do.call(rbind, results)
#Identity of each simulated data
n <- seq(2, n, by = 1)
results[, 1] <- base::rep(1:length(dat.sim), each = length(n) * k)
return(results)
} # end of loop for a single site
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.