#' The guts for simulating & analysing datasets that are biased by over-dispersion in N (Scenario 2)
#'
#' It's recommended to use "simulation_function_N" instead of this one as
#' "simulation_function_N" has better error handling (hopefully).
#' This function is one of the master functions in this package. It simulates and
#' analyzes a single dataset that is over-dispersed in true abundance, N.
#' To do this many times, use an apply function. It's currently simplified and
#' assumes constant abundance, detection, transect length, NO simulated goodness-of-fit
#' metrics. It simulates both point count and distance data and analyses both datasets
#' using both unmarked and optim.
#'
#' @param n_sites number of sites (transects)
#' @param n_samps number of samples (replicates) per site
#' @param lambda1 partial mean abundance at every site (single draw per site that stays constant across samples)
#' @param lambda2 partial mean abundance at every replicate (new draw for every sample at every site)
#' @param det_prob mean detection probability
#' @param sigma detection parameter (meters). Just leave this blank and it will be calculated from det_prob
#' @param W transect half-width (meters)
#' @param reps_to_analyze the number of samples/replicates to analyze. If NA, it will analyse all replicates in the data.
#' @param return What to return from the function call. Currently the only option is 'results'. May change this to only analyze simulated goodness-of-fit metrics.
#' @param savefilename The simulated datasets and results ARE saved to file (currently not optional). This provides the path and filename for saving the intermediate steps in the analysis.
#'
#' @return if everything works well, it returns a data.frame with the results of simulating a single dataset, analyzing it in 4 ways, and calculating randomized quantile residuals a la Knape et al. 2018. It also saves a list with the simulated dataset, dataframe of results (minus rqr residual info), and the actual rqr residuals to a savefilename inside the folder path 'working directory'/results/Scenario 2/savefilename. If there is an error, the function returns NA and also saves a file to 'working directory'/Scenario 2/set x/errors with the simulated dataset and the results data.frame but no rq-residuals (it's basically assumed that the rq-residuals were the source of the error.) Similarly, with a warning the function returns the results data.frame and also saves a file to 'working directory'/Scenario 2/set x/warnings with the simulated dataset and the results data.frame but no rq-residuals (it's basically assumed that the rq-residuals were the source of the error.) The user will have to go back and try to calculate rq-residuals from the output later.
#'
#' @examples
#' base_simulation_function_N()
#'
#' @import tidyverse
#' @import nortest
#'
#' @export
base_simulation_function_N = function(n_sites = 50, # number of sites
n_samps = 6, # number of samples per site
lambda1 = 5,
lambda2 = 5,
det_prob = 0.42, # mean detection probability
sigma = NA, # detection parameter, calculated from det_prob
W = 20, # transect half-width
reps_to_analyze = 3, # number of replicate surveys at each location/site
# sampling_method = c('distance'),
# analysis_method = c('optim'),
# simulate_gof_pvals = T,
# simulate_gof_sims = 5,
# simulate_gof_parallel = T,
return = 'results',
savefilename = file.path('set 1', 'datasets', 'data')) {
# wrap everything in a tryCatch call
out <- tryCatch(
{
# simulate new data
simdat <- sim_data_N(n_sites = n_sites,
n_samps = n_samps,
lambda1 = lambda1,
lambda2 = lambda2,
det_prob = det_prob,
sigma = sigma,
W = W)
# analyze the simulated data
resL <- analyse_data(simulated_data = simdat,
reps_to_analyze = reps_to_analyze,
sampling_method = c('distance', 'pointcount'),
analysis_method = c('optim', 'unmarked'),
simulate_gof_pvals = FALSE,
return = return,
W = W)
pco <- resL$inputs$include_pc_optim
# get the results data.frame
res <- resL[['df']]
# calculate several types of residuals & save everything
{
# Calculate randomized-quantile residuals
# first, rename some parameters for convenience
{
K <- round(max(simdat$true_N)) * 10
lam_od <- res$lambda_est[which(res$LL_method == 'optim' &
res$sampling_method == 'Distance')]
lam_op <- res$lambda_est[which(res$LL_method == 'optim' &
res$sampling_method == 'pointcount')]
lam_ud <- res$lambda_est[which(res$LL_method == 'unmarked' &
res$sampling_method == 'Distance')]
lam_up <- res$lambda_est[which(res$LL_method == 'unmarked' &
res$sampling_method == 'pointcount')]
sig_od <- res$sigma_est[which(res$LL_method == 'optim' &
res$sampling_method == 'Distance')]
sig_op <- res$sigma_est[which(res$LL_method == 'optim' &
res$sampling_method == 'pointcount')]
sig_ud <- res$sigma_est[which(res$LL_method == 'unmarked' &
res$sampling_method == 'Distance')]
sig_up <- res$sigma_est[which(res$LL_method == 'unmarked' &
res$sampling_method == 'pointcount')]
detp_od <- res$det_p_est[which(res$LL_method == 'optim' &
res$sampling_method == 'Distance')]
detp_op <- res$det_p_est[which(res$LL_method == 'optim' &
res$sampling_method == 'pointcount')]
detp_ud <- res$det_p_est[which(res$LL_method == 'unmarked' &
res$sampling_method == 'Distance')]
detp_up <- res$det_p_est[which(res$LL_method == 'unmarked' &
res$sampling_method == 'pointcount')]
}
# residuals for optim & distance sampling
{
rqrM_od <- rqResMar_optim(n_obs = simdat$n_obs,
lambda_est = lam_od)
rqrS_od <- rqResSum_optim(n_obs = simdat$n_obs,
K = K,
p_est = detp_od,
lambda_est = lam_od)
# observation residuals are throwing some errors occassionally. I think only when sample sizes at a site are 0 and hence inadequate for estimating N distributions.
# But anyway, I'm going to add in a trycatch to hopefully get rid of the error.
rqrO_od <- tryCatch(expr = {
rqResObs_optim(n_obs = simdat$n_obs,
p_est = detp_od,
K = K,
lambda_est = lam_od,
sigma_est = sig_od,
W = W,
y_list = simdat[['y_list']],
sampling_method = 'distance')
},
error = function(cond) {
print(cond)
return(NA)
})
}
# residuals for optim & point count sampling
if(pco){
rqrM_op <- rqResMar_optim(n_obs = simdat$n_obs,
lambda_est = lam_op)
rqrS_op <- rqResSum_optim(n_obs = simdat$n_obs,
K = K,
p_est = detp_op,
lambda_est = lam_op)
# observation residuals are throwing some errors occassionally. I think only when sample sizes at a site are 0 and hence inadequate for estimating N distributions.
# But anyway, I'm going to add in a trycatch to hopefully get rid of the error.
rqrO_op <- tryCatch(expr = {
rqResObs_optim(n_obs = simdat$n_obs,
p_est = detp_op,
K = K,
lambda_est = lam_op,
sigma_est = sig_op,
W = W,
y_list = simdat[['y_list']],
sampling_method = 'point count')
},
error = function(cond) {
print(cond)
return(NA)
})
}
# residuals for unmarked & distance sampling
{
rqrM_ud <- rqResMar_dist(umFit = resL$unmarked_dist)
rqrS_ud <- rqResSum_dist(umFit = resL$unmarked_dist)
rqrO_ud <- tryCatch(expr = {
rqResObs_dist(umFit = resL$unmarked_dist)
},
error = function(cond) {
print(cond)
return(NA)
})
}
# residuals for unmarked & point count sampling
{
rqrM_up <- rqResMar(umFit = resL$unmarked_pc)
rqrS_up <- rqResSum(umFit = resL$unmarked_pc)
rqrO_up <- tryCatch(expr = {
rqResObs(umFit = resL$unmarked_pc)
},
error = function(cond) {
print(cond)
return(NA)
})
}
res2 <- cbind(res,
'n_sites' = rep(n_sites, nrow(res)),
'n_samps' = rep(n_samps, nrow(res)),
'lambda_total' = rep(lambda1 + lambda2, nrow(res)),
'lambda1' = rep(lambda1, nrow(res)),
'lambda2' = rep(lambda2, nrow(res)),
'rho' = rep(lambda1 / (lambda1 + lambda2), nrow(res)),
'sigma_true' = rep(find_sigma(W = W, p_true = det_prob), nrow(res)))
# save the simulated data & residuals for later
# (saving simulated data and analysed objects so that I can calculate c-hat values later)
savd <- list(simulated_data = simdat,
analyzed_data = resL,
rqrM_optim_distance = rqrM_od,
rqrS_optim_distance = rqrS_od,
rqrO_optim_distance = rqrO_od,
rqrM_optim_pointcount = ifelse(test = pco, yes = rqrM_op, no = NA),
rqrS_optim_pointcount = ifelse(test = pco, yes = rqrS_op, no = NA),
rqrO_optim_pointcount = ifelse(test = pco, yes = rqrO_op, no = NA),
rqrM_unmarked_distance = rqrM_ud,
rqrS_unmarked_distance = rqrS_ud,
rqrO_unmarked_distance = rqrO_ud,
rqrM_unmarked_pointcount = rqrM_up,
rqrS_unmarked_pointcount = rqrS_up,
rqrO_unmarked_pointcount = rqrO_up)
savd[['analyzed_data']][['res']] <- res2
# make sure the directly exists
foldername <- file.path(getwd(),
'results',
'Scenario 2',
dirname(savefilename))
filename <- paste0(basename(savefilename),
'_',
gsub(pattern = ' ', replacement = '_',
x = gsub(pattern = ':',
replacement = '',
x = Sys.time())),
'_',
paste(Sys.info()[['nodename']], Sys.getpid(), sep='_'),
'.RDS')
dir.create(path = foldername, recursive = TRUE, showWarnings = F)
saveRDS(object = savd,
file = do.call(what = file.path,
args = list(foldername,
filename)))
}
# calculate pesky p-values
{
resid_pvals <- data.frame('rqres_Mar_pval_shapiro_wilks' = rep(NA,4),
'rqres_Mar_pval_anderson_darling' = rep(NA,4),
'rqres_Mar_pval_kolmogorov_Smirnov' = rep(NA,4),
'rqres_SiteSum_pval_shapiro_wilks' = rep(NA,4),
'rqres_SiteSum_pval_anderson_darling' = rep(NA,4),
'rqres_SiteSum_pval_kolmogorov_Smirnov' = rep(NA,4),
'rqres_Obs_pval_shapiro_wilks' = rep(NA,4),
'rqres_Obs_pval_anderson_darling' = rep(NA,4),
'rqres_Obs_pval_kolmogorov_Smirnov' = rep(NA,4))
# shapiro-wilks, anderson-darling, and Lilliefors (Kolmogorov-Smirnov) tests of normality
resid_pvals[1,] <- c(normtest(rqrM_od[,1], 's1_M_pval'),
normtest(rqrS_od, 's1_S_pval'),
if(length(rqrO_od)==1) rep(rqrO_od, length(resid_pvals[1,])) else normtest(rqrO_od, 's1_O_pval'))
# shapiro-wilks, anderson-darling, and Lilliefors (Kolmogorov-Smirnov) tests of normality
if(pco){
resid_pvals[2,] <- c(normtest(rqrM_op[,1], 's2_M_pval'),
normtest(rqrS_op, 's2_S_pval'),
if(length(rqrO_op) == 1) rep(rqrO_op, length(resid_pvals[2,])) else normtest(rqrO_op, 's2_O_pval'))
}
# shapiro-wilks, anderson-darling, and Lilliefors (Kolmogorov-Smirnov) tests of normality
resid_pvals[3,] <- c(normtest(rqrM_ud[,1], 's3_M_pval'),
normtest(rqrS_ud, 's3_S_pval'),
if(length(rqrO_ud) == 1) rep(rqrO_ud, length(resid_pvals[3,])) else normtest(rqrO_ud, 's3_O_pval'))
# shapiro-wilks, anderson-darling, and Lilliefors (Kolmogorov-Smirnov) tests of normality
resid_pvals[4,] <- c(normtest(rqrM_up[,1], 's4_M_pval'),
normtest(rqrS_up, 's4_S_pval'),
if(length(rqrO_up) == 1) rep(rqrO_up, length(resid_pvals[4,])) else normtest(rqrO_up, 's4_O_pval'))
# remove the point-count optim p-values if that analysis method isn't in use.
if(! pco) resid_pvals <- resid_pvals[-2,]
}
# stuff to return
cbind(res2,
resid_pvals)
},
error=function(cond) {
folders <- c(getwd(),
'results',
'Scenario 2',
dirname(dirname(savefilename)),
'errors')
foldername <- do.call('file.path', as.list(folders))
dir.create(foldername, recursive = T, showWarnings = FALSE)
saveRDS(object = list(message = cond,
simdat = simdat,
results = resL,
n_obs = simdat$n_obs,
lambda_est_op = lam_op),
file = file.path(foldername,
paste0(gsub(pattern = ' ', replacement = '_',
x = gsub(pattern = ':',
replacement = '',
x = Sys.time())),
paste(Sys.info()[['nodename']], Sys.getpid(), sep='_'),
'.RDS')))
# Choose a return value in case of error
return(NA)
},
warning=function(cond) {
folders <- c(getwd(),
'results',
'Scenario 2',
dirname(dirname(savefilename)),
'warnings')
foldername <- do.call('file.path', as.list(folders))
dir.create(foldername, recursive = T, showWarnings = FALSE)
saveRDS(object = list(message = cond,
simdat = simdat,
results = resL,
n_obs = simdat$n_obs,
lambda_est_op = lam_op),
file = file.path(foldername,
paste0(gsub(pattern = ' ', replacement = '_',
x = gsub(pattern = ':',
replacement = '',
x = Sys.time())),
paste(Sys.info()[['nodename']], Sys.getpid(), sep='_'),
'.RDS')))
# Choose a return value in case of warning
return(res2)
}
)
return(out)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.