#' Probability mass distribution
#'
#' Calculate probability mass distribution for the proportion of ants from
#' the upper part of the nest among ants removed from outside the nest
#'
#' @param removed_1 The number of marked ants from the upper part of the nest which
#' were removed
#' @param removed_2 The number of the marked ants from the lower part of the nest which
#' were removed
#' @param unmarked The number of the removed ants which had not been marked
#' @param marked_1 The total number of the marked ants from the upper part of the nest
#' @param marked_2 The total number of the marked ants from the lower part of the nest
#' @param N1 The total number of ants from the upper part of the nest
#' @param N2 The total number of ants from the lower part of the nest
#' @param ... Additional arguments to be omitted
#' @param null_model logical; if set to TRUE the probability mass distribution
#' for the null model is calculated; the null model assumes an equal probability of
#' being removed for an ant from the upper and lower part of the nest; if set to FALSE
#' the probability takes into account the numbers of marked ans which have been removed
#' from the outside of nest and the model assumes that a marked ant has the same
#' probability of being removed as an unmarked one.
#' @return A data frame with the all possible proportions of ants from the upper part
#' of the nest among all ants active outside the nest and the corresponding probabilities.
#' @examples
#' data(time_series_results)
#' data(colony_stats)
#' dataset <- dplyr::left_join(time_series_results,
#' dplyr::select(colony_stats, colony, N1, N2, marked_1, marked_2))
#' do.call(get_prob_mass, as.list(unlist(dataset[2,])))
#' @importFrom stats dhyper
#' @export
get_prob_mass <- function(removed_1, removed_2, unmarked, marked_1, marked_2,
N1, N2, ..., null_model = FALSE) {
removed_total <- removed_1 + removed_2 + unmarked
probabs <- numeric(0)
if(null_model)
ant_seq <- 0:removed_total
else
# find all possible numbers of removed ants from upper part of the nest (marked or not)
ant_seq <- max(removed_total - N2, removed_1):min(removed_total - removed_2, N1)
for (removed_upper_all in ant_seq){
if(null_model)
# calculate the probabilities of obtaining all possible numbers of marked
# ants from the upper part of the nest given the sample size and assuming that
# the probability is equal to group size divided by colony size
probab <- dhyper(removed_upper_all, N1, N2, removed_total)
else
# calculate posterior distribution using the Bayes rule
probab<-dhyper(removed_1, marked_1, N1-marked_1, removed_upper_all)*
dhyper(removed_2, marked_2, N2-marked_2, (removed_total - removed_upper_all))
probabs <- c(probabs, probab)
}
# normalize sum of the probabilities to 1
return(data.frame(prob = probabs/sum(probabs),
upper_part_proportion = ant_seq/removed_total))
}
#' Interval for the proportion of ants
#'
#' Calculate confidence/credible interval for a vector of probabilities deriving
#' from the probability mass distribution
#'
#' @param data Data frame returned by \code{get_prob_mass}
#' @param alpha Numeric value setting the maximum value for the type 1 error
#' @return A numeric vector with elements corresponding to lower limit, maximum
#' likelihood, and the upper limit for the proportion of ants coming from the
#' upper part of the nest.
#' @export
calculate_CI <- function(data, alpha){
if (!is.data.frame(data)) stop(paste0(sQuote("data"), " shoud be a data frame"))
if (any(is.na(data$prob))) stop("NA probability values")
if (round(sum(data$prob), 5) != 1) stop("Probability distribution should sum up to 1")
if (!is.numeric(alpha)) stop("Alpha should be numeric")
if (length(alpha) != 1) stop("Alpha shlould be of length one")
if (alpha > 1) stop("Alpha should not be greater than one")
max_prob_index <- which.max(data$prob)
r <- l <- max_prob_index
cumprob <- data$prob[max_prob_index]
while (round(cumprob, 4) < (1 - alpha)){
new_limits <- c(l-1, r+1)
new_limits <- new_limits[new_limits > 0 & new_limits <= nrow(data)]
selected_index <- new_limits[which.max(data$prob[new_limits])]
l <- min(l, selected_index)
r <- max(r, selected_index)
cumprob <- sum(data$prob[l:r])
}
c(lower_limit = data$upper_part_proportion[l],
maximum_likelihood = data$upper_part_proportion[max_prob_index],
upper_limit = data$upper_part_proportion[r])
}
#' Calculate credible interval
#'
#' Calculate credible interval for data obtained over the course of experiment
#'
#' This function uses \code{time_series_results} and \code{colony_stats} datasets to
#' calculate how confidence interval for the null hypothesis and credible interval for
#' actual data changes over the course of experiment; this issue was examined following
#' the suggestion of the reviewer
#' @param time_series_result A dataset with the results of the ant removal experiment;
#' retrieved by \code{data("time_series_result", package = "overwintering")}.
#' @param colony_stats A dataset containing numeric parameters describing colony state and
#' experiment preparation;retrieved by \code{data("colony_stats", package = "overwintering")}.
#' @return an updated version of of \code{time_series_results} data frame with CI values
#' added (long format)
#' @importFrom dplyr %>%
#' @export
track_CI_change <- function(time_series_result, colony_stats){
data("time_series_results", package = "overwintering", envir = environment())
data("colony_stats", package = "overwintering", envir = environment())
removal_experiment_results <- time_series_results %>% dplyr::left_join(select(colony_stats, N1, N2, marked_1, marked_2, colony))
removal_experiment_results <- removal_experiment_results %>% dplyr::mutate(N1b = N1 - round(Dead_unm*(N1/(N1+N2))) - Upper_part_dead,
N2 = N2 - round(Dead_unm*(N2/(N1+N2)))- Lower_part_dead, # adjust for dead ants
N1 = N1b) %>%
dplyr::mutate(marked_1 = marked_1 - Upper_part_dead, marked_2 = marked_2 - Lower_part_dead)
removal_experiment_results <- split(removal_experiment_results, removal_experiment_results$colony)
for (i in seq_along(removal_experiment_results)){
# calculate the number of days from the start of the measurements
removal_experiment_results[[i]]$duration <- as.numeric(removal_experiment_results[[i]]$Date - min(removal_experiment_results[[i]]$Date))
}
removal_experiment_results <- do.call(rbind,removal_experiment_results)
CI_limits <- pmap(removal_experiment_results, get_prob_mass) %>%
lapply(calculate_CI, alpha = 0.05) %>%
do.call(rbind, .) %>%
cbind(removal_experiment_results[,c("colony", "Date", "duration")]) %>%
dplyr::mutate(CI = "empirical")
CI_limits_null <- pmap(removal_experiment_results, get_prob_mass, null_model = TRUE) %>%
lapply(calculate_CI, alpha = 0.05) %>%
do.call(rbind, .) %>%
cbind(removal_experiment_results[,c("colony", "Date", "duration")]) %>%
dplyr::mutate(CI = "null")
return(rbind(CI_limits, CI_limits_null))
}
#' Sample probability distribution
#'
#' Take samples from the set with defined probabilities
#'
#' @param data data.frame returned by the \code{get_prob_mass}
#' @param sample_size int defines the number of draws from the probability
#' distribution
#' @return the values of the random variable whose distribution was sampled; in
#' this case these are the proportions of ants from the upper nest segment
#' @export
sample_distribution <- function(data, sample_size = 1e4){
stopifnot(is.data.frame(data),
all((c("prob", "upper_part_proportion") %in% colnames(data))))
draw_res <- apply(rmultinom(sample_size, 1, data$prob),2,as.logical)
apply(draw_res, 2, function(x) data$upper_part_proportion[x])
}
cond_sample <-function(x, permutation = TRUE){
if (!permutation) return(x)
sample(x)
}
#' Slopes based on random variables
#'
#' Calculate slopes of the linear regression of proportion of ants on time. The
#' proportion for each time point and colony is sampled from the distribution
#' returned by the \code{get_prob_mass} function.
#'
#' @param time_series_result A dataset with the results of the ant removal experiment;
#' retrieved by \code{data("time_series_result", package = "overwintering")}.
#' @param colony_stats A dataset containing numeric parameters describing colony state and
#' experiment preparation;retrieved by \code{data("colony_stats", package = "overwintering")}.
#' @param randomize A logical indicating whether slopes should be calculated using
#' permuted data, see below.
#' @param sample_size A numeric indicating the number of samples taken from the
#' distribution of the possible proportion of ants from the upper part of the nest
#' among all ants being active outside the nest, see below.
#' @details
#' This function uses \code{get_prob_mass} function on experiment data provided in the
#' \code{time_series_results} and \code{colony_stats} datasets to calculate the probability distribution
#' of the proportion of ants captured outside the nest which come from the upper
#' part of the nest. The inference is based on the numbers of marked ants among all
#' captured individuals with marking label indicating the nest segment (upper or lower).
#' The probability distributions, obtained separately for each colony and time point,
#' is sampled many times, producing multiple data vectors for linear regression. These
#' vectors can additionally be permuted to have the null distribution of regression
#' slopes.
#' @importFrom purrr pmap_dbl pmap
#' @importFrom dplyr %>%
calculate_regr_coeffs <- function(time_series_results,
colony_stats,
randomize = FALSE,
sample_size = 1e4) {
data_combined <- dplyr::left_join(time_series_results, dplyr::select(colony_stats, "N1", "N2", "marked_1", "marked_2", "colony"))
exp_data <- split(data_combined, data_combined$colony)
coeffs <- list()
for (i in 1:length(exp_data)){
# calculate the number of days from the start of the measurements
timing <- as.numeric(exp_data[[i]]$Date - min(exp_data[[i]]$Date))
coeffs[[i]] <- pmap(exp_data[[i]], get_prob_mass) %>%
lapply(sample_distribution, sample_size = sample_size) %>%
do.call(cbind, .) %>%
as.data.frame() %>%
pmap_dbl(~lm(cond_sample(c(...), permutation = randomize)~timing)$coeff[2])
names(coeffs)[i] <- as.character(exp_data[[i]]$colony[1])
}
coeffs
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.