#' @title Summarize bandings
#' @description Counts the number of bird banded by year. Grouping is done on the
#' *b.year* column.
#' @param df A banding dataframe
#' @return A summary dataframe with a new column named *n_banded* which contains
#' the number of banded birds by year
#' @rdname summarize_bandings
#' @export
summarize_bandings <- function(df) {
res = df %>%
group_by(b.year) %>%
summarise(n_banded = sum(count_of_birds))
return(res)
}
#' @title Summarize recoveries
#' @description Counts the number of recoveries of birds banded in the same year.
#' If the data is not grouped by bands, it will be grouped by age class
#' (*age_short* column), banding year(*b.year*) and recovery year (*r.corrected_year*).
#' @param df A recovery dataframe.
#' @param by_band Should the results be sorted by band type? Default: FALSE
#' @return The summarized dataframe
#' @rdname summarize_recoveries
#' @export
summarize_recoveries <- function(df, by_band = FALSE) {
by_groups <- (if (by_band) {
c("b.year", "add_info")
} else {
c("age_short", "b.year", "r.corrected_year")
})
res <- df %>% filter(b.year == r.corrected_year) %>%
group_by_at(by_groups) %>%
summarise(n_recoveries = n())
return(res)
}
#' @title Get direct recoveries
#' @description Get all direct recoveries by merging summaries of banding and
#' recovery data. This function takes raw datasets, filters them if needed,
#' summarizes them, merge them and calculates recovery rate with variance and
#' standard error.
#' @param banding_df A dataframe contaning banding data extracted from Gamebirds
#' and already cleaned.
#' @param recoveries_df A dataframe contaning recovery data extracted from Gamebirds
#' and already cleaned.
#' @param filters Filters to apply to the dataframes. Can contain database
#' specific filters. This argument will be used if the data is not filtered
#' and *banding_filters* or *recoveries_filters* are not provided, Default: NULL
#' @param banding_filters Filters to apply to the banding dataset, Default: NULL
#' @param recoveries_filters Filters to apply to the recoveries dataset, Default: NULL
#' @param filtered Are the datasets already filtered?, Default: FALSE
#' @return A dataframe containing summarized banding and recoveries data with
#' a recovery rate, variance and standard error.
#' @details The species for which the data is compiled is stored in the SPEC
#' attribute of the returned dataframe
#' @rdname get_direct_recoveries
#' @seealso \code{\link{clean_dataset}}, \code{\link{summarize_bandings}},
#' \code{\link{summarize_recoveries}}
#' @export
get_direct_recoveries <-
function(banding_df,
recoveries_df,
filters = NULL,
banding_filters = NULL,
recoveries_filters = NULL,
filtered=FALSE) {
if (!filtered) {
b_filters <-
if (is.null(banding_filters))
filters
else
banding_filters
banding_db <-
filter_database(db = banding_df,
filters = b_filters,
db_type = "b")
r_filters <-
if (is.null(recoveries_filters))
filters
else
recoveries_filters
rec_db <-
filter_database(db = recoveries_df,
filters = filters,
db_type = "r")
SPEC = b_filters$SPEC
} else {
banding_db <- banding_df
rec_db <- recoveries_df
SPEC = unique(banding_db$SPEC)
}
bandings <- summarize_bandings(banding_db)
recoveries <- summarize_recoveries(rec_db)
dr_df <- merge(bandings, recoveries)
dr_df <-
dr_df %>% mutate(recov_rate = n_recoveries / n_banded) %>%
mutate(var_recov_rate = (recov_rate * (1 - recov_rate)) / (n_banded - 1)) %>%
mutate(se_recov_rate = sqrt(var_recov_rate))
attr(dr_df, "SPEC") = SPEC
return(dr_df)
}
has_reporting_probas <- function(df) {
return (all(c("rho", "var_rho") %in% colnames(df)))
}
has_recovery_rate <- function(df) {
return (all(c("recov_rate", "var_recov_rate") %in% colnames(df)))
}
get_reporting_probabilities <- function(df, rho_df) {
if (!has_reporting_probas(df)) {
if (is.null(rho_df)) {
rho_df = gb_reporting_probas
}
df <- inner_join(df, rho_df, by = "b.year")
}
return (df)
}
#' @title Calculate the harvest rate based on reporting probabilities
#' @description Takes a dataframe generated by \code{\link{get_direct_recoveries}},
#' a dataframe containing reproting probabilities and calculates the harvest rates
#' based on recovery data of banded birds. Variance, standard error, confidence
#' levels and coefficient of variation are also calculated for the harvest rate.
#' The harvest rate is estimated based on data described in Alisauskas *et al.*, (2014)
#'
#' @param df A dataframe generated by \code{\link{get_direct_recoveries}}, Default: NULL
#' @param rho_df A dataframe with reporting probabilities, Default: NULL
#' @param ... If `df` is not provided, additionnal arguments passed to
#' \code{\link{get_direct_recoveries}} to generate the direct directories dataframe
#' @return A dataframe with reporting probabilities and havest rate with
#' variance, standard error, confidence levels and coefficient of variation.
#' @rdname get_harvest_rate
#' @seealso \code{\link{get_direct_recoveries}}
#' @export
get_harvest_rate <-
function(df = NULL,
rho_df = NULL,
...) {
hr_df <- if (is.null(df)) {
get_direct_recoveries(...)
} else {
if (!has_recovery_rate(df)) {
stop(
paste(
"Missing columns for df argument. Please make sure the dataframe",
"has been generated by the get_direct_recoveries() function"
)
)
}
df
}
hr_df <- get_reporting_probabilities(hr_df, rho_df)
hr_df <- hr_df %>%
mutate(harv_rate = recov_rate / rho) %>%
mutate(var_harv_rate = (var_recov_rate / (rho ^ 2)) + ((recov_rate ^ 2 * var_rho) / rho ^ 4)) %>%
mutate(se_harv_rate = sqrt(var_harv_rate)) %>%
mutate(cl_harv_rate = se_harv_rate * 1.96) %>%
mutate(cv_harv_rate = se_harv_rate / harv_rate)
return(hr_df)
}
get_confidence_levels <- function(df) {
return(as.data.frame(cbind(
b.year = df$b.year,
start = (df$h - df$cl_h),
end = (df$h + df$cl_h)
)))
}
get_comparison_dataframe <-
function(df = NULL,
filters = NULL,
pos = 1,
...) {
if (is.null(df)) {
if (is.null(filters)) {
stop(sprintf(
paste(
"Dataframe to compare is missing. You must either",
"provide a filtered data frame with the `df%s` argument or a",
"list of filters with the `filters%s` argument."
),
pos,
pos
))
}
df <- get_harvest_rate(filters = filters, ...)
if ("filter_name" %in% names(filters)) {
df["type"] <- filters$filter_name
}
}
if (!"type" %in% colnames(df)) {
df["type"] = paste0("filters", pos)
}
return(df)
}
#' @title Compares two harvest rates dataframes created from different filters
#' @description This function takes either two dataframes returned by
#' \code{\link{get_harvest_rate}} or creates two from different filters.
#' It then compares the harvest rates for the two dataframes over the years
#' to see if differences exist.
#' @param df1 First dataframe to compare, Default: NULL
#' @param df2 Second dataframe to compare, Default: NULL
#' @param filters1 Filters used to create *df1* if it is not provided, Default: NULL
#' @param filters2 Filters used to create *df2* if it is not provided, Default: NULL
#' @param plot Plot the comparisons, Default: TRUE
#' @param check_overlap Check if all confidence levels overlap, Default: TRUE
#' @param ... Additional arguments needed by \code{\link{get_harvest_rate}} to
#' create the dataframes if they are not provided
#' @return A logical specifiying if all confidence levels overlap if *check_overlap*
#' is set to TRUE.
#' @rdname compare_harvest_rates
#' @seealso \code{\link{get_harvest_rate}}
#' @export
compare_harvest_rates <-
function(df1 = NULL,
df2 = NULL,
filters1 = NULL,
filters2 = NULL,
plot = TRUE,
check_overlap = TRUE,
...) {
# All Bands
df1 <- get_comparison_dataframe(df1, filters1, 1, ...)
df2 <- get_comparison_dataframe(df2, filters2, 2, ...)
if (plot) {
library(ggplot2)
df <- rbind(df1, df2)
both_plot <-
ggplot(data = df, aes(x = b.year, y = harv_rate)) +
geom_point(aes(color = type)) +
geom_errorbar(aes(
ymin = harv_rate - cl_harv_rate,
ymax = harv_rate + cl_harv_rate
))
print(both_plot)
}
if (check_overlap) {
## Compute overlap
common_years <- intersect(df1$b.year, df2$b.year)
# Get confidence levels
cl1 <-
get_confidence_levels(df1[df1$b.year %in% common_years, ])
cl2 <-
get_confidence_levels(df2[df2$b.year %in% common_years, ])
# Check if there is an overlap
overlap <-
(cl2$start <= cl1$end) &
(cl2$end >= cl1$start)
# Return TRUE if all confidence levels overlap
return(all(overlap))
}
}
#' @title Get Lincoln estimates
#' @description This is the function that calculates the Lincoln estimates based
#' on the paper by Alisauskas *et al.*, (2014) using harvest estimates and
#' a harvest rate dataframe created by \code{\link{get_harvest_rate}}
#' @param df A harvest rate dataframe generated by \code{\link{get_harvest_rate}}
#' , Default: NULL
#' @param harvest_df A dataframe containing harvest data estimates for the target
#' species and target years. The dataframe must have a column named b.year,
#' Default: NULL
#' @param harvest_correction_factor A factor used to correct the harvest
#' estimations which tend to be overstimated. By default, uses the factor for
#' goose species collected after 1999 as described by Padding and Royle (2012),
#' Default: 0.61
#' @param plot_estimates Should the estimates be plotted, Default: TRUE
#' @param save_estimates Should the estimates be saved in csv format, Default: TRUE
#' @param save_path The path where estimates should be saved, Default: '.'
#' @param ... Additional arguments needed by \code{\link{get_harvest_rate}} to
#' create the *df* if it is not provided
#' @return A dataframe containing the Lincoln estimates
#' @rdname get_lincoln_estimates
#' @export
get_lincoln_estimates <-
function(df = NULL,
harvest_df = NULL,
harvest_correction_factor = 0.61,
plot_estimates = TRUE,
save_estimates = TRUE,
save_path = ".",
...) {
if (is.null(df)) {
df <- get_harvest_rate(...)
}
if (is.null(harvest_df)) {
# Should be an error since it is a big portion of estimates
stop("A harvest dataframe for the selected species should be provided")
}
lincoln_df <- inner_join(df, harvest_df, by = "b.year")
lincoln <- lincoln_df %>%
mutate(harvest_adj = harvest * harvest_correction_factor) %>%
mutate(se_harvest_adj = se_harvest * harvest_correction_factor) %>%
mutate(var_harvest_adj = se_harvest_adj ^ 2) %>%
mutate(N = ((((n_banded + 1) * (harvest_adj + 1) * rho
) / (n_recoveries + 1)) - 1)) %>%
mutate(var_N_b.r = ((n_banded + 1) * (n_banded - n_recoveries)) / (((n_recoveries +
1) ^ 2) * (n_recoveries + 2))) %>%
mutate(var_N_bH.r = ((n_banded / n_recoveries) ^ 2) * var_harvest_adj + harvest_adj ^
2 * var_N_b.r) %>%
mutate(var_N = (((
n_banded * harvest_adj
) / n_recoveries) ^ 2) * var_rho + rho ^
2 * var_N_bH.r) %>%
mutate(se_N = sqrt(var_N)) %>%
mutate(cl_N = se_N * 1.96)
if (plot_estimates) {
library(ggplot2)
plt <- ggplot(data = lincoln, aes(x = b.year, y = N)) +
geom_point() + geom_smooth() +
scale_y_continuous(name = "Abundance (Lincoln)", limits = c(0, 500000)) +
geom_errorbar(aes(ymin = N - cl_N, ymax = N + cl_N)) +
labs(x = "Year")
print(plt)
}
lincoln_est <-
as.data.frame(cbind(lincoln$b.year, lincoln$N, lincoln$cl_N))
colnames(lincoln_est) <- c("year", "N", "NCL")
if (save_estimates) {
print(attributes(df)$SPEC)
SPEC <- if ("SPEC" %in% names(attributes(df))) {
attributes(df)$SPEC
} else {
warning("Species was not found in the estimation dataframe. Please
make sure df was created by the get_harvest_rate() function.
Using 'UNKN' as species.")
"UNKN"
}
file_path = file.path(save_path,
sprintf(
'%s_Lincoln_2000_2019_%s.csv',
SPEC,
format(Sys.time(), "%b%d_%Y")
))
write.csv(lincoln_est,
file_path)
}
return(lincoln_est)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.