#' CompositeTrend function
#'
#' @description This function can be used to produce composite metrics of change
#' (indicators), whilst propagating uncertainty in the individual
#' species trend estimates through to the final composite trend
#' metric. This function takes in a dataframe of sampled annual
#' occupancy estimates across multiple species and returns a
#' a single composite trend metric with uncertainty. This approach
#' is only suitable for species without missing years.
#'
#' @param indata The file path to an .rdata file containing a dataframe
#' (called samp_post - GP to change this) which contains
#' year columns (prefixed with "X", e.g "X1985"), a species
#' column ("species"), and an iteration identifier ("iter"). The
#' year columns contain the annual occupancy estimates for
#' the species-year-iteration combination in question.
#' @param output_path The location where the outputs should be saved.
#' @param trend_choice The approach used to combine the individual species
#' estimates into a single composite trend. See details.
#' @param group_name The name of the species group we are running, used for
#' naming output files.
#' @param save_iterations Do we want to save the composite trend estimates
#' for each individual iteration, these are generally used for
#' estimating temporal trends with uncertainty.
#' @param TrendScale Traditionally some indicators are scaled so the first
#' year is set to a given number, 100 in the case of the UK
#' biodiversity indicators. This value can be chosen here, with no
#' scaling as the default.
#' @param plot_output plot the resulting composite indicator: TRUE or FALSE.
#' @details There are a number of model to choose from:
#' \itemize{
#' \item{\code{"arithmetic_logit_occ"}}{ - The raw occupancy values are
#' converted to the log odds scale (using the logit function). The
#' arithmetic mean across species is used to create a composite trend for each
#' iteration. These means are then converted back to the odds scale (exp).}
#' \item{\code{"geometric_raw_occ"}}{ - Take the geometric mean across species
#' raw occupancy estimates }
#' \item{\code{"arithmetic_raw_occ"}}{ - potentially drop this, as not used.}
#' }
#' @return A summary file. This .csv is saved in the output_path location and
#' contains the annual composite indicator estimate (summarized across
#' the iterations as wither the mean or median). The unique number of
#' species contributing to the indicator is shown in the "spp_num" column.
#' Various forms of uncertainty (estimated across the iterations) for the
#' annual composite trend are presented, including the upper and lower
#' 95% credible intervals, SD of the mean and standard error of the mean.
#' @import ggplot2
#' @import reshape2
#' @import car
#' @export
CompositeTrend <- function(indata, output_path, trend_choice = "arithmetic_logit_occ", group_name,
save_iterations = "yes", TrendScale = NULL, plot_output = TRUE){
# add small custom functions - bad practice doing this here, move to a seperate useful functions file
geomean <- function(x){exp(mean(log(x)))}
quan_0.05 <- function(x) quantile(x, probs = 0.05, na.rm = TRUE)
quan_0.95 <- function(x) quantile(x, probs = 0.95, na.rm = TRUE)
quan_0.5 <- function(x) quantile(x, probs = 0.5, na.rm = TRUE)
cust_a_mean <- function(x) mean(x, na.rm = T)
sem <- function(x) sd(x)/sqrt(length(x))
# load data
load(indata)
names(samp_post) <- gsub("iter", "iteration", names(samp_post))
names(samp_post) <- gsub("spp", "species", names(samp_post))
number_of_spp <- length(unique(as.character(samp_post$species))) # How many species contribute to the indicator?
# loop through iterations - later convert to array and apply across array, should be quicker #
composite_trend <- NULL
for (j in 1:length(unique(samp_post$iteration))){
print(j)
temp_table <- NULL
temp_table <- samp_post[samp_post$iteration == j,]
t_table <- temp_table[,(1:(ncol(temp_table)-2))] # convert shape of the table
# geomean on the occ scale #
#log_temp_table <- t_table
#log_temp_table <- log(log_temp_table)
# geometric mean raw occupancy #
if(trend_choice == "geometric_raw_occ"){
composite_trend_temp <- apply(t_table, 2, geomean)
composite_trend <- rbind(composite_trend, composite_trend_temp)
}
# arithmetic mean raw occupancy #
if(trend_choice == "arithmetic_raw_occ"){
composite_trend_temp <- apply(t_table, 2, cust_a_mean)
composite_trend <- rbind(composite_trend, composite_trend_temp)
}
# arithmetic log odds (logit) occupancy back converted to occupancy scale with inv.logit
if(trend_choice == "arithmetic_logit_occ"){
logit_temp_table <- t_table
logit_temp_table <- as.data.frame(car::logit(as.matrix(logit_temp_table)))
composite_trend_temp <- apply(logit_temp_table, 2, cust_a_mean)
composite_trend <- rbind(composite_trend, composite_trend_temp)
}
}
# if the trend is based on logit, back convert to odds (rather than occupancy) following Steve Freeman's comments #
if(trend_choice == "arithmetic_logit_occ"){
composite_trend <- exp(composite_trend)
}
# Are we scaling the indicator? - Scale to 100 for UK biodiversity indicators
if(!is.null(TrendScale)){
multiplier <- TrendScale/mean(composite_trend[,1]) # identify multiplier
composite_trend <- composite_trend * multiplier # scale logit arithmetic mean so mean value in year 1 = the input value for TrendScale #
}
if(save_iterations == "yes"){
write.csv(composite_trend, file = paste(output_path, group_name, "_", trend_choice, "_composite_trend_iterations.csv", sep = "") , row.names = FALSE)
}
# save the summarised iterations #
composite_trend_summary <- data.frame(
year = as.numeric(gsub("year_", "", colnames(composite_trend))),
mean_occupancy = apply(composite_trend, 2, mean),
median_occupancy = apply(composite_trend, 2, median),
lower_5_perc_CI_occ = apply(composite_trend, 2, quan_0.05),
upper_95_perc_CI_occ = apply(composite_trend, 2, quan_0.95),
sd_occupancy = apply(composite_trend, 2, sd),
sem_occupancy = apply(composite_trend, 2, sem)
)
# add species number column #
composite_trend_summary$spp_num <- number_of_spp
write.csv(composite_trend_summary, file = paste(output_path, group_name, "_", trend_choice, "_composite_trend_summary.csv", sep = ""), row.names = FALSE)
if(plot_output == TRUE){
ggplot(composite_trend_summary, aes_string(x = "year", y = "mean_occupancy")) +
theme_bw() +
geom_ribbon(data = composite_trend_summary, aes_string(group = 1, ymin = "lower_5_perc_CI_occ", ymax = "upper_95_perc_CI_occ"), alpha = 0.2) +
geom_line(size = 1, col = "black") +
geom_point(size = 2) +
#geom_hline(yintercept = 100, linetype = "dashed") +
ylab("Index") +
xlab("Year") +
scale_y_continuous(limits = c(0, max(composite_trend_summary$upper_95_perc_CI_occ)))
ggsave(paste(group_name, "_", trend_choice, "_composite_trend.png", sep = ""), plot = last_plot(), path = output_path, width=6, height=4, units="in", dpi = 300)
}
return(composite_trend_summary)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.