# Code to calculate statistics from saved WGN simulated data
# Calculate Statistics
#======================================================================================
#' @importFrom stats sd cor var
NULL
#' Calculate statistics from a climate variable timeseries.
#'
#' \code{calc_ts_stats} returns a list of statistics calculated from an input timeseries.
#'
#' The statistics calculated are:
#' \itemize{
#' \item annual maximum values (name: \emph{ann_max})
#' \item annual total values (name: \emph{ann_tot})
#' \item monthly mean climatology (name: \emph{mon_mean})
#' \item monthly standard deviation(name: \emph{mon_sd})
#' \item correlation of subsequent non-zero days by month (name: \emph{cor_wetdays_daily}),
#' \item variance of non-zero days by month (name: \emph{var_wetdays_daily}),
#' \item proportion of dry (zero rainfall) days within a month (name: \emph{pdry}).
#' }
#'
#' @param wgn_data_vec A named vector of numbers with names as character dates of the form
#' \code{\%Y-\%m-\%d}; the input daily timeseries.
#' @return A named list of vectors containing the calculated statistics.
#' @export
calc_ts_stats <- function(wgn_data_vec) {
# Conditions on arguments 1) Numeric with named date of format %Y-%m-%d
#=====================================================================================================
if (!is.numeric(wgn_data_vec)) {
abort_bad_argument("wgn_data_vec", must = "be numeric", not = wgn_data_vec)
}
if(is.null(names(wgn_data_vec))) {
abort_bad_argument("Names of wgn_data_vec", must = "be strings convertible as Dates of format %Y-%m-%d")
}
if (any(is.na(as.Date(names(wgn_data_vec), "%Y-%m-%d")))) {
abort_bad_argument("Names of wgn_data_vec", must = "be strings convertible as Dates of format %Y-%m-%d")
}
ann_max <- sort(tapply(wgn_data_vec, as.POSIXlt(names(wgn_data_vec))$year, max))
ann_tot <- sort(tapply(wgn_data_vec, as.POSIXlt(names(wgn_data_vec))$year, sum))
mon_indices <- get_mon_indices(names(wgn_data_vec))
mon_mean <- rep(NA, 12)
mon_sd <- rep(NA, 12)
for (i_mon in 1:12) {
mon_data <- tapply(wgn_data_vec[mon_indices[[i_mon]]], as.POSIXlt(names(wgn_data_vec[mon_indices[[i_mon]]]))$year, sum)
mon_mean[i_mon] <- mean(mon_data)
mon_sd[i_mon] <- sd(mon_data)
rm(mon_data)
}
cor_wetdays_daily <- rep(NA, 12)
var_wetdays_daily <- rep(NA, 12)
pdry <- rep(NA, 12)
wgn_data_vec_wetdays <- wgn_data_vec
wgn_data_vec_wetdays[wgn_data_vec_wetdays == 0] <- NA
for (i_mon in 1:12) {
tot_days <- length(mon_indices[[i_mon]])
# subset data by month (may be possible to avoid copying - rethink cor calculation)
wgn_data_mon <- wgn_data_vec_wetdays[mon_indices[[i_mon]]]
# correlation of consecutive wet days in the month
cor_wetdays_daily[i_mon] <- cor(wgn_data_mon[-tot_days], wgn_data_mon[-1], use = "na.or.complete")
# variance of wet days in the month
var_wetdays_daily[i_mon] <- var(wgn_data_mon, use = "na.or.complete")
# proportion of dry days
pdry[i_mon] <- sum(is.na(wgn_data_mon)) / tot_days
rm(wgn_data_mon)
}
rm(wgn_data_vec_wetdays)
return(list(ann_max = ann_max,
ann_tot = ann_tot,
mon_mean = mon_mean,
mon_sd = mon_sd,
cor_wetdays_daily = cor_wetdays_daily,
var_wetdays_daily = var_wetdays_daily,
pdry = pdry))
}
#' Calculate statistics from all the replicates of a generated climate variable timeseries.
#'
#' \code{calc_rep_stats} returns a list of statistics calculated from an input list of timeseries
#' replicates.
#'
#' Uses the function \code{calc_ts_stats} to calculate the statistics for each replicate. The statistics
#' from all the replicates are saved in a dataframe corresponding to each statistic.
#'
#' Similar to \code{calc_ts_stats}, the statistics calculated are:
#' \itemize{
#' \item annual maximum values (name: \emph{ann_max})
#' \item annual total values (name: \emph{ann_tot})
#' \item monthly mean climatology (name: \emph{mon_mean})
#' \item monthly standard deviation(name: \emph{mon_sd})
#' \item correlation of subsequent non-zero days by month (name: \emph{cor_wetdays_daily}),
#' \item variance of non-zero days by month (name: \emph{var_wetdays_daily}),
#' \item proportion of dry (zero rainfall) days within a month (name: \emph{pdry}).
#' }
#'
#' @param wgn_data_rep A list of vectors containing the replicates of input timeseries.
#' Each element of the list should be a named vector of numbers with names as character dates of the form
#' \code{\%Y-\%m-\%d}.
#' @param nyears number of years of data in the input, \code{wgn_data_rep}
#' @return A named list of dataframes containing the calculated statistics.
#' @export
calc_rep_stats <- function(wgn_data_rep, nyears = NULL) {
# Conditions on arguments 1) Numeric list
#=====================================================================================================
if (!is.list(wgn_data_rep)) {
abort_bad_argument("wgn_data_rep", must = "be a list of data vectors")
}
# WGN statistics calculation - Loop over replicates
#===================================================================================================
# Annual rainfall total, Annual rainfall maximum - sorted
# Monthly rainfall total - mean and standard deviation across years
# Correlation & variance of wet day rainfall by month (using all nyears of data)
# Proportion of dry days by month (using all nyears of data)
replicates <- length(wgn_data_rep)
# !!!!!!!!! NTS: Easier to take in nyears as an arg - do that
if (is.null(nyears)) {nyears <- length(unique(as.POSIXlt(names(wgn_data_rep[[1]]))$year))}
ann_tot_rep <- matrix(NA, nrow = nyears, ncol = replicates)
ann_max_rep <- matrix(NA, nrow = nyears, ncol = replicates)
mon_mean_rep <- matrix(NA, nrow = replicates, ncol = 12)
mon_sd_rep <- matrix(NA, nrow = replicates, ncol = 12)
cor_wetdays_daily_rep <- matrix(NA, nrow = replicates, ncol = 12)
var_wetdays_daily_rep <- matrix(NA, nrow = replicates, ncol = 12)
pdry_rep <- matrix(NA, nrow = replicates, ncol = 12)
for (i_rep in 1:replicates) {
wgn_data_vec <- wgn_data_rep[[i_rep]]
wgn_data_stats <- calc_ts_stats(wgn_data_vec)
# Save the statistics for each replicate
#=================================================================================================
ann_tot_rep[ ,i_rep] <- wgn_data_stats$ann_tot
ann_max_rep[ ,i_rep] <- wgn_data_stats$ann_max
mon_mean_rep[i_rep, ] <- wgn_data_stats$mon_mean
mon_sd_rep[i_rep, ] <- wgn_data_stats$mon_sd
cor_wetdays_daily_rep[i_rep, ] <- wgn_data_stats$cor_wetdays_daily
var_wetdays_daily_rep[i_rep, ] <- wgn_data_stats$var_wetdays_daily
pdry_rep[i_rep, ] <- wgn_data_stats$pdry
rm(wgn_data_vec, wgn_data_stats)
}
# Name dimensions of statistics
#===================================================================================================
# Dimension names -------------------------------
year_names <- paste(1:nyears)
rep_names <- paste(1:replicates)
mon_names <- c("Jan", "Feb", "Mar", "Apr", "May", "Jun", "Jul", "Aug", "Sep", "Oct", "Nov", "Dec")
# Assigning dimension names ---------------------
df_ann_tot <- data.frame(year_names, ann_tot_rep)
colnames(df_ann_tot) <- c("Years", rep_names)
df_ann_max <- data.frame(year_names, ann_max_rep)
colnames(df_ann_max) <- c("Years", rep_names)
df_mon_mean <- data.frame(rep_names, mon_mean_rep)
colnames(df_mon_mean) <- c("Replicates", mon_names)
df_mon_sd <- data.frame(rep_names, mon_sd_rep)
colnames(df_mon_sd) <- c("Replicates", mon_names)
df_cor_wetdays_daily <- data.frame(rep_names, cor_wetdays_daily_rep)
colnames(df_cor_wetdays_daily) <- c("Replicates", mon_names)
df_var_wetdays_daily <- data.frame(rep_names, var_wetdays_daily_rep)
colnames(df_var_wetdays_daily) <- c("Replicates", mon_names)
df_pdry <- data.frame(rep_names, pdry_rep)
colnames(df_pdry) <- c("Replicates", mon_names)
rm(ann_tot_rep, ann_max_rep, mon_mean_rep, mon_sd_rep, cor_wetdays_daily_rep, var_wetdays_daily_rep, pdry_rep)
return(list(ann_max = df_ann_max,
ann_tot = df_ann_tot,
mon_mean = df_mon_mean,
mon_sd = df_mon_sd,
cor_wetdays_daily = df_cor_wetdays_daily,
var_wetdays_daily = df_var_wetdays_daily,
pdry = df_pdry))
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.