#' Exact density for Pearson's correlation coefficient
#'
#' @param r numeric, sample correlation
#' @param rho numeric, "true" correlation coefficient
#' @param N numeric, sample size
#'
#' @return numeric, the probability density at *r*
#' @export
#'
#' @examples
#' f_corr_coef(r = 0.5, rho = 0.7, N = 10)
f_corr_coef <- function(r, rho, N) {
numerator_part <- (N - 2) * gamma(N - 1) * (1 - rho^2)^((N - 1) / 2) * (1 - r^2)^((N - 4) / 2)
denominator_part <- (2 * pi)^0.5 * gamma(N - (1 / 2)) * (1 - rho * r)^(N - (3 / 2))
hypergeo_part <- hypergeo::hypergeo(1 / 2, 1 / 2, (2 * N - 1) / 2, (rho * r + 1) / 2)
out <- (numerator_part / denominator_part) * hypergeo_part
return(Re(out))
}
#' Quickest Detection calculation
#'
#' @param rolling_window_stats data frame of rolling window statistics that is output from \code{\link{calc_rolling_stats}}
#' @param var_cols character vector, variable names that are columns of *data* containing time series to calculate quickest detection stats on
#' @param widths numeric vector, rolling window width(s) to be used, default is c(21)
#' @param stats_to_qd character vector, rolling window statistics to run quickest detection method on, defaults to all available: c("sd", "Ar1")
#' @param A_adj numeric, the threshold in the S-R statistic at which an alarm is triggered
#' @param exp_lakes character vector, values in *Lake* column of *rolling_window_stats* that correspond to experimental lakes
#' @param ref_lake character vector, values in *Lake* column of *rolling_window_stats* that correspond to the reference lake
#' @param ar1_alarm_rho numeric value between 0 and 1 (exclusive), 'true' value of lag-1 autocorrelation in alarm state, default is 0.95
#'
#' @return a data frame of values from the quickest detection method; TODO add more details on columns
#' @export
#'
#' @examples
#' rw_stats <- calc_rolling_stats(ts_data, var_cols = c("DO_Sat"))
#' qd_stats <- run_qd(rw_stats, var_cols = c("DO_Sat"))
run_qd <- function(rolling_window_stats, var_cols, widths = c(21), stats_to_qd = c("SD", "Ar1"), A_adj = 1E7, exp_lakes = c("R", "T"), ref_lake = "L", ar1_alarm_rho = 0.95) {
# QD data frame setup
stats_to_qd_completed <- c(stats_to_qd, paste0(stats_to_qd, "_sd"))
rolling_window_stats_Exp <- rolling_window_stats[rolling_window_stats$Lake %in% c(exp_lakes) & rolling_window_stats$Stat %in% stats_to_qd_completed & rolling_window_stats$Width %in% widths, ]
rolling_window_stats_Ref <- rolling_window_stats[rolling_window_stats$Lake %in% c(ref_lake) & rolling_window_stats$Stat %in% stats_to_qd_completed & rolling_window_stats$Width %in% widths, ]
rolling_window_stats_Exp$Exp_or_Ref <- "Exp"
rolling_window_stats_Ref$Exp_or_Ref <- "Ref"
rolling_window_stats_Exp_pivot <- tidyr::pivot_wider(rolling_window_stats_Exp,
id_cols = c("Year", "DOYtrunc", "Width", "Lake"),
values_from = dplyr::all_of(var_cols),
names_from = c("Stat", "Exp_or_Ref"),
names_glue = "{.value}_{Stat}_{Exp_or_Ref}"
)
rolling_window_stats_Ref_pivot <- tidyr::pivot_wider(rolling_window_stats_Ref,
id_cols = c("Year", "DOYtrunc", "Width"),
values_from = dplyr::all_of(var_cols),
names_from = c("Stat", "Exp_or_Ref"),
names_glue = "{.value}_{Stat}_{Exp_or_Ref}"
)
rolling_window_stats_forCalcs0 <- merge(rolling_window_stats_Exp_pivot, rolling_window_stats_Ref_pivot)
# get rid of rows where all variable columns are NA
varsAllColnames <- colnames(rolling_window_stats_forCalcs0)[!(colnames(rolling_window_stats_forCalcs0) %in% c("Lake", "Year", "DOYtrunc", "Width"))]
inds_allNA <- apply(rolling_window_stats_forCalcs0[, varsAllColnames], MARGIN = 1, FUN = function(x) all(is.na(x)))
rolling_window_stats_forCalcs <- rolling_window_stats_forCalcs0[!inds_allNA, ]
# create data frame to hold stats
QD_statCols <- paste(stats_to_qd, rep(c("Lambda", "R_vec", "R_squeals", "Pooled_sd", "Alarm"), each = length(stats_to_qd)), sep = "_")
newCols <- paste(rep(var_cols, each = length(QD_statCols)), QD_statCols, sep = "_")
rolling_window_stats_forCalcs[, newCols] <- NA
# iterate through each variable, do set-up calculations for QD calculation
for (i in 1:length(var_cols)) {
curVar <- var_cols[i]
# Calculate pooled sd for SD
if ("SD" %in% stats_to_qd) {
rolling_window_stats_forCalcs[, paste0(curVar, "_SD_Pooled_sd")] <- sqrt(rolling_window_stats_forCalcs[, paste0(curVar, "_SD_sd_Exp")]^2 + rolling_window_stats_forCalcs[, paste0(curVar, "_SD_sd_Ref")]^2)
}
# Calculate lambdas for AC
f.L_AC_hold <- rep(NA, nrow(rolling_window_stats_forCalcs))
g.L_AC_hold <- rep(NA, nrow(rolling_window_stats_forCalcs))
inds_allInfoForHGM <- !is.na(rolling_window_stats_forCalcs[, paste0(curVar, "_Ar1_Exp")]) &
!is.na(rolling_window_stats_forCalcs[, paste0(curVar, "_Ar1_Ref")])
f.L_AC_hold[inds_allInfoForHGM] <- log(
f_corr_coef(
r = rolling_window_stats_forCalcs[inds_allInfoForHGM, paste0(curVar, "_Ar1_Exp")],
rho = rolling_window_stats_forCalcs[inds_allInfoForHGM, paste0(curVar, "_Ar1_Ref")],
N = rolling_window_stats_forCalcs[inds_allInfoForHGM, "Width"]
)
)
g.L_AC_hold[inds_allInfoForHGM] <- log(
f_corr_coef(
r = rolling_window_stats_forCalcs[inds_allInfoForHGM, paste0(curVar, "_Ar1_Exp")],
rho = ar1_alarm_rho,
N = rolling_window_stats_forCalcs[inds_allInfoForHGM, "Width"]
)
)
rolling_window_stats_forCalcs[, paste0(curVar, "_Ar1_Lambda")] <- exp(g.L_AC_hold - f.L_AC_hold)
# calculate lambdas for SD
f.L_SD_hold <- stats::dnorm(rolling_window_stats_forCalcs[, paste0(curVar, "_SD_Exp")], mean = rolling_window_stats_forCalcs[, paste0(curVar, "_SD_Ref")], sd = rolling_window_stats_forCalcs[, paste0(curVar, "_SD_Pooled_sd")], log = TRUE)
g.L_SD_hold <- stats::dnorm(rolling_window_stats_forCalcs[, paste0(curVar, "_SD_Exp")], mean = rolling_window_stats_forCalcs[, paste0(curVar, "_SD_Ref")] + 2 * rolling_window_stats_forCalcs[, paste0(curVar, "_SD_Pooled_sd")], sd = rolling_window_stats_forCalcs[, paste0(curVar, "_SD_Pooled_sd")], log = TRUE)
rolling_window_stats_forCalcs[, paste0(curVar, "_SD_Lambda")] <- exp(g.L_SD_hold - f.L_SD_hold)
}
# order data by Lake, Width, Year, and then DOY
rolling_window_stats_forCalcs <- rolling_window_stats_forCalcs[order(rolling_window_stats_forCalcs$Lake, rolling_window_stats_forCalcs$Width, rolling_window_stats_forCalcs$Year, rolling_window_stats_forCalcs$DOYtrunc), ]
# Compute the S-R statistic for each lake, each day that has a rolling window stat calculated for it
widths <- unique(rolling_window_stats_forCalcs$Width)
for (w in 1:length(widths)) {
curWidth <- widths[w]
for (v in 1:length(var_cols)) {
curVar <- var_cols[v]
print(paste("Current variable QD-ing:", curVar))
indices_Stat_notNA <- !is.na(rolling_window_stats_forCalcs[, paste0(curVar, "_Ar1_Lambda")])
years <- unique(rolling_window_stats_forCalcs$Year)
for (y in 1:length(years)) {
Lakes <- exp_lakes
for (l in 1:length(Lakes)) {
doys <- rolling_window_stats_forCalcs$DOYtrunc[rolling_window_stats_forCalcs$Year == years[y] & rolling_window_stats_forCalcs$Lake == Lakes[l] & rolling_window_stats_forCalcs$Width == widths[w] & indices_Stat_notNA]
if (length(doys) > 0) {
for (d in 1:length(doys)) {
curInd <- rolling_window_stats_forCalcs$Year == years[y] & rolling_window_stats_forCalcs$Lake == Lakes[l] & rolling_window_stats_forCalcs$DOYtrunc == doys[d] & rolling_window_stats_forCalcs$Width == widths[w]
if (d == 1) {
rolling_window_stats_forCalcs[curInd, paste0(curVar, "_Ar1_R_vec")] <- 0
rolling_window_stats_forCalcs[curInd, paste0(curVar, "_Ar1_R_squeals")] <- 0
rolling_window_stats_forCalcs[curInd, paste0(curVar, "_Ar1_Alarm")] <- 0
rolling_window_stats_forCalcs[curInd, paste0(curVar, "_SD_R_vec")] <- 0
rolling_window_stats_forCalcs[curInd, paste0(curVar, "_SD_R_squeals")] <- 0
rolling_window_stats_forCalcs[curInd, paste0(curVar, "_SD_Alarm")] <- 0
} else {
prevInd <- rolling_window_stats_forCalcs$Year == years[y] & rolling_window_stats_forCalcs$Lake == Lakes[l] & rolling_window_stats_forCalcs$DOYtrunc == doys[d - 1] & rolling_window_stats_forCalcs$Width == widths[w]
# calculate S-R stat and see if alarm for AC
cur_R_AC <- (1 + rolling_window_stats_forCalcs[prevInd, paste0(curVar, "_Ar1_R_vec")]) * rolling_window_stats_forCalcs[curInd, paste0(curVar, "_Ar1_Lambda")]
rolling_window_stats_forCalcs[curInd, paste0(curVar, "_Ar1_R_squeals")] <- cur_R_AC
if (!is.na(cur_R_AC) & cur_R_AC <= A_adj) {
rolling_window_stats_forCalcs[curInd, paste0(curVar, "_Ar1_R_vec")] <- cur_R_AC
rolling_window_stats_forCalcs[curInd, paste0(curVar, "_Ar1_Alarm")] <- 0
} else if (!is.na(cur_R_AC) & cur_R_AC > A_adj) {
rolling_window_stats_forCalcs[curInd, paste0(curVar, "_Ar1_R_vec")] <- 0
rolling_window_stats_forCalcs[curInd, paste0(curVar, "_Ar1_Alarm")] <- 1
} else {
rolling_window_stats_forCalcs[curInd, paste0(curVar, "_Ar1_R_vec")] <- rolling_window_stats_forCalcs[prevInd, paste0(curVar, "_Ar1_R_vec")]
rolling_window_stats_forCalcs[curInd, paste0(curVar, "_Ar1_Alarm")] <- 0
}
# calculate S-R stat and see if alarm for SD
cur_R_SD <- (1 + rolling_window_stats_forCalcs[prevInd, paste0(curVar, "_SD_R_vec")]) * rolling_window_stats_forCalcs[curInd, paste0(curVar, "_SD_Lambda")]
rolling_window_stats_forCalcs[curInd, paste0(curVar, "_SD_R_squeals")] <- cur_R_SD
if (!is.na(cur_R_SD) & cur_R_SD <= A_adj) {
rolling_window_stats_forCalcs[curInd, paste0(curVar, "_SD_R_vec")] <- cur_R_SD
rolling_window_stats_forCalcs[curInd, paste0(curVar, "_SD_Alarm")] <- 0
} else if (!is.na(cur_R_SD) & cur_R_SD > A_adj) {
rolling_window_stats_forCalcs[curInd, paste0(curVar, "_SD_R_vec")] <- 0
rolling_window_stats_forCalcs[curInd, paste0(curVar, "_SD_Alarm")] <- 1
} else {
rolling_window_stats_forCalcs[curInd, paste0(curVar, "_SD_R_vec")] <- rolling_window_stats_forCalcs[prevInd, paste0(curVar, "_SD_R_vec")]
rolling_window_stats_forCalcs[curInd, paste0(curVar, "_SD_Alarm")] <- 0
}
}
}
}
}
}
}
}
return(rolling_window_stats_forCalcs)
}
#' Format quickest detection results for plotting
#'
#' @param qd_stats data frame, output from \code{\link{run_qd}}
#' @param var_cols character vector, variable names that are columns of *data* containing time series to calculate rolling window stats on
#' @param stats character vector, statistics to format alarms for, default = c("SD", "Ar1)
#' @param bloom_fert_df data frame of fertilization start and end dates, see \code{\link{bloom_fert_dates}} for default and formatting
#' @param exp_lakes character vector, values in *Lake* column of qd_stats that correspond to experimental lakes
#' @param just_alarm_DOYs TRUE or FALSE (default), should results be filtered to just DOYs that have an alarm (in any variable)?
#'
#' @return data frame with quickest detection alarm info
#' @export
#'
#' @examples
#' rw_stats <- calc_rolling_stats(ts_data, var_cols = c("DO_Sat"))
#' qd_stats <- run_qd(rw_stats, var_cols = c("DO_Sat"))
#' qd_alarms <- format_qd(qd_stats, var_cols = c("DO_Sat"))
format_qd <- function(qd_stats, var_cols = c("Manual_Chl", "BGA_HYLB", "DO_Sat", "pH"), stats = c("SD", "Ar1"), bloom_fert_df = bloom_fert_dates, exp_lakes = c("T", "R"), just_alarm_DOYs = FALSE) {
# transform QD df to make it easy to add points
alarmCols <- paste0(rep(var_cols, times = length(stats)), rep(paste0("_", stats, "_Alarm"), each = length(var_cols)))
if (just_alarm_DOYs == TRUE) {
inds_anyAlarm <- apply(qd_stats[, alarmCols], MARGIN = 1, FUN = function(x) !all(is.na(x)) & any(x == 1, na.rm = TRUE))
QD_long0 <- as.data.frame(
tidyr::pivot_longer(qd_stats[inds_anyAlarm, ], cols = dplyr::starts_with(var_cols), names_to = "Var_Stat", values_to = "Value")
)
} else {
QD_long0 <- as.data.frame(
tidyr::pivot_longer(qd_stats, cols = dplyr::starts_with(var_cols), names_to = "Var_Stat", values_to = "Value")
)
}
# separate variables and stats into separate columns
vars_to_remove <- paste(paste0(var_cols, "_"), collapse = "|")
QD_long0 <- QD_long0 %>%
dplyr::mutate(Stat = gsub(pattern = vars_to_remove, replacement = "", x = .data$Var_Stat))
stats_to_remove <- paste(paste0("_", unique(QD_long0$Stat)), collapse = "|")
QD_long0 <- QD_long0 %>%
dplyr::mutate(Var = gsub(pattern = stats_to_remove, replacement = "", x = .data$Var_Stat))
# pull out just the stats we want for plotting
QD_long <- QD_long0[QD_long0[, "Stat"] %in% c(paste0(stats, "_Exp"), paste0(stats, "_Alarm")), c("Year", "Lake", "DOYtrunc", "Width", "Var", "Stat", "Value")]
Alarms <- QD_long[grep(pattern = "Alarm", QD_long$Stat) & !is.na(QD_long[, "Value"]) & QD_long[, "Value"] %in% c(0, 1), ]
Alarms[, "Stat"] <- gsub(pattern = "_Alarm", replacement = "", x = Alarms[, "Stat"])
colnames(Alarms)[colnames(Alarms) == "Value"] <- "Alarm"
statsAll <- QD_long[QD_long[, "Stat"] %in% paste0(stats, "_Exp"), ]
statsAll[, "Stat"] <- gsub(pattern = "_Exp", replacement = "", x = statsAll[, "Stat"])
# combine stats and alarms
statsAlarms <- dplyr::full_join(statsAll, Alarms)
statsAlarms$Stat <- factor(statsAlarms$Stat, levels = stats)
# classify alarms
statsAlarms <- dplyr::left_join(statsAlarms, bloom_fert_df)
statsAlarms <- statsAlarms %>%
dplyr::mutate(fertStartDOY = ifelse(is.na(fertStartDOY), 367, fertStartDOY)) %>%
dplyr::mutate(bloomStartDOY = ifelse(is.na(bloomStartDOY), 368, bloomStartDOY))
statsAlarms$AlarmType <- "NA"
statsAlarms <- statsAlarms %>%
dplyr::mutate(AlarmType = ifelse(.data$DOYtrunc < fertStartDOY & !is.na(.data$Alarm) & .data$Alarm == 1, "False", .data$AlarmType)) %>%
dplyr::mutate(AlarmType = ifelse(.data$DOYtrunc >= fertStartDOY & .data$DOYtrunc < bloomStartDOY & !is.na(.data$Alarm) & .data$Alarm == 1, "True", .data$AlarmType)) %>%
dplyr::mutate(AlarmType = ifelse(.data$DOYtrunc >= bloomStartDOY & !is.na(.data$Alarm) & .data$Alarm == 1, "Late", .data$AlarmType))
statsAlarms$AlarmType <- factor(statsAlarms$AlarmType, levels = c("False", "True", "Late"), ordered = TRUE)
statsAlarms_out <- statsAlarms %>%
dplyr::filter(!is.na(.data$Value) & !is.na(.data$Alarm)) %>%
dplyr::select(-c(fertStartDOY, fertEndDOY, bloomStartDOY))
return(statsAlarms_out)
}
#' Plot rolling window statistics and qd alarms
#'
#' @param rolling_window_stats data frame of rolling window statistics that is output from \code{\link{calc_rolling_stats}}
#' @param qd_alarms data frame of quickest detection alarms, output from \code{\link{format_qd}}
#' @param var_col character vector, variable names that are columns of *data* containing time series to calculate rolling window stats on
#' @param stats statistics to plot, default and options are c("SD", "Ar1")
#' @param title title to put on the plot
#' @param widths numeric vector, rolling window width(s) to be used, default is c(21)
#' @param plot_bloom_lines TRUE or FALSE (default), should lines for the bloom dates be plotted?
#' @param bloom_dates if plot_bloom_lines = TRUE, this data frame is used to determine where
#'
#' @return a ggplot object
#' @import ggplot2
#' @export
#'
#' @examples
#' use_vars <- c("Manual_Chl", "BGA_HYLB", "DO_Sat", "pH")
#' rw_stats <- calc_rolling_stats(ts_data, var_cols = use_vars)
#' qd_stats <- run_qd(rw_stats, var_cols = use_vars)
#' qd_alarms <- format_qd(qd_stats, bloom_fert_df = bloom_fert_dates)
#' plot_qd(rw_stats, qd_alarms, use_vars[1],
#' stats = c("SD", "Ar1"),
#' plot_bloom_lines = TRUE, bloom_dates = bloom_fert_dates
#' )
plot_qd <- function(rolling_window_stats, qd_alarms, var_col, stats = c("SD", "Ar1"), title = "R.W. width = 21", widths = c(21), plot_bloom_lines = FALSE, bloom_dates) {
# get just the alarms
statsAlarms <- qd_alarms %>%
dplyr::filter(!is.na(.data$Alarm) & .data$Alarm == 1 & .data$Var == var_col)
# add the points
p <- ggplot(rolling_window_stats[rolling_window_stats$Stat %in% stats & rolling_window_stats$Width %in% widths, ], aes(x = DOYtrunc, y = get(var_col), color = Lake)) +
facet_grid(cols = vars(Year), rows = vars(Stat), scales = "free_y") +
geom_line(size = 1.25) +
geom_point(data = statsAlarms, mapping = aes(x = DOYtrunc, y = Value, color = Lake, fill = AlarmType), shape = 21, size = 3, color = "black") + # , fill="red", color="black"
scale_fill_manual(values = c("#FFE77AFF", "#5F9260FF", "darkgrey")) +
# geom_point(data=statsAlarms, aes(x=DOYtrunc, y=Value, color=Lake), size = 2) +
theme_bw(base_size = 16) +
labs(y = var_col, x = "Day of Year")
if (length(unique(rolling_window_stats$Lake)) == 2) {
p <- p + scale_color_manual(values = c("dodgerblue", "orangered3"))
} else if (length(unique(rolling_window_stats$Lake)) == 3) {
p <- p + scale_color_manual(values = c("dodgerblue", "orangered3", "grey10"))
}
if (plot_bloom_lines) {
p <- p + geom_vline(data = bloom_dates, aes(xintercept = bloomStartDOY, color = Lake), linetype = "solid") +
geom_vline(data = bloom_dates, aes(xintercept = fertStartDOY), color = "black", linetype = "dashed")
}
if (!is.na(title)) {
p <- p + ggtitle(title)
}
return(p)
}
#' Calculate rates of true and false alarms
#'
#' @param qd_alarms data frame of quickest detection alarms, output from \code{\link{format_qd}}
#' @param bloom_fert_df data frame of fertilization start and end dates, see \code{\link{bloom_fert_dates}} for default and formatting
#'
#' @return data frame with true and false positive rates for combinations of variable and statistic
#' @export
#'
#' @examples
#' use_vars <- c("Manual_Chl", "BGA_HYLB", "DO_Sat", "pH")
#' rw_stats <- calc_rolling_stats(ts_data, var_cols = use_vars)
#' qd_stats <- run_qd(rw_stats, var_cols = use_vars)
#' qd_alarms <- format_qd(qd_stats, bloom_fert_df = bloom_fert_dates)
#' calc_alarm_rates(qd_alarms)
calc_alarm_rates <- function(qd_alarms, bloom_fert_df = bloom_fert_dates) {
# add DOYs of fert and bloom start
qd_alarms_wDates <- dplyr::left_join(qd_alarms, bloom_fert_df) %>%
dplyr::mutate(fertStartDOY = ifelse(is.na(fertStartDOY), 367, fertStartDOY)) %>%
dplyr::mutate(bloomStartDOY = ifelse(is.na(bloomStartDOY), 368, bloomStartDOY))
# assign DOYs to time periods
qd_alarms_wDates$TimePeriod <- NA
qd_alarms_wDates <- qd_alarms_wDates %>%
dplyr::mutate(TimePeriod = ifelse(.data$DOYtrunc < fertStartDOY, "preFert", .data$TimePeriod)) %>%
dplyr::mutate(TimePeriod = ifelse(.data$DOYtrunc >= fertStartDOY & .data$DOYtrunc < bloomStartDOY, "alarmPeriod", .data$TimePeriod)) %>%
dplyr::mutate(TimePeriod = ifelse(.data$DOYtrunc >= bloomStartDOY, "postBloom", .data$TimePeriod))
# calculate rates of positive alarms
qd_alarm_rates <- qd_alarms_wDates %>%
dplyr::filter(.data$TimePeriod != "postBloom") %>%
dplyr::group_by(.data$Var, .data$Stat, .data$TimePeriod) %>%
dplyr::summarise(nAlarms = sum(.data$Alarm == 1), nDays = sum(.data$Alarm %in% c(0, 1))) %>%
dplyr::ungroup() %>%
dplyr::mutate(Rate = .data$nAlarms / .data$nDays, PositiveType = ifelse(.data$TimePeriod == "preFert", FALSE, TRUE))
return(qd_alarm_rates)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.