################################################################################
#' mk_test
#'
#' @description
#' mk_test performs a Mann-Kendall Test and returns multiple test statistics given
#' a variable y and a variable x in vector form.
#'
#' @param y_var a vector of numbers representing the y variable
#' @param x_var a vector of numbers representing the x variable
#' @param z_low a number representing the z-value for the lower confidence level,
#' defaulted to 0.52, equivalent to the 70 percent confidence threshold
#' @param z_high a number representing the z-value for the upper confidence level,
#' defaulted to 1.28, equivalent to the 90 percent confidence threshold
#' @param keep_z a boolean for whether or not z-value will be included in the
#' output, defaulted to FALSE
#'
#' @return a dataframe of the form || slope || intercept || ConfidenceLevel || z_value ||
#'
#' @details
#'1. y and x variables should have equal length
#'2. Although the function does allow input y to contain duplicate values, please
#' take into account that an excess amount of duplicates within y will likely
#' decrease the accuracy of the result generated by the Mann-Kendall test. For
#' data sets with too many duplicates in the dependant variable, consider
#' testing for the trend using alternative methods, such as generalized linear
#' model. Refer to other functions such as hurdle_test and negbin for more
#' information.
#'
#' @export
mk_test <- function(y_var, x_var, z_low=0.52, z_high=1.28, keep_z=FALSE){
sen <- zyp.sen(y_var~x_var)
slope <- round(sen$coefficients[[2]],2)
intercept <- round(sen$coefficients[[1]],2)
corr<- cor.test(y_var, x_var, method = "kendall", alternative = "two.sided", exact = FALSE)
Z <-abs(corr$statistic[["z"]])
CATTrend <- case_when(Z>=z_high ~ "Confident",
Z>=z_low & Z<z_high ~ "Likely",
Z<z_low ~ "Uncertain")
years.for.trend <- sum(!is.na(y_var))
ret<- data.frame(slope=slope,intercept=intercept,CATTrend=CATTrend,
years.for.trend=years.for.trend, z_value=Z)
if(!keep_z){
ret <- ret %>% dplyr::select(-z_value)
}
return(ret)
}
################################################################################
#' hurdle_test
#'
#' @description
#' hurdle_test performs a hurdle test and returns multiple test statistics given
#' a variable y and a variable x. Hurdle test can be used to fit a linear model
#' when a data set contains an excess amount of zeros.
#'
#' @param y_var a vector of numbers representing the y variable
#' @param x_var a vector of numbers representing the x variable
#' @param zero_fam a character string representing the model family for the zero
#' portion of the data. Defaulted to "binomial" indicating binomial family
#' @param count_dist a character string representing the count distribution.
#' Defaulted to 'negbin', the negative binomial distribution
#' @param zero_dist a character string representing the zero distribution.
#' Defaulted to "binomial", the binomial distribution
#' @param link a character string representing the link for binomial zero hurdle
#' model. Defaulted to 'logit', the logarithm function
#'
#' @return a dataframe of the form || slope || intercept || ConfidenceLevel ||,
#' or a character string if the test is not applicable;
#'
#' @details
#' The hurdle test deals with zero and non-zero portions separatly using two
#' different models: the negative binomial distribution for the non-zero portion
#' and the binomial distribution for the zero portion. Confidence intervals are
#' created to report the level of confidence in the trend detected. A generalized
#' linear model (glm) will be then established between the two distributions with
#' a logarithm link function. The slope and intercept of the resulting model will
#' be recorded and returned.
#' Things to take notice of:
#' 1. y and x variables need to have the same length
#' 2. Although the detail section only describes the condition with a negative
#' binomial count distribution and a binomial zero distribution, this function
#' can be used for other distributions as well.
#' 3. Do note that this function does not check for the accuracy of the fitted
#' model, neither does it provide any recommendations on what distribution
#' fits the best. In order to use other families, it is highly suggested to
#' look at the concept of general linear models beforehand.
#' 4. The program will return an error if the y variable does not contain any zeros.
#' In order to calculate the trend using this function, make sure that there
#' is at least one zero value in the y variable. If the error message "Data
#' cannot be fitted using hurdle model" appears, user might want to consider
#' using negative binomial model (provided as the negbin() function in the
#' same package) for cases with excess amount of zeros, or Mann-Kendall Test
#' for cases with limited ties and zeros.
#'
#' @export
hurdle_test <- function(y_var, x_var, zero_fam="binomial", count_dist="negbin",
zero_dist="binomial", link="logit"){
# Are we confident there is a trend?
# Count portion of hurdle
y_count <- y_var[y_var>0]
x_count <- x_var[y_var>0]
model.count <- tryCatch(MASS::glm.nb(y_count~x_count),
error=function(e){return("A")})
low.c<- ifelse(is.character(model.count), NA, exp(confint(profile(model.count), level=0.9))[2,1])
hi.c <- ifelse(is.character(model.count), NA, exp(confint(profile(model.count), level=0.9))[2,2])
# Zero portion of hurdle
zero_indicator <- !I(y_var==0)
model.zero <- tryCatch(glm(zero_indicator~x_var, family=zero_fam),
error=function(e){return("A")})
low.z<-ifelse(is.character(model.zero), NA, exp(confint(profile(model.zero), level=0.9))[2,1])
hi.z<-ifelse(is.character(model.zero), NA, exp(confint(profile(model.zero), level=0.9))[2,2])
if(!is.na(low.c)&!is.na(hi.c)&!is.na(low.z)&!is.na(hi.z)){
# combined confidence test @ 70% and 90% confidence
pass <- ifelse(all(low.c*low.z<=1, hi.c*hi.z>=1), "Maybe?", "Confident")
if (pass == "Maybe?"){
low.c<- exp(confint(profile(model.count), level=0.7))[2,1]
hi.c <- exp(confint(profile(model.count), level=0.7))[2,2]
low.z<-exp(confint(profile(model.zero), level=0.7))[2,1]
hi.z<-exp(confint(profile(model.zero), level=0.7))[2,2]
pass <- ifelse(all(low.c*low.z<=1, hi.c*hi.z>=1), "Uncertain", "Likely")
}
# Get slope and intercept from hurdle
model <- hurdle(y_var~x_var, dist=count_dist, zero.dist=zero_dist, link=link)
fitted <- unname(model$fitted.values)
slope <- round((fitted[length(fitted)] - fitted[1])/
(max(x_var) - min(x_var)),2)
intercept <- round((fitted[1]-min(x_var)*((fitted[length(fitted)] - fitted[1])/
(max(x_var) - min(x_var)))),2)
years.for.trend <- sum(!is.na(y_var))
data.frame(slope=slope,intercept=intercept,CATTrend=pass,years.for.trend=years.for.trend)
}else{
"Error: Data cannot be fitted using hurdle model"
data.frame(slope=NA,intercept=NA,CATTrend=NA,years.for.trend=NA)
}
}
################################################################################
#' negbin
#'
#' @description
#' negbin fits a negative binomial model and returns multiple test statistics
#' given a variable y and a variable x.
#'
#' @param y_var a vector of numbers representing the y variable
#' @param x_var a vector of numbers representing the x variable
#'
#' @return a dataframe of the form || slope || intercept || ConfidenceLevel ||
#'
#' @details
#' The negbin function fits a negative binomial model to a sets of x and y variables
#' Confidence intervals are created to measure the level of confidence in the trend.
#' The slope and intercept of the resulting model are recorded and returned.
#' Things to take notice:
#' 1. y and x variables need to have the same length
#' 2. Theoretically saying, negbin is the most versatile among the three trend tests.
#' The preferred condition to use negbin function is when there is neither an
#' excess amount of zeros, nor can the trend be calculated by Mann-Kendall test.
#' For those conditions, consider using the two other functions instead.
#'
#' @export
negbin <- function(y_var, x_var){
model <- tryCatch(glm.nb(y_var~x_var),
error=function(e){return("A")})
low<-ifelse(is.character(model), NA, exp(confint(profile(model), level=0.9))[2,1])
hi<-ifelse(is.character(model), NA, exp(confint(profile(model), level=0.9))[2,2])
if (!is.na(low)&!is.na(hi)){
# confidence test @ 70% and 90% confidence
pass <- ifelse(all(low<=1, hi>=1), "Maybe?", "Confident")
if (pass=="Maybe?"){
low <- exp(confint(profile(model), level=0.7))[2,1]
hi <-exp(confint(profile(model), level=0.7))[2,2]
pass <- ifelse(all(low<=1, hi>=1),"Uncertain", "Likely")
}
# Get slope from negative binomial
fitted <- unname(model$fitted.values)
slope <- round((fitted[length(fitted)] - fitted[1])/
(max(x_var) - min(x_var)),2)
intercept <- round((fitted[1]-min(x_var)*((fitted[length(fitted)] - fitted[1])/
(max(x_var) - min(x_var)))),2)
years.for.trend <- sum(!is.na(y_var))
data.frame(slope=slope,intercept=intercept,CATTrend=pass,years.for.trend=years.for.trend)
}else{
"Error: Data cannot be fitted using negative binomial model"
data.frame(slope=NA,intercept=NA,CATTrend=NA,years.for.trend=NA)
}
}
################################################################################
#' bday_calculation
#'
#' @description
#' bday_calculation calculates the last and first bday for specified years from
#' the hydrometric record at a station.
#'
#' @param station a character string representing the station number that exists
#' in hydat
#' @param year a vecter of numbers representing the list of years used for calculating
#' bdays
#'
#' @return a dataframe of the format || station || year || lastbday || firstbday ||
#'
#' @details
#' bdays are days in the HYDAT hydrometric database with data symbol "B", which
#' represent ice conditions.
#'
#' @export
bday_calculation <- function(station, year="all"){
flow.daily <- hy_daily_flows(station)
flow.daily$Year <- as.numeric(format(flow.daily$Date, "%Y"))
flow.daily$Month<- as.numeric(format(flow.daily$Date, "%m"))
if (year=="all"){
valid_year=unique(flow.daily$Year)
}else if(any(!year %in% flow.daily$Year)){
return("Some of the year provided are not in HYDAT")
valid_year=year[(year %in% unique(flow.daily$Year))]
}else{
valid_year=year
}
first_vec = c()
last_vec = c()
for (i in valid_year){
print(i)
# First B day
flow.late <- flow.daily %>% filter(Year==i & Month > 8)
firstbday <- flow.late %>% filter(Symbol == "B", Month >= 10) %>% head(1)
firstbday.date <- ifelse(nrow(firstbday)>0, substring(firstbday$Date,1,10), NA)
firstbday.long <- ifelse(is.na(firstbday.date),paste0(i,"-10-01"),
ifelse(firstbday.date > as.Date(paste0(i, "-10-01")),
paste0(i, "-10-01"),firstbday.date))
first_vec = c(first_vec, firstbday.long)
# Last B day
flow.early <- flow.daily %>% filter(Year == i & Month < 6)
lastbday <- flow.early %>% filter(Symbol == "B", Month < 6) %>% tail(1)
lastbday.date <- ifelse(nrow(lastbday)>0, substring(lastbday$Date,1,10), NA)
lastbday.long<- ifelse(is.na(lastbday.date),paste0(i,"-05-01"),
ifelse(lastbday.date < as.Date(paste0(i, "-05-01")),
paste0(i,"-05-01"),lastbday.date))
last_vec = c(last_vec, lastbday.long)
}
ret = data.frame(Station=rep(station, length(valid_year)),
Year = valid_year,
LastBday = last_vec,
FirstBday = first_vec)
return(ret)
}
################################################################################
#' Threshold_Build
#'
#' @description
#' Threshold_Build calculates streamflows at high and low percentiles identified
#' as thresholds (hi & low) based on streamflow during a reference period, between
#' year_from and year_to, for a list of stations. Only streamflow during the ice
#' free period of the reference period, as identified with the bday_calculation
#' function, is used in the calculation of the lower threshold.
#'
#'
#' @param stations a vector of characters indicating the list of stations we want
#' to calculate the thresholds for
#' @param yfrom a number indicating the year threshold calculation starts,
#' defaulted to 1981;
#' @param yto a number indicating the year threshold calculation ends, defaulted
#' to 2010;
#' @param hi a number indicating the upper threshold, defaulted to 0.95;
#' @param low a number indicating the lower threshold, defaulted to 0.05
#' @param write_csv a boolean value indicating whether or not the output will be
#' written to a csv file, defaulted to FALSE;
#'
#' @return
#' A dataframe of the form || station || UpperThreshold || LowerThreshold ||
#'
#' @export
Threshold_Build <- function(stations, yfrom=1981, yto=2010, hi=0.95, low=0.05,
write_csv=FALSE){
Upper = c()
Lower = c()
for (i in 1:length(stations)){
stn.id <- stations[i]
print(stn.id)
flow.daily <- hy_daily_flows(stn.id)
flow.daily$Year = as.numeric(substr(flow.daily$Date, 1, 4))
ref <- flow.daily %>% filter(Year>=yfrom, Year<=yto)
summ = ref %>% group_by(Year) %>% summarise(count=length(Value[!is.na(Value)]))
if (length(summ$Year)<20 | length(summ$count[summ$count>=150])<length(summ$count)){
Upper=append(Upper, NA)
Lower=append(Lower, NA)
print(paste0(stn.id, "---not enough data"))
}else{
Upper_All=ref$Value
for (i in 1:length(Upper_All)){
if (is.na(Upper_All[i])){
Upper_All[i]=0
}
}
yr=summ$Year
num_leap_yrs=length(yr[as.numeric(yr)%%4==0])
total_length=length(yr)*365+num_leap_yrs
Upper_All=append(Upper_All, rep.int(0, total_length-length(Upper_All)))
Lower_All=c()
# First B day
flow.daily$Month = as.numeric(format(flow.daily$Date, "%m"))
for (j in 1:length(summ$Year)){
year.op=summ$Year[j]
fbday=bday_calculation(stn.id, year.op)
lastbday = (fbday %>% filter(Year==year.op))[["LastBday"]]
firstbday = (fbday %>% filter(Year==year.op))[["FirstBday"]]
Value_within=(ref %>% filter(as.Date(Date)>=as.Date(lastbday)&a
s.Date(Date)<=as.Date(firstbday)))$Value
Value_within=Value_within[Value_within!=0]
Lower_All=append(Lower_All, Value_within)
}
Upper=append(Upper,quantile(Upper_All, probs = hi, names = FALSE, na.rm = TRUE))
Lower=append(Lower,quantile(Lower_All, probs = low, names = FALSE, na.rm = TRUE))
print(stn.id)
}
}
Threshold = data.frame(STATION_NUMBER = stations, Q_Upper=Upper, Q_Lower=Lower)
if (write_csv){
write.csv(Threshold, file = "./Output/Threshold_New.csv", row.names = FALSE)
}
return(Threshold)
}
################################################################################
#' percentile.ranked
#'
#' @description
#' Calculates the percentage of values in a vector smaller than value
#'
#' @param a.vector a vector of any comparable data type, ideally numeric;
#' @param value a comparable data type to the components in a.vector, ideally
#' numeric
#'
#' @return the percentage of values within the vector that are smaller than the
#' specified value
#'
#' @export
percentile.ranked <- function(a.vector, value) {
numerator <- length(sort(a.vector)[a.vector < value])
denominator <- length(a.vector)
round(numerator/denominator,2)*100
}
################################################################################
#' Get_ann_mean_flow
#'
#' @description
#' Get_ann_mean_flow calculates the annual mean flow based at a station for
#' specified years.
#'
#' @param station a character string indicating the station number to calculate
#' the annual mean flow
#' @param year a vector indicating the list of years when the annual mean flow is
#' calculated. Default is 'all' indicating all recorded years at the station
#'
#' @return
#' a dataframe of the form || station || Year || annual mean flow ||
#'
#' @export
Get_ann_mean_flow <- function(station, year='all'){
ret=data.frame()
mean_flow = c()
flow.daily <- hy_daily_flows(station)
flow.daily$Year <- as.numeric(format(flow.daily$Date, "%Y"))
if (year=="all"){
valid_year=unique(flow.daily$Year)
}else if(any(!year %in% flow.daily$Year)){
return("Some of the year provided are not in HYDAT")
valid_year=year[(year %in% unique(flow.daily$Year))]
}else{
valid_year=year
}
for (i in valid_year){
flow.dates = flow.daily[flow.daily$Year==i,]
if (nrow(flow.dates %>% filter(!is.na(Value)))<150){
ann_mean_flow = NA
}else{
if (nrow(flow.dates%>%filter(!is.na(Value))) > 0.9 *
ifelse(flow.dates$Year[1]%%4==0, 366, 365)){
ann_mean_flow <- flow.dates %>% mutate(mean_flow = mean(Value, na.rm = TRUE))
ann_mean_flow <- ann_mean_flow[1, "mean_flow"] %>% round(2)
ann_mean_flow <- ann_mean_flow$mean_flow
} else {
Mode <- flow.daily %>% group_by(Year) %>% filter(!is.na(Value)) %>%
summarise(MODE=names(sort(table(Value), decreasing = TRUE)[1]))
Mode.multi <- mean(as.numeric(Mode$MODE))
if (Mode.multi <0.01){
ann_mean_flow <- flow.dates %>% filter(!is.na(Value)) %>% group_by(Year) %>%
summarise(TOTAL=sum(Value)/n())
ann_mean_flow <- round(ann_mean_flow$TOTAL,2)
} else {
ann_mean_flow <- NA
}
}
}
mean_flow <- c(mean_flow, ann_mean_flow)
}
print(station)
ret = data.frame(STATION_NUMBER=rep(station, length(mean_flow)),
Year = valid_year,
ann_mean_flow = mean_flow)
return(ret)
}
################################################################################
#' Get_flood_metrics
#'
#' @description
#' Get_flood_metrics calculates four flood metrics at a specified hydrometric station
#' for a range of years specified:
#' pot_threshold: the upper threshold calculated in Threshold_Build function;
#' pot_days: days with streamflow over the threshold within a year;
#' pot_events: independent flood events over a year;
#' pot_max_dur: the maximum consecutive days over the threshold within a year
#' The four metrics are recorded in a table and returned, along with the maximum
#' flow in each year.
#'
#' @param station a character string representing a station we want the flood
#' metrics to be calculated
#' @param year a vector representing the list of years for which we want the flood
#' metrics to be calculated. Defaulted to 'all', indicating all the years with
#' data at the station
#'
#' @return
#' A dataframe of the form || STATION_NUMBER || Year ||pot_threshold || pot_days
#' || pot_events || pot_max_dur || X1_day_max ||
#'
#' @details
#' POT stands for peaks-over-threshold.
#' Note that there are requirements regarding the independence conditions for
#' pot_event metric; we implement a check for the correlation between flood events.
#' Referring to Lang et al. (1999), we set two conditions:
#' 1) R > 5+log(A/1.609^2); and
#' 2) Xmin < 0.75 * min(Xi, Xj).
#' where: R is the time in days between two peaks
#' A is the watershed area in squared kilometers
#' Xmin is the minimum flow between the adjacent flow peaks Xi and Xj
#' In order for two adjacent peaks to be considered as independent flood events,
#' both conditions need to be met. If either of the conditions fail, we consider
#' the two events as the same one. R is defined within the iteration.
#'
#' @references
#' Slater et al.(2016). On the impact of gaps on trend detection un extreme
#' streamflow time series. Int.J.climatol. 37:3976-3983(2017). doi: 10.1002/joc.4954
#'
#'#' @export
Get_flood_metrics <- function(station, year='all'){
ret = data.frame()
flow.daily <- hy_daily_flows(station)
flow.daily$Year <- as.numeric(format(flow.daily$Date, "%Y"))
if (year=="all"){
valid_year=unique(flow.daily$Year)
}else if(any(!year %in% unique(flow.daily$Year))){
return("Some of the year provided are not in HYDAT")
valid_year=year[(year %in% unique(flow.daily$Year))]
}else{
valid_year=year
}
station_row = Threshold_Build(station)
max_flow = c()
pot_days = c()
pot_events = c()
pot_threshold = c()
pot_max_dur = c()
hydat_info <- hy_stations(station)
stn_area <- hydat_info$DRAINAGE_AREA_GROSS
R <- 5 + log(stn_area/(1.609^2))%>%round(1)
for (yr in valid_year){
flow.dates = flow.daily %>% filter(Year == yr)
# do not calculate values for years with less than 150 days
if (any((nrow(flow.dates %>% filter(!is.na(Value)))<150),is_empty(flow.dates$Date))){
pot_days = c(pot_days, NA)
pot_events =c(pot_events, NA)
pot_threshold = c(pot_threshold, NA)
pot_max_dur = c(pot_max_dur, NA)
max_flow <- c(max_flow,NA)
}else{
max.flow <- flow.dates %>% arrange(desc(Value)) %>% slice(1)
max.flow <- round(max.flow[["Value"]], digits = 2)
max_flow <- c(max_flow,max.flow)
if (is.na(station_row$Q_Upper)){
pot_days = c(pot_days, NA)
pot_events =c(pot_events, NA)
pot_threshold = c(pot_threshold, NA)
pot_max_dur = c(pot_max_dur, NA)
}else{
threshold <- as.numeric(station_row$Q_Upper)
# POT function triggers 'computational singularity' warning or error when threshold >=10
# This factor scales inputs and one of the outputs below to avoid the singularity.
if (max.flow<threshold){
pot_days = c(pot_days, 0)
pot_events =c(pot_events, 0)
pot_threshold = c(pot_threshold, threshold)
pot_max_dur = c(pot_max_dur, 0)
}else{
factor <- case_when( threshold >= 1000 ~ 1000,
threshold >= 100 ~ 100,
threshold >= 10 ~ 10,
threshold >= 0 ~ 1)
pot2 <- possibly(evir::pot, otherwise = NA)
pot <- pot2(flow.dates$Value[!is.na(flow.dates$Value)]/factor, threshold/factor)
if (!is.list(pot)) {
pot_days = c(pot_days, NA)
pot_events =c(pot_events, NA)
pot_threshold = c(pot_threshold, NA)
pot_max_dur = c(pot_max_dur, NA)
} else {
pot_days =c(pot_days, length(pot$data))
pot.dates <- data.frame(dates = attr(pot$data, "times"))
pot.dates$lag <- pot.dates$dates - lag(pot.dates$dates, 1)
pot_threshold = c(pot_threshold, threshold)
seq_blocks <- split(pot.dates$dates, cumsum(c(TRUE, diff(pot.dates$dates)!=1)))
# We iterate within seq_blocks to find maximum flow within each events, before testing
# their independence
events_count <- c()
for (a in 1:length(seq_blocks)){
events_count <- append(events_count, length(seq_blocks[[as.character(a)]]))
}
events_array <- c()
for (b in 1:length(events_count)){
c <- events_count[b]
while (c > 0){
events_array<-append(events_array,b)
c <- c-1
}
}
pot_dat <- data.frame(data = pot$data, dates=attr(pot$data, "times"),event_num=events_array)
max_peak <- pot_dat %>% group_by(event_num) %>% summarise(max=max(data))
max_peak_central<-c()
for (d in 1:length(max_peak$max)){
max_peak_dates<-(pot_dat %>% filter(data==max_peak$max[d],dates>=seq_blocks[[d]][1],
dates<=seq_blocks[[d]][length(seq_blocks[[d]])]))$dates
if (length(max_peak_dates)%%2==1){
max_peak_dates=max_peak_dates[(length(max_peak_dates)+1)/2]
} else {
max_peak_dates=(max_peak_dates[length(max_peak_dates)/2]+
max_peak_dates[length(max_peak_dates)/2+1])/2
}
max_peak_central=append(max_peak_central, max_peak_dates)
max_peak_dates=c()
}
max_peak$central_dates<-max_peak_central
R_vec<-c()
for (t in 1:length(max_peak$max)){
R_vec<-append(R_vec, R)
}
max_peak$R_vec<-R_vec
Xmin_vec<-c()
if (length(max_peak$max)==1){
Xmin_vec<-append(Xmin_vec, NA)
}else{
for( e in 1:(length(max_peak$max)-1)){
Xmin_vec<-append(Xmin_vec,
0.75*min(max_peak$max[e],max_peak$max[e+1])*factor)
}
Xmin_vec<-append(Xmin_vec, NA)
}
max_peak$Xmin_vec<-Xmin_vec
ind_events<-c()
if (length(seq_blocks)>1){
for (j in 1:(length(seq_blocks)-1)){
range_flow <- flow.dates %>% filter(Date>=as.Date(max_peak$central_dates[j], origin=paste0(yr, "-01-01")),
Date<=as.Date(max_peak$central_dates[j+1], origin=paste0(yr, "-01-01")))
if (all(min(range_flow$Value, na.rm=TRUE)<Xmin_vec[j],
((max_peak$central_dates[j+1] - max_peak$central_dates[j])>R))){
ind_events <- append(ind_events, 1)
}else{
ind_events <- append(ind_events, 0)
}
}
ind_events <- append(ind_events, 1)
} else {
ind_events <- c(1)
}
max_peak$ind_events<-ind_events
pot_events <- c(pot_events, sum(ind_events))
max_dur_count <- c()
count<-0
for (f in 1:length(ind_events)){
if (ind_events[f]==1 | is.na(ind_events[f])){
max_dur_count = append(max_dur_count, count + events_count[f])
count = 0
} else { # i.e. in_events[i] == 0
count <- count + events_count[f]}
}
pot_max_dur = c(pot_max_dur, max(max_dur_count))
}
}
}
}
}
ret = data.frame(STATION_NUMBER=rep(station,length(valid_year)),
Year=valid_year,
pot_threshold=pot_threshold,
pot_days=pot_days,
pot_events=pot_events,
pot_max_dur=pot_max_dur,
X1_day_max=max_flow)
return(ret)
}
################################################################################
#' Get_drought_metrics
#'
#' @description
#' The function calculates four drought metrics at a specified hydrometric station
#' for a range of years specified:
#' dr_threshold: the lower threshold calculated in Threshold_Build function;
#' dr_days: days with streamflow below the threshold within a year;
#' dr_events: independent drought events over a year;
#' dr_max_dur: the maximum consecutive days under the threshold within a year;
#' The four metrics are recorded in a table and returned, along with the 7-day
#' minimum flow in each year.
#'
#' @param station a character string representing a station where we want the drought
#' metrics to be calculated
#' @param year a vector representing the list of years for which we want the drought
#' metrics to be calculated. Defaulted to 'all', indicating all the years with data
#' at the station
#'
#' @return
#' A dataframe of the form || STATION_NUMBER || Year || dr_threshold || dr_days ||
#' dr_events || dr_max_dur || X7_day_min ||
#'
#' @details
#' The four metrics are calculated only between the last and first bday of each
#' year, as calculated with the bday_calculation function.
#' In order to ensure independence between drought events, we set the following
#' conditions:
#' 1) t >= 5; and
#' 2) the excess volume, Vabove / min(Vbelowi, Vbelowj)>=0.1.
#' where: t is the time between events in days
#' Vabove is the volume exceeding the threshold
#' Vbelowi and Vbelowj are the volumes below the threshold during
#' events i and j.
#' For two adjacent troughs to be considered as independent drought events,
#' both conditions need to be met. If either of the conditions fail, we consider
#' the two events as the same one.
#'
#' @references
#' WMO World Meteorological Organization (2008). Manual on Low-Flow Estimation
#' and Prediction. 1339 Operational Hydrology.
#'
#' @export
Get_drought_metrics <- function(station, year='all'){
ret = data.frame()
flow.daily <- hy_daily_flows(station)
flow.daily$Year <- as.numeric(format(flow.daily$Date, "%Y"))
if (year=="all"){
valid_year=unique(flow.daily$Year)
}else if(any(!year %in% unique(flow.daily$Year))){
return("Some of the year provided are not in HYDAT")
valid_year=year[(year %in% unique(flow.daily$Year))]
}else{
valid_year=year
}
station_row = Threshold_Build(station)
bday = bday_calculation(station, valid_year)
min7days = c()
dr_days = c()
dr_events = c()
dr_threshold = c()
dr_max_dur = c()
for (yr in valid_year){
bd=bday %>% filter(Year==yr)
flow.dates = flow.daily %>% filter(Year == yr)
dates_value=flow.dates$Value
# do not calculate values for years with less than 150 days of data
if (nrow(flow.dates %>% filter(!is.na(Value)))<150){
min7days <- c(min7days,NA)
dr_days <- c(dr_days,NA)
dr_events <- c(dr_events,NA)
dr_max_dur <- c(dr_max_dur,NA)
dr_threshold <- c(dr_threshold,NA)
}else{
if (is.na(flow.dates$Value[1])){
front_ind=0
while(is.na(dates_value[1])){
dates_value=dates_value[2:length(dates_value)]
front_ind=front_ind+1
}
date_ref_front=flow.dates$Date[front_ind+1]
flow.dates=flow.dates %>% filter(as.Date(Date)>=as.Date(date_ref_front))
}
if (is.na(dates_value[length(dates_value)])){
end_ind=0
while(is.na(dates_value[length(dates_value)])){
dates_value=dates_value[1:(length(dates_value)-1)]
end_ind=end_ind+1
}
date_ref_end=flow.dates$Date[length(flow.dates$Value)-end_ind]
flow.dates=flow.dates %>% filter(as.Date(Date)<=as.Date(date_ref_end))
}
date_vec<-flow.dates$Date
flow.new<-flow.dates%>%mutate(Date=seq(1, n()))%>%mutate(Value=approx(Date,Value,Date)$y)
flow.new$Date<-date_vec
zooflow <- zoo(flow.new$Value, flow.new$Date)
# calculate 7-day minimum flow
zooflow.summ <- window(zooflow, start = as.Date(bd$LastBday),
end = as.Date(bd$FirstBday))
daymean7summ <- rollmean(zooflow.summ, 7)
daymin7summ <- which.min(daymean7summ)
if (length(daymin7summ) > 0){
min7days <- c(min7days,round(daymean7summ[[daymin7summ]],2))
} else {
min7days <- c(min7days,NA)
}
# if loop to stop calculations if there is no threshold
if (is.na(station_row$Q_Lower)){
dr_days = c(dr_days, NA)
dr_events =c(dr_events, NA)
dr_threshold = c(dr_threshold, NA)
dr_max_dur = c(dr_max_dur, NA)
}else{
if (length(zooflow.summ) > 60){
low <- as.numeric(station_row$Q_Lower)
data.s <- data.frame(day= as.numeric(substring(flow.new[["Date"]], 9,10)),
month=as.numeric(substring(flow.new[["Date"]],6,7)),
year=yr,flow=flow.new$Value, hyear=yr,
baseflow=NA,
date=paste0(yr,"-",substring(flow.new[["Date"]],6,11)))
data.s <- data.s %>% filter((as.Date(data.s$date) >= as.Date((bd[,3])[1])) &
(as.Date(data.s$date) <= as.Date((bd[,4])[1])))
data.lf <- createlfobj(data.s)
flowunit(data.lf) <- "m^3/s"
drought <- find_droughts(data.lf, threshold = low)
summary2 <- possibly(base::summary, otherwise = data.frame(event.no=NULL))
duration<-summary2(drought)
if (any(duration[1,"event.no"] == 0, nrow(duration)==0)){
dr_days = c(dr_days,0)
dr_events = c(dr_events, 0)
dr_max_dur =c(dr_max_dur, 0)
} else {
dr_days <- c(dr_days,sum(duration$duration))
vol<-c()
for (i in 1:length(drought$def.increase)){
vol<-append(vol, drought$def.increase[[i]])
}
drevent_count<-c()
zero_count<-c()
for ( i in 1:length(drought$event.no)){
drevent_count<-append(drevent_count, drought$event.no[[i]])
if (drought$event.no[[i]]==0){
zero_count<-append(zero_count, i)
}
}
# account for fact some years start with drought
# if (drought$event.no[1]==1) {
# ind <- 1
# } else {
ind<-0
# }
if (length(zero_count)==0){ #i event that last for the entire period
dr_events=c(dr_events,1)
dr_max_dur=c(dr_max_dur,duration$duration)
} else {
if (length(zero_count)==1){
drevent_count[zero_count[1]]<-"0_1"
} else {
for ( i in 1:(length(zero_count)-1)){
if ((zero_count[i]+1)==zero_count[i+1]){
drevent_count[zero_count[i]]<-paste0(as.character(ind), "_", as.character(ind+1))
} else {
drevent_count[zero_count[i]]<-paste0(as.character(ind), "_", as.character(ind+1))
ind<-ind+1
}
drevent_count[zero_count[length(zero_count)]]<-paste0(as.character(ind), "_", as.character(ind+1))
}
}
dr_filter<-data.frame(volume=vol, dr_event_count=drevent_count)
dr_summary<-dr_filter %>% group_by(dr_event_count) %>% summarise(sum=sum(volume))
dr_ind_events<-c()
if (nrow(duration)>1){
for ( i in 1:(nrow(duration)-1)){
t <- as.numeric(duration$start[i+1]-duration$end[i])
v_above<-(dr_summary %>% filter(dr_event_count==paste0(as.character(i), "_",
as.character(i+1))))$sum
# account for cases where v_above can't be calculated
if (any(is.na(v_above),is_empty(v_above))) {
dr_ind_events<-append(dr_ind_events, 0)
} else {
if (all(t>=5, abs(v_above/min(duration$volume[i], duration$volume[i+1]))>=0.1)){
dr_ind_events<-append(dr_ind_events, 1)
} else {
dr_ind_events<-append(dr_ind_events, 0)
}
}
}
dr_ind_events<-append(dr_ind_events, 1)
} else {
dr_ind_events<-c(1)
}
dr_events <- c(dr_events,sum(dr_ind_events))
dr_max_dur_count <- c()
dr_count<-0
for (i in 1:length(dr_ind_events)){
if (dr_ind_events[i]==1){
dr_max_dur_count <- append(dr_max_dur_count, dr_count + duration$duration[i])
dr_count <- 0
} else { # i.e. in_events[i] == 0
dr_count <- dr_count + duration$duration[i]
}
}
dr_max_dur <- c(dr_max_dur,max(dr_max_dur_count))
}
}
dr_threshold <- c(dr_threshold,(round(station_row$Q_Lower,2)))
} else {
dr_days <- c(dr_days,NA)
dr_events <- c(dr_events,NA)
dr_max_dur <- c(dr_max_dur,NA)
dr_threshold <- c(dr_threshold,NA)
}
}
}
}
ret = data.frame(STATION_NUMBER=rep(station,length(valid_year)),
Year=valid_year,
dr_threshold=dr_threshold,
dr_days=dr_days,
dr_events=dr_events,
dr_max_dur=dr_max_dur,
X7_day_min=min7days)
return(ret)
}
################################################################################
#' chk.hydat
#'
#' @description
#' Function to define where to look for HYDAT database, and import it if necessary,
#' so it can be accessed through functions from the tidyhydat package.
#'
#' @param hydat.path a character string indicating the path for the HYDAT sqlite3
#' file relative to the current working directory. Defaulted to NULL, in which
#' case the file will be in the folder "./Dependencies"
#'
#' @return
#' downloading the latest version of the HYDAT database into a folder and setting
#' the database path
#'
#' @export
chk.hydat <- function(hydat.path = NULL){
# Find the current working directory, check if it has a folder for "Dependencies",
# and if not, create it.
main_dir <- getwd()
hydat.path <- "./Dependencies"
if(length( grep("Dependencies", list.files(main_dir)))==0){
dir.create(file.path(main_dir, hydat.path))
}
hy_file <- paste0(hydat.path,"/Hydat.sqlite3")
# Check if the HYDAT database is already in the dependencies folder, and if not,
# download it.
if(length( grep("Hydat.sqlite", list.files(hydat.path)))==0){
print(paste("No hydat database found in", hydat.path))
print(paste("It will be downloaded to", hydat.path))
download_hydat(dl_hydat_here = hydat.path)
} else {
print(paste("Hydat database found in", hydat.path))
}
# set the path to database
hy_set_default_db(hydat_path = hy_file)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.