#' Flow metrics
#'
#' @param data Input data
#'
#' @export
#'
#' @importFrom magrittr "%>%"
#'
#' @examples
#' sampdat <- phabformat(sampdat)
#' flow(sampdat)
flow <- function(data){
# This function is used in calculation of FL_Q_F
calcDistances = function(vector) {
nreps = length(vector) # number of replicates
res <- array() # result
if(nreps != 1) {
for (i in 1:nreps) {
if (i == 1) {
res[1] <- (vector[2] - vector[1]) / 2
} else if (i == nreps){
res[nreps] <- (vector[nreps] - vector[nreps - 1]) / 2
} else {
res[i] <- (vector[i+1] - vector[i-1]) / 2
}
}
} else {
res[1] <- vector[1]
}
return(as.vector(res))
}
# Subset the data to the necessary analytes. Otherwise the code breaks when we do the tidyr::spread
# Subset the data to the necessary analytes. Otherwise the code breaks when we do the tidyr::spread
data <- data[which(data$AnalyteName %in% c('Distance from Bank', 'StationWaterDepth', 'Velocity', 'Distance, Float', 'Float Time', 'Wetted Width')),]
# We just knock out all the flow metrics is one go and put in in this variable called FlowMetrics
# print("We just knock out all the flow metrics is one go and put in in this variable called FlowMetrics")
FlowMetrics <- data %>%
dplyr::group_by(id) %>%
tidyr::nest() %>%
dplyr::mutate(
FL_Q_F.result = purrr::map(data, function(df){
# print("FL_Q_F.result")
# FL_Q_F is only for LocationCode X. Stations that didn't use the Velocity Area method will have zero rows
# after subsetting it this way
# print("FL_Q_F is only for LocationCode X")
df <- df[df$LocationCode == 'X',] %>% dplyr::select(-c(UnitName, VariableResult))
# If they didn't use velocity area method, return NA
# print("If they didn't use velocity area method, return NA")
if (df %>% nrow() == 0) {return(NA)}
# Dealing with factors
# print("Dealing with factors")
df$Result <- as.numeric(as.character(df$Result))
# Spreading the data makes the calculation easier here
# print("Spreading the data makes the calculation easier here")
df <- df %>% tidyr::spread(key = AnalyteName, value = Result)
# Have to make sure Replicate is a number. Otherwise it doesn't get ordered correctly when we arrange
# print("Have to make sure Replicate is a number. Otherwise it doesn't get ordered correctly")
df$Replicate <- as.numeric(as.character(df$Replicate))
df <- dplyr::arrange(df, Replicate)
return(sum(calcDistances(df[['Distance from Bank']]) * df$StationWaterDepth * 0.001076 * df$Velocity))
}),
FL_Q_F.count = purrr::map(data, function(df){
# print("FL_Q_F.count")
df <- df[df$LocationCode == 'X',] %>% dplyr::select(-c(UnitName, VariableResult))
if (df %>% nrow() == 0) {return(NA)}
df$Result <- as.numeric(as.character(df$Result))
df <- df %>% tidyr::spread(key = AnalyteName, value = Result)
df$Replicate <- as.numeric(as.character(df$Replicate))
df <- dplyr::arrange(df, Replicate)
# This is how I will get the number of non null observations that went into the calculation.
# I'm 99% sure that there is a more efficient way of doing this
return(sum(!is.na(calcDistances(df[['Distance from Bank']]) * df$StationWaterDepth * 0.001076 * df$Velocity)))
}),
FL_Q_M.result = as.numeric(as.character(FL_Q_F.result)) / 35.32, # convert Feet to Meters
FL_Q_M.count = FL_Q_F.count,
FL_N_M.result = purrr::map(data, function(df){
# This code is for those stations that used the Neutral Buoyant Object Method rather than velocity area
# print("This code is for those stations that used the Neutral Buoyant Object Method rather than velocity area")
df <- df %>%
dplyr::filter(MethodName == 'Neutral Buoyant Object') %>%
dplyr::mutate(
# Create a new column called transect that allows us to group on Upper, Middle and Lower
transect = stringr::str_extract(LocationCode, "Upper|Middle|Lower")
)
# If they didn't use this method, return NA
if (df %>% nrow() == 0) {return(NA)}
# I don't know what I should comment here, to be honest. The instructions are somewhat complex
# print("Calculating areas")
area_dataframe <- df %>%
dplyr::filter(AnalyteName %in% c('StationWaterDepth','Wetted Width')) %>%
dplyr::group_by(transect) %>%
tidyr::nest() %>%
dplyr::mutate(
areas = purrr::map(data, function(subdf){
mean(as.numeric(as.character(subdf[which(subdf$AnalyteName == 'StationWaterDepth'),]$Result)) / 100) * as.numeric(as.character(subdf[which(subdf$AnalyteName == 'Wetted Width'),]$Result))
})
) %>% dplyr::select(-data) %>%
tidyr::unnest()
avg_area <- mean(area_dataframe$areas, na.rm = T)
# print("Calculating velocities")
velocity_dataframe <- df %>%
dplyr::filter(AnalyteName %in% c('Float Time','Distance, Float')) %>%
dplyr::group_by(Replicate) %>%
tidyr::nest() %>%
dplyr::mutate(
velocities = purrr::map(data, function(subdf){
as.numeric(as.character(subdf[which(subdf$AnalyteName == 'Distance, Float'),]$Result)) / as.numeric(as.character(subdf[which(subdf$AnalyteName == 'Float Time'),]$Result))
})
) %>% dplyr::select(-data) %>%
tidyr::unnest()
avg_velocity <- mean(velocity_dataframe$velocities, na.rm = T)
return(avg_area * avg_velocity)
}),
FL_N_F.result = as.numeric(as.character(FL_N_M.result)) * 35.32, # Feet to Meters
XWV_F.result = purrr::map(data, function(df){
# print("XWV_F.result")
df <- df %>% dplyr::filter(LocationCode == 'X', AnalyteName == 'Velocity')
if (df %>% nrow() == 0) {return(NA)}
return(mean(df$Result, na.rm = T))
}),
XWV_F.count = purrr::map(data, function(df){
# print("XWV_F.count")
df <- df %>% dplyr::filter(LocationCode == 'X', AnalyteName == 'Velocity')
if (df %>% nrow() == 0) {return(NA)}
return(sum(!is.na(df$Result)))
}),
XWV_F.sd = purrr::map(data, function(df){
# print("XWV_F.sd")
df <- df %>% dplyr::filter(LocationCode == 'X', AnalyteName == 'Velocity')
if (df %>% nrow() == 0) {return(NA)}
return(round(sd(df$Result, na.rm = T), 3))
}),
XWV_M.result = as.numeric(as.character(XWV_F.result)) / 3.28, # Feet to Meters
XWV_M.count = XWV_F.count, # The counts for both metrics will be the same
XWV_M.sd = purrr::map(data, function(df){
# print("XWV_M.result")
df <- df %>% dplyr::filter(LocationCode == 'X', AnalyteName == 'Velocity')
if (df %>% nrow() == 0) {return(NA)}
return(round(sd(df$Result / 3.28, na.rm = T), 3))
}),
MWVM_F.result = purrr::map(data, function(df){
# print("MWVM_F.result")
df <- df %>% dplyr::filter(LocationCode == 'X', AnalyteName == 'Velocity')
if (df %>% nrow() == 0) {return(NA)}
return(max(df$Result, na.rm = T))
}),
MWVM_F.count = XWV_M.count, # Count is same. Metric is ran on same set of numbers.
#We're just taking Max instead of Mean,
MWVM_M.result = as.numeric(as.character(MWVM_F.result)) / 3.28,
MWVM_M.count = XWV_M.count,
PWVZ.result = purrr::map(data, function(df){
# print("PWVZ.result")
df <- df %>% dplyr::filter(LocationCode == 'X', AnalyteName == 'Velocity')
if (df %>% nrow() == 0) {return(NA)}
return(sum(df$Result == 0, na.rm = T) * 100 / sum(!is.na(df$Result)))
}),
PWVZ.count = XWV_M.count
) %>% dplyr::select(-data) %>%
tidyr::unnest() %>%
as.data.frame(stringsAsFactors = FALSE) %>%
tibble::column_to_rownames('id')
# print("round appropriately")
FlowMetrics$FL_Q_F.result <- FlowMetrics$FL_Q_F.result %>% round(3)
FlowMetrics$FL_Q_M.result <- FlowMetrics$FL_Q_M.result %>% round(3)
FlowMetrics$FL_N_F.result <- FlowMetrics$FL_N_F.result %>% round(3)
FlowMetrics$FL_N_M.result <- FlowMetrics$FL_N_M.result %>% round(3)
FlowMetrics$MWVM_F.result <- FlowMetrics$MWVM_F.result %>% round(1)
FlowMetrics$MWVM_M.result <- FlowMetrics$MWVM_M.result %>% round(1)
FlowMetrics$XWV_M.result <- FlowMetrics$XWV_M.result %>% round(2)
FlowMetrics$XWV_F.result <- FlowMetrics$XWV_F.result %>% round(2)
FlowMetrics$PWVZ.result <- FlowMetrics$PWVZ.result %>% round(1)
result <- FlowMetrics
return(result)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.