#' Summarise z values over binned wind data.
#'
#' Input data should be original unbinned data.
#' 1-dimensional binning and calculating summary statistics over wind direction and/or wind velocity bins, respectively.
#' NA values in z and in ws, wd (after cutting) will be silently removed before applying functions
#'
#' @param data a data.frame or tibble containing the data (wide format)
#' @param ws NULL or symbol giving the wind velocity parameter name (wind velocity preferably in m/s)
#' @param wd symbol giving the wind direction parameter name in degrees
#' @param z symbol giving the parameter name to be summarised
#' @param groupings additional groupings. Use helper [grp()] to create
#' @param fun function or list of functions for summary.
#' @param fun.args a list of extra arguments passed on to fun.
#' @param nmin numeric, minimum number of values for fun, if n < nmin: NA is returned
#' @param wd_cutfun function, cut function for wind direction (to create bins). See [cut_wd.fun()] for options.
#' @param ws_cutfun function, cut function for wind speed. See [cut_ws.fun()] for examples
#'
#' @return a tibble with summarised data
#'
#' @section Computed variables:
#'
#' The names of the columns stay the same as in `data` (for the arguments "ws", "wd" and "z").
#'
#' * binned column `ws` if `is.null(ws) == FALSE`
#' * binned column `wd`
#' * factor column `stat` containing the name of the summarize function as level
#' * column `z` with the result of the summarize function
#' * `n` count of occurrences for the corresponding bin
#' * `freq` frequency of the corresponding bin
#'
#' @export
#'
#' @examples
#' library(ggplot2)
#' fn <- rOstluft.data::f("Zch_Stampfenbachstrasse_2010-2014.csv")
#' data <- rOstluft::read_airmo_csv(fn)
#' data <- rOstluft::rolf_to_openair(data)
#' data <- dplyr::mutate(data, year = lubridate::year(date))
#'
#' summary_wind(data, "ws", "wd", "NO2")
#'
#' # multiple stats: Pass function, by name, reference, as function or one sided formula
#' q95 <- function(x) stats::quantile(x, probs = 0.95)
#'
#' funs <- list(
#' "mean",
#' "median",
#' "q95_1" = q95,
#' "q95_2" = function(x) stats::quantile(x, probs = 0.95),
#' "q95_3" = ~ stats::quantile(., probs = 0.95)
#' )
#'
#' res <- summary_wind(data, "ws", "wd", "NO2", fun = funs)
#' res
#'
#' # and q95, q95_1, q95_2, q95_3 alle have the same value
#' tidyr::spread(res, "stat", "NO2") %>%
#' dplyr::select(q95_1, q95_2, q95_3)
#'
#' # is for some reason fun.args used with multiple functions, use ... to catch
#' # superfluous arguments:
#' funs <- list(
#' "q95" = function(x, ...) stats::quantile(x, probs = 0.95),
#' "mean"
#' )
#' summary_wind(data, "ws", "wd", "NO2", fun = funs, fun.args = list(na.rm = TRUE))
#'
#'
#' # more wd classes, less ws classes and squish ws
#' summary_wind(data, "ws", "wd", "NO",
#' wd_cutfun = cut_wd.fun(binwidth = 22.5),
#' ws_cutfun = cut_ws.fun(binwidth = 2, ws_max = 6))
#'
#' # no ws class
#' summary_wind(data, NULL, "wd", "NO")
#'
#' # additional grouping with strings, symbols or named expressions
#' summary_wind(data, ws, wd, NO2, group = grp("site", year, wday = lubridate::wday(date)))
#'
#' # how often comes which concentration from one direction
#' summary_wind(data, NULL, wd, NO2,
#' group = grp(NO2_class = ggplot2::cut_number(NO2, 5)))
#'
#' # the same but we use ws as pollutant
#' summary_wind(data, NO2, wd, NO2, ws_cutfun = cut_number.fun(5))
#'
#' # some plots using the summarized data
#'
#' # a radar plot
#' funs <- list(
#' "mean",
#' "median",
#' "q95" = ~ stats::quantile(., probs = 0.95)
#' )
#'
#' data_summarized <- summary_wind(data, ws, wd, NOx, fun = funs,
#' ws_cutfun = cut_number.fun(1)
#' )
#'
#' ggplot(data_summarized, aes(x = wd, y = NOx, color = stat, group = stat)) +
#' geom_polygon(size = 1, fill = NA) +
#' coord_radar(start = - 22.5 / 180 * pi ) +
#' scale_color_viridis_d(end = 0.8) +
#' scale_y_continuous(limits = c(0, NA), expand = c(0,0, 0, 0)) +
#' facet_wrap(vars(stat))
#'
#'
#' # a wind rose
#' data_summarized <- summary_wind(data, ws, wd, ws,
#' ws_cutfun = cut_ws.fun(ws_max = 4, reverse = TRUE)
#' )
#'
#' ggplot(data_summarized, aes(x = wd, y = freq, fill = ws)) +
#' geom_bar(stat = "identity") +
#' coord_polar2(start = - 22.5 / 180 * pi ) +
#' scale_y_continuous(
#' limits = c(0, NA),
#' expand = c(0,0, 0, 0),
#' labels = scales::percent
#' ) +
#' scale_fill_viridis_d()
#'
#'
#' # a pollution rose, use the pollutant as ws
#' data_summarized <- summary_wind(data, NOx, wd, NOx,
#' ws_cutfun = cut_number.fun(5)
#' )
#'
#' # we can plot with the group as fill
#' ggplot(data_summarized, aes(x = wd, y = freq, fill = forcats::fct_rev(NOx))) +
#' geom_bar(stat = "identity") +
#' coord_polar2(start = - 22.5 / 180 * pi ) +
#' scale_y_continuous(limits = c(0, NA), expand = c(0,0, 0, 0)) +
#' scale_fill_viridis_d(direction = -1, name = "NOx")
#'
#' # or the mean of the group as fill
#' ggplot(data_summarized, aes(x = wd, y = freq, fill = NOx.stat)) +
#' geom_bar(stat = "identity") +
#' coord_polar2(start = - 22.5 / 180 * pi ) +
#' scale_y_continuous(limits = c(0, NA), expand = c(0,0, 0, 0)) +
#' scale_fill_viridis_c(name = "NOx")
summary_wind <- function(
data,
ws,
wd,
z,
groupings = grp(),
fun = "mean",
fun.args = list(),
nmin = 3,
wd_cutfun = cut_wd.fun(binwidth = 45),
ws_cutfun = cut_ws.fun(binwidth = 1)
) {
wd <- rlang::ensym(wd)
z <- rlang::ensym(z)
ws_is_null <- rlang::quo_is_null(rlang::enquo(ws))
if (ws_is_null) {
summary_groups <- modify_list(grp(!!wd), groupings)
not_gather_groups <- c(rlang::as_string(wd), names(groupings), "n", "freq")
} else {
ws <- rlang::ensym(ws)
summary_groups <- modify_list(grp(!!ws, !!wd), groupings)
not_gather_groups <- c(rlang::as_string(wd), rlang::as_string(ws),
names(groupings), "n", "freq")
}
# rename z if needed. we can't apply summarize functions on grouping columns!
# for ws and wd we do auto renaming.
if (!ws_is_null && ws == z) {
z <- rlang::sym(stringr::str_c(rlang::as_string(ws), ".stat"))
data <- dplyr::mutate(data, !!z := !!ws)
}
if (wd == z) {
z <- rlang::sym(stringr::str_c(rlang::as_string(wd), ".stat"))
data <- dplyr::mutate(data, !!z := !!wd)
}
fun <- auto_name(c(fun, "n" = function(...) dplyr::n()))
# apply binning trough cut fuctions
# with stats::na.omit() every row containing a NA value will be filtered
if (ws_is_null) {
data <- dplyr::mutate(data, !!wd := wd_cutfun(!!wd))
data <- dplyr::filter(data, !(is.na(!!wd) | is.na(!!z)))
} else {
data <- dplyr::mutate(data, !!wd := wd_cutfun(!!wd), !!ws := ws_cutfun(!!ws))
data <- dplyr::filter(data, !(is.na(!!wd) | is.na(!!ws) | is.na(!!z)))
}
# apply the summarize function regarding the addiotional grouping columns
data <- dplyr::group_by(data, !!!summary_groups)
data <- dplyr::summarise_at(data,
.vars = dplyr::vars(!!z),
.funs = fun,
!!!fun.args
)
data <- dplyr::ungroup(data)
# calculate frequencies for each groupings
data <- dplyr::group_by(data, !!!rlang::syms(names(groupings)))
data <- dplyr::mutate(data, freq = .data$n / sum(.data$n, na.rm = TRUE))
data <- dplyr::ungroup(data)
# data <- tidyr::gather(data, key = "stat", value = !!z, -dplyr::one_of(not_gather_groups))
data <- tidyr::pivot_longer(data, cols = -dplyr::one_of(not_gather_groups), names_to = "stat", values_to = rlang::as_name(z))
# factorize stat column
data <- dplyr::mutate(data, stat = factor(.data$stat))
data <- dplyr::filter(data, .data$n >= nmin)
return(data)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.