R/metrics.R

Defines functions .fixed_metrics_funs sfn_metrics summarise_by_period

Documented in .fixed_metrics_funs sfn_metrics summarise_by_period

#' Summaries by period
#'
#' This function collapse the TIMESTAMP to the desired period (day, month...)
#' by setting the same value to all timestamps within the period. This modified
#' TIMESTAMP is used to group by and summarise the data.
#' 
#' This function uses internally \code{\link{.collapse_timestamp}} and
#' \code{\link[dplyr]{summarise_all}}. Arguments to control these functions
#' can be passed as `...`. Arguments for each function are spliced and applied
#' when needed. Be advised that all arguments passed to the summarise_all function
#' will be applied to all the summarising functions used, so it will fail if any
#' of that functions does not accept that argument. To complex function-argument
#' relationships, indicate each summary function call within the \code{.funs}
#' argument as explained here \code{\link[dplyr]{summarise_all}}:
#' \preformatted{
#' # This will fail beacuse na.rm argument will be also passed to the n function,
#' # which does not accept any argument:
#' summarise_by_period(
#'   data = get_sapf_data(ARG_TRE),
#'   period = '7 days',
#'   .funs = list(mean, sd, n()),
#'   na.rm = TRUE
#' )
#'
#' # to solve this is better to use the .funs argument:
#' summarise_by_period(
#'   data = get_sapf_data(ARG_TRE),
#'   period = '7 days',
#'   .funs = list(~ mean(., na.rm = TRUE), ~ sd(., na.rm = TRUE), ~ n())
#' )
#' }
#'
#' @section TIMESTAMP_coll:
#'   Previously to the collapsing step, a temporal variable called
#'   \code{TIMESTAMP_coll} is created to be able to catch the real timestamp when
#'   some events happens, for example to use the \code{min_time} function. If
#'   your custom summarise function needs to get the time at which some event
#'   happens, use TIMESTAMP_coll instead of TIMESTAMP for that:
#'   \preformatted{
#'     min_time <- function(x, time) {
#'       time[which.min(x)]
#'     }
#'
#'     summarise_by_period(
#'       data = get_sapf_data(ARG_TRE),
#'       period = '1 day',
#'       .funs = list(~ min_time(., time = TIMESTAMP_coll)) # Not TIMESTAMP
#'     )
#'   }
#'
#' @param data sapflow or environmental data as obtained by \code{\link{get_sapf_data}}
#'   and \code{\link{get_env_data}}. Must have a column named TIMESTAMP
#' @param period period to collapse by. See \code{\link{sfn_metrics}} for details.
#' @param .funs funs to summarise the data. See details.
#' @param ... optional arguments. See details
#'
#' @return A `tbl_df` object with the metrics results. The names of the columns
#'   indicate the original variable (tree or environmental variable) and the
#'   metric calculated (i.e. `vpd_mean`), separated by underscore
#'
#' @examples
#' library(dplyr)
#'
#' # data
#' data('ARG_TRE', package = 'sapfluxnetr')
#'
#' # simple summary
#' summarise_by_period(
#'   data = get_sapf_data(ARG_TRE),
#'   period = '7 days',
#'   .funs = list(~ mean(., na.rm = TRUE), ~ sd(., na.rm = TRUE), ~ n())
#' )
#'
#' @importFrom rlang .data
#'
#' @export

summarise_by_period <- function(data, period, .funs, ...) {
  
  # modificate .funs if data is environmental (no centroids).
  if (any(names(data) %in% .env_vars_names())) {

    # only in daily, but as daily can be stated in many ways, we check if
    # .funs[['centroid']] is NULL, and if not, we make it.
    if (!is.null(.funs[['centroid']])) {
      .funs[['centroid']] <- NULL
    }

  }

  # we will need the extra arguments (...) if any, just in case
  dots <- rlang::quos(...)
  
  if (rlang::is_function(period)) {
    # if period is a custom function, remove side argument if it exists
    # and dispatch each argument in dots to the corresponding function
    # (period custom function or summarise_all)
    dots['side'] <- NULL
    side_val <- 'start'
    dots_collapse_timestamp <- dots[names(dots) %in%
                                      methods::formalArgs(period)]
    dots_summarise_all <- dots[!(names(dots) %in%
                                   methods::formalArgs(period))]
  } else {
    # if period is a character vector, then the collapse timestamp arguments
    # are for *_date from lubridate
    args_date_funs <- c(
      methods::formalArgs(lubridate::floor_date),
      methods::formalArgs(lubridate::ceiling_date)
    )
    
    side_val <- rlang::eval_tidy(dots[['side']])
    if (is.null(side_val)) {side_val <- 'start'}
    dots['side'] <- NULL
    dots_collapse_timestamp <- dots[names(dots) %in% args_date_funs]
    dots_summarise_all <- dots[!(names(dots) %in% args_date_funs)]
  }
  
  # data processing
  data %>%
    dplyr::mutate(
      TIMESTAMP_coll = .data$TIMESTAMP,
      TIMESTAMP = .collapse_timestamp(
        .data$TIMESTAMP, !!! dots_collapse_timestamp,
        period = period, side = side_val
      )
    ) %>%
    dplyr::group_by(.data$TIMESTAMP) %>%
    dplyr::summarise_all(.funs = .funs, !!! dots_summarise_all) %>%
    dplyr::select(
      -dplyr::contains('_coll_'),
      -dplyr::contains('accumulated'),
      dplyr::contains('precip_accumulated')
    ) -> res

  return(res)

}


#' Metrics summary function
#'
#' Generate metrics from a site/s data for the period indicated
#'
#' @section Period:
#' \code{period} argument is used by internal function
#' \code{\link{.collapse_timestamp}} and it can be stated in two ways:
#' \itemize{
#'   \item{\emph{frequency period} format: "3 hours", "1 day", "7 days", "1 month"}
#'   \item{As a \emph{custom function}. This will be the name of a function,
#'   without quotes, that accepts as the first argument the timestamp to collapse.
#'   The result of the function must be a vector of collapsed TIMESTAMPs of the
#'   same length than the original TIMESTAMP which will be used to group by and
#'   summarise the data. Additional arguments to this function, if needed, can
#'   be passed in the \code{...} argument.}
#' }
#' \code{\link{.collapse_timestamp}} also accepts the \code{side} argument to
#' collapse by the starting timestamp or the ending timestamp of each group. This
#' can be supplied in the \code{...} argument.
#'
#' @section .funs:
#' \code{.funs} argument uses the same method as the \code{.funs} argument in the
#' \code{\link[dplyr]{summarise_all}} function of \code{dplyr} package. Basically
#' it accepts a list of function calls generated by list(). If you want to pass
#' on a custom function you can specify it here. See details in
#' \code{\link{summarise_by_period}} for more complex summarising functions
#' declaration.
#'
#' @section Interval:
#' Previously to the metrics summary, data can be filtered by an special
#' interval (i.e. predawn or nightly). This filtering can be specified with the
#'  \code{interval} argument as this:
#' \itemize{
#'   \item{\code{"general"} (default). No special interval is used, and metrics
#'         are performed with all the data}.
#'   \item{\code{"predawn"}. Data is filtered for predawn interval. In this case
#'         \code{int_start} and \code{int_end} must be specified as 24h value}
#'   \item{\code{"midday"}. Data is filtered for midday interval. In this case
#'         \code{int_start} and \code{int_end} must be specified as 24h value}
#'   \item{\code{"night"}. Data is filtered for night interval. In this case
#'         \code{int_start} and \code{int_end} must be specified as 24h value}
#'   \item{\code{"daylight"}. Data is filtered for daylight interval. In this case
#'         \code{int_start} and \code{int_end} must be specified as 24h value}
#' }
#'
#' @param sfn_data \code{\link{sfn_data}} or \code{\link{sfn_data_multi}} object
#'   to obtain the metrics from
#'
#' @param period Time period to aggregate data by. See period section for an
#'   explanation about the periods ('3 hours', '1 day', '1 month', '1 year', ...)
#'
#' @param .funs List of function calls to summarise the data by, see .funs
#'   section for more details.
#'
#' @param solar Logical indicating if the solarTIMESTAMP must be used instead of
#'   the site local TIMESTAMP. Default to TRUE (use solarTIMESTAMP).
#'
#' @param interval Character vector indicating if the metrics must be filtered
#'   by an special hour interval. See Interval section in details.
#'
#' @param int_start Integer value indicating the starting hour of the special
#'   interval in 24h format. See Interval section in details.
#'
#' @param int_end Integer value indicating the ending hour of the special
#'   interval in 24h format. See Interval section in details.
#'
#' @param ... optional arguments to pass to methods used
#'   (i.e. .collapse_timestamp or summarise funs extra arguments)
#'
#' @family metrics
#'
#' @return For \code{\link{sfn_data}} objects, a list of tbl_df objects
#'   with the following structure:
#'   \itemize{
#'     \item{$sapf: metrics for the sapflow data}
#'     \item{$env: metrics for the environmental data}
#'   }
#'
#'   For \code{\link{sfn_data_multi}} objects, a list of lists of tbl_df objects
#'   with the metrics for each site:
#'   \itemize{
#'     \item{$SITE_CODE
#'       \itemize{
#'         \item{$sapf: metrics for the sapflow data}
#'         \item{$env: metrics for the environmental data}
#'       }
#'     }
#'     \item{$NEXT_SITE_CODE...}
#'   }
#'
#' @examples
#' library(dplyr)
#'
#' ### general metrics
#' ## sfn_data
#' data('ARG_TRE', package = 'sapfluxnetr')
#' ARG_TRE_metrics <- sfn_metrics(
#'   ARG_TRE,
#'   period = '7 days',
#'   .funs = list(~ mean(., na.rm = TRUE), ~ sd(., na.rm = TRUE), ~ n()),
#'   solar = FALSE,
#'   interval = 'general'
#' )
#'
#' str(ARG_TRE_metrics)
#' ARG_TRE_metrics[['sapf']]
#' ARG_TRE_metrics[['env']]
#'
#' ## sfn_data_multi
#' \donttest{
#' data('ARG_MAZ', package = 'sapfluxnetr')
#' data('AUS_CAN_ST2_MIX', package = 'sapfluxnetr')
#' multi_sfn <- sfn_data_multi(ARG_TRE, ARG_MAZ, AUS_CAN_ST2_MIX)
#'
#' multi_metrics <- sfn_metrics(
#'   multi_sfn,
#'   period = '7 days',
#'   .funs = list(~ mean(., na.rm = TRUE), ~ sd(., na.rm = TRUE), ~ n()),
#'   solar = FALSE,
#'   interval = 'general'
#' )
#'
#' str(multi_metrics)
#'
#' multi_metrics[['ARG_TRE']][['sapf']]
#' }
#'
#' ### midday metrics
#' ARG_TRE_midday <- sfn_metrics(
#'   ARG_TRE,
#'   period = '1 day',
#'   .funs = list(~ mean(., na.rm = TRUE), ~ sd(., na.rm = TRUE), ~ n()),
#'   solar = TRUE,
#'   interval = 'midday', int_start = 11, int_end = 13
#' )
#'
#' str(ARG_TRE_midday)
#' ARG_TRE_midday[['sapf']]
#'
#' @export

sfn_metrics <- function(
  sfn_data,
  period,
  .funs,
  solar,
  interval = c('general', 'predawn', 'midday', 'night', 'daylight'),
  int_start = NULL,
  int_end = NULL,
  ...
) {

  # argument checks
  assertthat::assert_that(
    inherits(sfn_data, c('sfn_data', 'sfn_data_multi')),
    msg = 'sfn_data must be a sfn_Data or sfn_data_multi object'
  )
  
  ## period style deprecated conversion ####
  if (is.character(period) && period %in% c('daily', 'hourly', 'monthly', 'yearly', 'weekly')) {
    
    warning("especifying period as '***ly' format (i.e. 'daily', 'monthly')
            is soft-deprecated since 0.0.6.9000 version. Please use the
            'frequency period' format instead: '1 day', '1 month'")
    
    period <- switch(
      period,
      'daily' = '1 day',
      'weekly' = '1 week',
      'monthly' = '1 month',
      'yearly' = '1 year',
      'hourly' = '1 hour',
      period
    )
  }

  # we need to check if multi and then repeat the function for each element
  if (inherits(sfn_data, 'sfn_data_multi')) {
    
    res_multi <- sfn_data %>%
      furrr::future_map(
      # purrr::map(
        sfn_metrics,
        period = period,
        .funs = .funs,
        solar = solar,
        interval = interval,
        int_start = int_start,
        int_end = int_end,
        ...,
        .progress = TRUE
      )

    return(res_multi)
  }

  # if sfn_data then we have to calculate the desired metrics from the data

  print(paste0(
    'Crunching data for ', get_si_code(sfn_data), '. In large datasets this ',
    'could take a while'
  ))

  sapf_data <- get_sapf_data(sfn_data, solar = solar) %>%
    # mutate to create the temporal columns timestep and period_minutes, to use
    # them in the data_coverage summarise function
    dplyr::mutate(
      timestep = get_plant_md(sfn_data)[['pl_sens_timestep']][1],
      period_minutes = .period_to_minutes(
        period, .data$TIMESTAMP, unique(.data$timestep)
      )
    )
  env_data <- get_env_data(sfn_data, solar = solar) %>%
    # mutate to create the temporal columns timestep and period_minutes, to use
    # them in the data_coverage summarise function
    dplyr::mutate(
      timestep = get_env_md(sfn_data)[['env_timestep']][1],
      period_minutes = .period_to_minutes(
        period, .data$TIMESTAMP, unique(.data$timestep)
      )
    )

  whole_data <- list(sapf = sapf_data, env = env_data)

  # if interval is night, is somewhat complicated. We need to uniformize the
  # interval for nights as they span two days.
  if (interval == 'night') {

    # progress to not scare seeming like freezed in large datasets
    print(paste0('Nighttime data for ', get_si_code(sfn_data)))

    # period_minutes modification specific for night intervals calculations
    new_period_minutes <- lubridate::as.duration(
      lubridate::hours(24 - (int_start - int_end))
    )@.Data / 60
    
    night_data <- whole_data %>%
      purrr::map(
        dplyr::filter,
        dplyr::between(
          lubridate::hour(.data$TIMESTAMP), int_start, 24
        ) |
          dplyr::between(
            lubridate::hour(.data$TIMESTAMP), 0, int_end - 1
          )
      ) %>% 
      purrr::map(
        dplyr::mutate,
        period_minutes = new_period_minutes
      )

    if (period == '1 day') {

      # TODO nightly_daily_fun
      period_summary <- night_data %>%
        purrr::map(
          summarise_by_period,
          .nightly_daily_cf,
          .funs,
          # .nightly_daily_cf args
          night_data = night_data, int_start = int_start,
          # dots
          ...
        )

    } else {

     period_summary <- night_data %>%
       purrr::map(
         dplyr::mutate,
         period_minutes = new_period_minutes * 30
       ) %>%
       purrr::map(summarise_by_period, period, .funs, ...)

    }

    names(period_summary[['sapf']]) <- paste0(
      names(period_summary[['sapf']]), '_night'
    )
    names(period_summary[['env']]) <- paste0(
      names(period_summary[['env']]), '_night'
    )

  } else {

    # if interval is general, we don't need to filter by hours so is
    # straightforward:
    if (interval == 'general') {

      # progress to not scare seeming like freezed
      print(paste0('General data for ', get_si_code(sfn_data)))

      # period summaries: we want to know period means, maximum, minimum, quantiles...
      whole_data %>%
        purrr::map(summarise_by_period, period, .funs, ...) -> period_summary

    } else {

      # if interval is not general and not night, then we need to filter by hours
      # and then calculate the summaries

      # progress to not scare seeming like freezed
      print(paste0(interval, ' data for ', get_si_code(sfn_data)))
      
      # period_minutes modification specific for daytime intervals calculations
      new_period_minutes <- lubridate::as.duration(
        lubridate::hours(int_end - int_start)
      )@.Data / 60
      
      if (period != '1 day') {
        new_period_minutes <- new_period_minutes * 30
      }

      whole_data %>%
        purrr::map(
          dplyr::filter,
          dplyr::between(lubridate::hour(.data$TIMESTAMP), int_start, int_end - 1)
        ) %>%
        purrr::map(
          dplyr::mutate,
          period_minutes = new_period_minutes
        ) %>%
        purrr::map(summarise_by_period, period, .funs, ...) -> period_summary

      # now we need to create the names with the interval
      # predawn
      if (interval == 'predawn') {
        names(period_summary[['sapf']]) <- paste0(
          names(period_summary[['sapf']]), '_pd'
        )
        names(period_summary[['env']]) <- paste0(
          names(period_summary[['env']]), '_pd'
        )
      }

      # midday
      if (interval == 'midday') {
        names(period_summary[['sapf']]) <- paste0(
          names(period_summary[['sapf']]), '_md'
        )
        names(period_summary[['env']]) <- paste0(
          names(period_summary[['env']]), '_md'
        )
      }

      # daylight
      if (interval == 'daylight') {
        names(period_summary[['sapf']]) <- paste0(
          names(period_summary[['sapf']]), '_daylight'
        )
        names(period_summary[['env']]) <- paste0(
          names(period_summary[['env']]), '_daylight'
        )
      }
    }
  }

  # remove the timestep and period_minutes columns created
  res <- period_summary %>%
    purrr::modify_depth(
      1, ~ dplyr::select(
        .x, -dplyr::starts_with('timestep'),
        -dplyr::starts_with('period_minutes')
      )
    )

  return(res)
}




####### shorthand functions for sfn_metrics ####################################

#' helper function to generate the fixed metrics
#'
#' generates a call to list to capture the fixed metrics in a quosure lambda
#' style
#' 
#' @section Metrics:
#' Calculated metrics are as follow:
#' 
#' \itemize{
#'   \item{mean: Mean value for the selected period}
#'   \item{sd: Standard deviation for the selected period}
#'   \item{coverage: Percentage of coverage as the percentage of no NAs in the
#'   expected length of the period, stated by the site timestep}
#'   \item{q*: q95 by default. Quantile value for the selected period. Quantiles to
#'   calculate are stated in the probs argument}
#'   \item{accumulated: Accumulated value on the selected period}
#'   \item{centroid: daily centroid value in the selected period, calculated
#'   only if centroid argument is TRUE}
#' }
#'
#' @param probs probs vector for quantile
#'
#' @param centroid logical indicating if centroid calculation must be made.
#'   (i.e. in monthly metrics, the centroid calculation is not needed)
#'
#' @importFrom stats sd
#' @importFrom stats quantile
#' 
#' @examples 
#' sapfluxnetr:::.fixed_metrics_funs(0.95, FALSE)
#' sapfluxnetr:::.fixed_metrics_funs(c(0.05, 0.95), TRUE)
#' 
#' @keywords internal

.fixed_metrics_funs <- function(probs, centroid) {

  # hack to avoid R CMD CHECKS to complain about . not being a global variable
  . <- NULL

  # we need magic to add the quantiles as they return more than one value
  # (usually). So lets play with quasiquotation
  quantile_args <- probs %>%
    purrr::map(
      function(x) {
        quantile_quo <- dplyr::quo(quantile(., probs = !!x, na.rm = TRUE))
        # dplyr .funs argument expect a list of formulas, not quosures:
        rlang::new_formula(
          NULL,
          rlang::quo_squash(quantile_quo),
          rlang::quo_get_env(quantile_quo)
        )
      }
    )
  names(quantile_args) <- paste0('q_', round(probs*100, 0))

  # we use rlang::list2 as we need the quantile spliced and evaluated with !!!
  .funs <- rlang::list2(
    mean = ~ mean(., na.rm = TRUE),
    sd = ~ sd(., na.rm = TRUE),
    coverage = ~ data_coverage(., .data$timestep, .data$period_minutes),
    !!! quantile_args,
    accumulated = ~ .accumulated_posix_aware(., na.rm = TRUE),
    centroid = ~ diurnal_centroid(.)
  )

  if (!centroid) {
    .funs[['centroid']] <- NULL
    }

  return(.funs)

}

#' Complete metrics wrappers
#'
#' This set of functions returns a complete set of statistics for a site (using
#' \code{\link{sfn_data}}) or several sites (using \code{\link{sfn_data_multi}})
#'
#' @details
#' \code{*_metrics} functions are wrappers for \code{\link{sfn_metrics}} with a
#' set of fixed arguments.
#'
#' \code{*_metrics} functions return all or some of the following statistics:
#' \itemize{
#'   \item{mean: mean of variable (tree or environmental variable) for the
#'         given period. NAs are removed}
#'   \item{sd: standard deviation of the variable for the given period. NAs are
#'         removed}
#'   \item{coverage: Data coverage percentage (percentage of measures without
#'         NAs)}
#'   \item{q_XX: 0.XX quantile value for the period}
#'   \item{centroid: Diurnal centroid value (hours passed until the half of
#'         the summed daily value was reached). Only returned for sapflow
#'         measures when period is '1 day'}
#'   \item{accumulated: Accumulated values for precipitation only}
#' }
#'
#' @param probs numeric vector of probabilities for
#'   \code{\link[stats]{quantile}}
#'
#' @param tidy Logical indicating if the metrics must be returned in a tidy
#'   format (a long tibble, each observation in its own row)
#'
#' @param metadata metadata object, usually the result of
#'   \code{\link{read_sfn_metadata}}. Only used if tidy is TRUE.
#'
#' @param ... additional arguments to be passed to \code{\link{.collapse_timestamp}}
#'   or \code{\link[lubridate]{floor_date}} or \code{\link[lubridate]{ceiling_date}}.
#'
#' @family metrics
#'
#' @return If \code{tidy} is TRUE, a tibble with the metrics for
#'   sapflow and environmental data, with all the metadata included. If
#'   \code{tidy} is FALSE (default), a list of tibbles with the calculated
#'   metrics.
#'
#' @importFrom dplyr n
#'
#' @name metrics
NULL

#' @rdname metrics
#'
#' @section daily_metrics:
#' \code{daily_metrics} summarise daily data for all hours in the day
#'
#' @inheritParams sfn_metrics
#'
#' @examples
#' ## daily_metrics
#' # data load
#' data('ARG_TRE', package = 'sapfluxnetr')
#' data('sfn_metadata_ex', package = 'sapfluxnetr')
#'
#' # non tidy raw metrics (default)
#' ARG_TRE_raw_daily <- daily_metrics(ARG_TRE)
#' str(ARG_TRE_raw_daily)
#'
#' \donttest{
#' # tidy daily metrics
#' ARG_TRE_daily <- daily_metrics(
#'   ARG_TRE, tidy = TRUE, metadata = sfn_metadata_ex
#' )
#' ARG_TRE_daily
#' }
#'
#' @export

daily_metrics <- function(
  sfn_data,
  solar = TRUE,
  probs = c(0.95),
  tidy = FALSE,
  metadata = NULL,
  ...
) {
  
  # hack for cran tests
  . <- NULL

  # hardcoded values
  period <- '1 day'

  # default funs
  .funs <- .fixed_metrics_funs(probs, TRUE)
  
  # pipe for avoid creating intermediate big objects
  # 
  # just input all in the sfn_function
  sfn_metrics(
    sfn_data,
    period = period,
    .funs = .funs,
    solar = solar,
    interval = 'general',
    ...
  ) %>% {
    # and tidyfy it if tidy is true
    if (tidy) {
      metrics_tidyfier(., metadata, interval = 'general')
    } else {
      .
    }
  }
}

#' @rdname metrics
#'
#' @section monthly_metrics:
#' \code{monthly_metrics} summarise monthly data for all hours in the day.
#'
#' @inheritParams sfn_metrics
#'
#' @examples
#' ## monthly_metrics
#' # data load
#' data('ARG_TRE', package = 'sapfluxnetr')
#' data('sfn_metadata_ex', package = 'sapfluxnetr')
#'
#' # non tidy raw metrics (default)
#' ARG_TRE_raw_monthly <- monthly_metrics(ARG_TRE)
#' str(ARG_TRE_raw_monthly)
#'
#' \donttest{
#' # tidy monthly metrics
#' ARG_TRE_monthly <- monthly_metrics(
#'   ARG_TRE, tidy = TRUE, metadata = sfn_metadata_ex
#' )
#' ARG_TRE_monthly
#' }
#'
#' @export

monthly_metrics <- function(
  sfn_data,
  solar = TRUE,
  probs = c(0.95),
  tidy = FALSE,
  metadata = NULL,
  ...
) {
  
  . <- NULL

  # hardcoded values
  period <- '1 month'

  # default funs
  .funs <- .fixed_metrics_funs(probs, FALSE)

  # just input all in the sfn_function
  sfn_metrics(
    sfn_data,
    period = period,
    .funs = .funs,
    solar = solar,
    interval = 'general',
    ...
  ) %>% {
    # and tidyfy it if tidy is true
    if (tidy) {
      metrics_tidyfier(., metadata, interval = 'general')
    } else {
      .
    }
  }
}

#' @rdname metrics
#'
#' @section nightly_metrics:
#' \code{nightly_metrics} will return the metrics for night
#' periods, summarised daily or monthly
#'
#' Night for daily period starts in DOY x and ends in DOY x+1 (i.e. if
#' \code{night_start = 20, night_end = 6} values for the night starting at
#' 2018-03-28 20:00:00 and ending at 2018-03-29 06:00:00 are summarised).
#'
#' Night for monthly period summarises all night periods in the month, that
#' includes from 00:00:00 of the first month night to 23:59:59 of the last
#' month night.
#'
#' @inheritParams sfn_metrics
#'
#' @examples
#' \donttest{
#' ## nightly_metrics
#' # data load
#' data('AUS_CAN_ST2_MIX', package = 'sapfluxnetr')
#'
#' # non tidy daily night metrics (default)
#' AUS_CAN_ST2_MIX_night <- nightly_metrics(AUS_CAN_ST2_MIX)
#'
#' str(AUS_CAN_ST2_MIX_night)
#' AUS_CAN_ST2_MIX_night[['sapf']]
#' AUS_CAN_ST2_MIX_night[['env']]
#'
#' # change the night interval
#' AUS_CAN_ST2_MIX_night_short <- nightly_metrics(
#'   AUS_CAN_ST2_MIX, int_start = 21, int_end = 4 # night starting and ending hour
#' )
#' AUS_CAN_ST2_MIX_night_short[['env']]
#'
#' # tidy nightly metrics
#' data('sfn_metadata_ex', package = 'sapfluxnetr')
#' AUS_CAN_ST2_MIX_night_tidy <- nightly_metrics(
#'   AUS_CAN_ST2_MIX,
#'   tidy = TRUE, metadata = sfn_metadata_ex
#' )
#' AUS_CAN_ST2_MIX_night_tidy
#' }
#'
#' @export

nightly_metrics <- function(
  sfn_data,
  period = c('1 day', '1 month'),
  solar = TRUE,
  int_start = 20,
  int_end = 6,
  probs = c(0.95),
  tidy = FALSE,
  metadata = NULL,
  ...
) {
  
  . <- NULL

  period <- match.arg(period)

  # default funs
  if (period == '1 day') {
    .funs <- .fixed_metrics_funs(probs, TRUE)
  } else {
    .funs <- .fixed_metrics_funs(probs, FALSE)
  }

  # just input all in the sfn_metrics function
  sfn_metrics(
    sfn_data,
    period = period,
    .funs = .funs,
    solar = solar,
    interval = 'night',
    int_start = int_start,
    int_end = int_end,
    ...
  ) %>% {
    # and tidyfy it if tidy is true
    if (tidy) {
      metrics_tidyfier(., metadata, interval = 'night')
    } else {
      .
    }
  }
}

#' @rdname metrics
#'
#' @section daylight_metrics:
#' \code{daylight_metrics} will return the metrics for daylight
#' periods, summarised daily or monthly. Daylight interval is selected by start
#' and end hours.
#'
#' @inheritParams sfn_metrics
#'
#' @examples
#' \donttest{
#' ## daylight_metrics
#' # data load
#' data('AUS_CAN_ST2_MIX', package = 'sapfluxnetr')
#'
#' # non tidy daily daylight metrics (default)
#' AUS_CAN_ST2_MIX_daylight <- daylight_metrics(AUS_CAN_ST2_MIX)
#'
#' str(AUS_CAN_ST2_MIX_daylight)
#' AUS_CAN_ST2_MIX_daylight[['sapf']]
#' AUS_CAN_ST2_MIX_daylight[['env']]
#'
#' # change the daylight interval
#' AUS_CAN_ST2_MIX_daylight_short <- daylight_metrics(
#'   AUS_CAN_ST2_MIX, int_start = 8, int_end = 18 # night starting and ending hour
#' )
#' AUS_CAN_ST2_MIX_daylight_short[['env']]
#'
#' # tidy daylight metrics
#' data('sfn_metadata_ex', package = 'sapfluxnetr')
#' AUS_CAN_ST2_MIX_daylight_tidy <- daylight_metrics(
#'   AUS_CAN_ST2_MIX,
#'   tidy = TRUE, metadata = sfn_metadata_ex
#' )
#' AUS_CAN_ST2_MIX_daylight_tidy
#' }
#'
#' @export

daylight_metrics <- function(
  sfn_data,
  period = c('1 day', '1 month'),
  solar = TRUE,
  int_start = 6,
  int_end = 20,
  probs = c(0.95),
  tidy = FALSE,
  metadata = NULL,
  ...
) {
  
  . <- NULL

  period <- match.arg(period)

  # default funs
  if (period == '1 day') {
    .funs <- .fixed_metrics_funs(probs, TRUE)
  } else {
    .funs <- .fixed_metrics_funs(probs, FALSE)
  }

  # just input all in the sfn_metrics function
  sfn_metrics(
    sfn_data,
    period = period,
    .funs = .funs,
    solar = solar,
    interval = 'daylight',
    int_start = int_start,
    int_end = int_end,
    ...
  ) %>% {
    # and tidyfy it if tidy is true
    if (tidy) {
      metrics_tidyfier(., metadata, interval = 'daylight')
    } else {
      .
    }
  }
}

#' @rdname metrics
#'
#' @section predawn_metrics:
#' \code{predawn_metrics} will always return the metrics for predawn
#' period, summarised daily or monthly. Predawn interval is selected by start and
#' end hours.
#'
#' Predawn metrics did not return the centroid metric.
#'
#' @inheritParams sfn_metrics
#'
#' @examples
#' \donttest{
#' ## predawn_metrics
#' # data load
#' data('AUS_CAN_ST2_MIX', package = 'sapfluxnetr')
#'
#' # non tidy daily predawn metrics (default)
#' AUS_CAN_ST2_MIX_predawn <- predawn_metrics(AUS_CAN_ST2_MIX)
#'
#' str(AUS_CAN_ST2_MIX_predawn)
#' AUS_CAN_ST2_MIX_predawn[['sapf']]
#' AUS_CAN_ST2_MIX_predawn[['env']]
#'
#' # change the predawn interval
#' AUS_CAN_ST2_MIX_predawn_short <- predawn_metrics(
#'   AUS_CAN_ST2_MIX, int_start = 8, int_end = 18 # night starting and ending hour
#' )
#' AUS_CAN_ST2_MIX_predawn_short[['env']]
#'
#' # tidy daylight metrics
#' data('sfn_metadata_ex', package = 'sapfluxnetr')
#' AUS_CAN_ST2_MIX_predawn_tidy <- predawn_metrics(
#'   AUS_CAN_ST2_MIX,
#'   tidy = TRUE, metadata = sfn_metadata_ex
#' )
#' AUS_CAN_ST2_MIX_predawn_tidy
#' }
#'
#' @export

predawn_metrics <- function(
  sfn_data,
  period = c('1 day', '1 month'),
  solar = TRUE,
  int_start = 4,
  int_end = 6,
  probs = c(0.95),
  tidy = FALSE,
  metadata = NULL,
  ...
) {
  
  . <- NULL

  period <- match.arg(period)

  # default funs
  .funs <- .fixed_metrics_funs(probs, FALSE)

  # just input all in the sfn_metrics function
  sfn_metrics(
    sfn_data,
    period = period,
    .funs = .funs,
    solar = solar,
    interval = 'predawn',
    int_start = int_start,
    int_end = int_end,
    ...
  ) %>% {
    # and tidyfy it if tidy is true
    if (tidy) {
      metrics_tidyfier(., metadata, interval = 'predawn')
    } else {
      .
    }
  }
}

#' @rdname metrics
#'
#' @section midday_metrics:
#' \code{midday_metrics} will always return the metrics for midday
#' period, summarised daily or monthly. midday interval is selected by start and
#' end hours.
#'
#' Midday metrics did not return the centroid metric.
#'
#' @inheritParams sfn_metrics
#'
#' @examples
#' \donttest{
#' ## midday_metrics
#' # data load
#' data('AUS_CAN_ST2_MIX', package = 'sapfluxnetr')
#'
#' # non tidy daily midday metrics (default)
#' AUS_CAN_ST2_MIX_midday <- midday_metrics(AUS_CAN_ST2_MIX)
#'
#' str(AUS_CAN_ST2_MIX_midday)
#' AUS_CAN_ST2_MIX_midday[['sapf']]
#' AUS_CAN_ST2_MIX_midday[['env']]
#'
#' # change the midday interval
#' AUS_CAN_ST2_MIX_midday_short <- midday_metrics(
#'   AUS_CAN_ST2_MIX, int_start = 8, int_end = 18 # night starting and ending hour
#' )
#' AUS_CAN_ST2_MIX_midday_short[['env']]
#'
#' # tidy daylight metrics
#' data('sfn_metadata_ex', package = 'sapfluxnetr')
#' AUS_CAN_ST2_MIX_midday_tidy <- midday_metrics(
#'   AUS_CAN_ST2_MIX,
#'   tidy = TRUE, metadata = sfn_metadata_ex
#' )
#' AUS_CAN_ST2_MIX_midday_tidy
#' }
#'
#' @export

midday_metrics <- function(
  sfn_data,
  period = c('1 day', '1 month'),
  solar = TRUE,
  int_start = 11,
  int_end = 13,
  probs = c(0.95),
  tidy = FALSE,
  metadata = NULL,
  ...
) {
  
  . <- NULL

  period <- match.arg(period)

  # default funs
  .funs <- .fixed_metrics_funs(probs, FALSE)

  # just input all in the sfn_metrics function
  sfn_metrics(
    sfn_data,
    period = period,
    .funs = .funs,
    solar = solar,
    interval = 'midday',
    int_start = int_start,
    int_end = int_end,
    ...
  ) %>% {
    # and tidyfy it if tidy is true
    if (tidy) {
      metrics_tidyfier(., metadata, interval = 'midday')
    } else {
      .
    }
  }
}

#' Build a tidy data frame from the metrics results nested list
#'
#' Transform the nested list of metrics in a tidy tibble where each observation
#' has its own row
#'
#' @param metrics_res Nested list containing the metrics results as obtained
#'   from \code{\link{metrics}}
#'
#' @param metadata List containing the metadata nested list, as obtained from
#'   \code{\link{read_sfn_metadata}}
#'
#' @param interval Interval to return, it depends on the \code{metrics_res} and
#'   can be \code{"gen"} for the general metrics, \code{"md"} for midday metrics,
#'   \code{"pd"} for predawn metrics, \code{"night"} for night metrics or
#'   \code{"day"} for diurnal metrics.
#'
#' @return a tibble with the following columns:
#'   \itemize{
#'     \item{TIMESTAMP: POSIXct vector with the date-time of the observation}
#'     \item{si_code: Character vector with the site codes}
#'     \item{pl_code: Character vector with the plant codes}
#'     \item{sapflow_*: Variables containing the different metrics for the
#'           sapflow measurements (i.e. sapflow_mean, sapflow_q_95)}
#'     \item{ta_*; rh_*; vpd_*; ...: Variables containing the different metrics
#'           for environmental variables (i.e. ta_mean, ta_q_95)}
#'     \item{pl_*: plant metadata variables (i.e. pl_sapw_area, pl_sens_meth)}
#'     \item{si_*: site metadata variables (i.e. si_biome, si_contact_firstname)}
#'     \item{st_*: stand metadata variables (i.e. st_aspect, st_lai)}
#'     \item{sp_*: species metadata variables (i.e. sp_basal_area_perc)}
#'     \item{env_*: environmental metadata variables (i.e. env_timezone)}
#'   }
#'
#' @examples
#' \donttest{
#' # data
#' multi_sfn <- sfn_data_multi(ARG_TRE, ARG_MAZ, AUS_CAN_ST2_MIX)
#' data('sfn_metadata_ex', package = 'sapfluxnetr')
#'
#' # metrics
#' multi_metrics <- daily_metrics(multi_sfn)
#'
#' # tidyfing
#' multi_tidy <- metrics_tidyfier(
#'   multi_metrics, sfn_metadata_ex, interval = 'general'
#' )
#' multi_tidy
#'
#' # A really easier way of doing the same
#' multi_tidy_easy <- daily_metrics(multi_sfn, tidy = TRUE, metadata = sfn_metadata_ex)
#' }
#'
#' @export

metrics_tidyfier <- function(
  metrics_res,
  metadata,
  interval = c('general', 'predawn', 'midday', 'night', 'daylight')
) {
  
  # hack to avoid CRAN NOTE with the use of "." in the last step
  . <- NULL
  
  # which timestamp var we use (depends on the interval)
  timestamp_var <- switch(
    interval,
    'general' = 'TIMESTAMP',
    'midday' = 'TIMESTAMP_md',
    'predawn' = 'TIMESTAMP_pd',
    'night' = 'TIMESTAMP_night',
    'daylight' = 'TIMESTAMP_daylight'
  )
  
  # individual data objects
  # In the case of sapf and env data, if the metrics corresponds to only one
  # site the extraction must be done without sapf/env level
  if (all(names(metrics_res) %in% c('sapf', 'env'))) {
    
    sapf_data <- metrics_res['sapf']
    
    env_data <- metrics_res['env']
    
    raw_codes <- metadata[['site_md']][['si_code']]
    
    raw_index <- stringr::str_detect(names(sapf_data[['sapf']])[2], raw_codes)
    
    sites_codes <- raw_codes[raw_index]
    
    names(sapf_data) <- sites_codes
    names(env_data) <- sites_codes
    
  } else {
    
    sapf_data <- metrics_res %>%
      purrr::map(c('sapf'))
    
    env_data <- metrics_res %>%
      purrr::map(c('env'))
    
    sites_codes <- names(metrics_res)
    
  }
  
  # get the metadata
  plant_md <- metadata[['plant_md']] %>%
    dplyr::filter(.data$si_code %in% sites_codes)
  
  site_md <- metadata[['site_md']] %>%
    dplyr::filter(.data$si_code %in% sites_codes)
  
  stand_md <- metadata[['stand_md']] %>%
    dplyr::filter(.data$si_code %in% sites_codes)
  
  species_md <- metadata[['species_md']] %>%
    dplyr::filter(.data$si_code %in% sites_codes) %>%
    # dplyr::group_by(.data$si_code) %>%
    # dplyr::summarise_all(function(x) { list(x) })
    dplyr::mutate(pl_species = .data$sp_name)
  
  env_md <- metadata[['env_md']] %>%
    dplyr::filter(.data$si_code %in% sites_codes)
  
  
  # we start modifying the sapflow data
  whole_data <- sapf_data %>%
    purrr::map(.sapflow_tidy) %>%
  
    # join sapf and env for all sites, by timestamp. As we have two lists, of
    # equal length, one with the modified sapflow data and another with the
    # env data, we join them together with map2
    purrr::map2(env_data, ~ dplyr::full_join(.x, .y, by = timestamp_var)) %>%
    
    # and finally we join all list elements with bind rows (controls for
    # missing variables and create them with the correct class), arranging by
    # TIMESTMAP
    dplyr::bind_rows() %>%
    dplyr::arrange(!!dplyr::sym(timestamp_var)) %>%
    
    # join all the metadata, always first the plant_md to join by pl_code
    # and after that by site code as the rest of metadata is one row only,
    # except for species_md, as it has a row by species and we need to join
    # not only by site, also by pl_species
    dplyr::left_join(plant_md, by = 'pl_code') %>%
    dplyr::left_join(site_md, by = 'si_code') %>%
    dplyr::left_join(stand_md, by = 'si_code') %>%
    dplyr::left_join(species_md, by = c('si_code', 'pl_species')) %>%
    dplyr::left_join(env_md, by = 'si_code') %>%
    
    # order the columns
    dplyr::select(
      dplyr::starts_with('TIMESTAMP'), "si_code", "pl_code",
      dplyr::starts_with('sapflow_'), dplyr::everything()
    )
  
  return(whole_data)
  
}


#### Utils for substituting tibbletime dependencies ####
#' .collapse_timestamp helper
#' 
#' Util to collapse the TIMESTAMP using lubridate::*_date functions
#' 
#' @section Period:
#' Periods accepted are in the "number period" format, as in
#' \code{\link[lubridate]{floor_date}} or a custom function name without quotes:
#' \itemize{
#'   \item{hours (exs. "1 hour", "12 hours")}
#'   \item{days (exs. "1 day", "3 days")}
#'   \item{weeks (exs. "1 week", "4 weeks")}
#'   \item{months (exs. "1 month", "6 months")}
#'   \item{years (exs. "1 year", "7 years")}
#'   \item{Also a custom function can be supplied, one that transforms the
#'   TIMESTAMP variable to the collapsed timestamp desired. See
#'   \code{\link{sfn_metrics}} for details}
#' }
#' 
#' @section Side:
#' Side indicates if using \code{\link[lubridate]{floor_date}} (side = "start) or
#' \code{\link[lubridate]{ceiling_date}} (side = 'end'). Ceiling dates is not
#' trivial, see \code{\link[lubridate]{ceiling_date}} for information on how
#' the date will be ceiled.
#' 
#' @return A vector of the same length as TIMESTAMP, with the collapsed timestamp
#'   with the same tz as the original one.
#' 
#' @keywords internal
#' 
#' @examples
#' arg_tre_timestamp <- get_timestamp(ARG_TRE)
#' sapfluxnetr:::.collapse_timestamp(
#'   arg_tre_timestamp, period = "1 day", side = 'start'
#' )
.collapse_timestamp <- function(timestamp, ..., period, side = 'start') {
  
  if (rlang::is_function(period)) {
    # if (rlang::is_empty(dots_list)) {
    #   collapsed_timestamp <- period(timestamp)
    # } else {
    #   collapsed_timestamp <- period(timestamp, !!! dots_list)
    # }
    collapsed_timestamp <- period(timestamp, ...)
  } else {
    # checks
    .assert_that_period_is_valid(period)
    assertthat::assert_that(
      assertthat::is.string(side),
      assertthat::are_equal(length(side), 1),
      side %in% c('start', 'end'),
      msg = "'side' must be a character vector of length one with accepted values 'start' or 'end'"
    )
    
    if (side == 'start') {
      collapsed_timestamp <- lubridate::floor_date(
        timestamp, period, ...
      )
    } else {
      collapsed_timestamp <- lubridate::ceiling_date(
        timestamp, period, ...
      )
    }
  }
  
  return(collapsed_timestamp)
}

#' .parse_period
#' 
#' Util to parse the period supplied to the function call to
#' create the collapsed timestamp
#' 
#' @param period Period in the "number period" format
#' 
#' @examples 
#' sapfluxnetr:::.parse_period('1 day')
#' 
#' @return  a list, with the freq value as numeric and the period value as
#'   character
#'
#' @keywords internal
.parse_period <- function(period) {
  
  .assert_that_period_is_valid(period)
  
  # split string
  splitted_period <- period %>%
    stringr::str_split(' ') %>%
    purrr::flatten_chr()
  
  period_freq <- as.numeric(splitted_period[1])
  period_char <- splitted_period[2]
  
  list(freq = period_freq, period = period_char)
  
}


#' test for character period
#' 
#' @param period "frequency period"
#' @examples 
#' sapfluxnetr:::.assert_that_period_is_valid('1 day') # TRUE
#' # not run
#' # sapfluxnetr:::.assert_that_period_is_valid('1day') # error
#' 
#' @return TRUE if is valid, informative error if not.
#' @keywords internal
.assert_that_period_is_valid <- function(period) {
  # check for character and length
  assertthat::assert_that(
    assertthat::is.string(period),
    assertthat::are_equal(length(period), 1),
    msg = '"period" must be character string of length 1. See help for more details'
  )
  
  # split string
  splitted_period <- period %>%
    stringr::str_split(' ') %>%
    purrr::flatten_chr()
  
  ## check that we have length 2
  assertthat::assert_that(
    assertthat::are_equal(length(splitted_period), 2),
    msg = '"period" must consist of a frequency and a period in the style: "2 days"'
  )
  
  ## check that 
  assertthat::assert_that(
    # Coercing to numeric should give a number, not NA
    suppressWarnings(!is.na(as.numeric(splitted_period[1]))),
    msg = "Frequency must be coercible to numeric."
  )
}

#' Period custom function
#' 
#' nightly period custom function, to use with nightly_metrics
#' 
#' This function will be supplied as "period" argument to summarise_by_period
#' when daily nightly metrics are requested.
#' 
#' @param timestamp TIMESTAMP
#' @param night_data as it comes inside of sfn_metrics
#' @param int_start interval start as supplied to sfn_metrics
#' 
#' @return a vector of the same length as TIMESTAMP with the collapsed values.
#' @keywords internal
.nightly_daily_cf <- function(
  timestamp,
  night_data, int_start
) {
  
  night_data[['sapf']] %>%
    dplyr::mutate(
      coll = .collapse_timestamp(
        .data$TIMESTAMP,
        period = '1 day',
        side = 'start'
      )
    ) %>%
    dplyr::group_by(.data$coll) %>%
    # closest to night start timestamp
    dplyr::summarise(
      custom_dates = .data$TIMESTAMP[which.min(
        abs(lubridate::hour(.data$TIMESTAMP) - int_start)
      )],
      TIMESTAMP = .data$custom_dates
    ) %>%
    dplyr::right_join(
      night_data[['sapf']] %>% dplyr::select("TIMESTAMP"), by = 'TIMESTAMP'
    ) %>%
    tidyr::fill("custom_dates") %>%
    dplyr::pull(.data$custom_dates) %>%
    lubridate::floor_date(unit = 'hour')
}

Try the sapfluxnetr package in your browser

Any scripts or data that you put into this service are public.

sapfluxnetr documentation built on Feb. 16, 2023, 7:52 p.m.