R/uniform_data.R

Defines functions uniform_data

Documented in uniform_data

#' @name uniform_data
#' @title Converting observation data to uniform time series
#' @description Citizen weather observations do not have uniform standards, this function converts the time series of
#' observations by various citizen weather stations into the same unified form.
#' @param data a data.frame that includes observation data with datetime information.
#' @param data.column a character, the selected column name in the data.frame that
#' represents observation data to be checked.
#' @param datetime.column a character, the selected column name in the data.frame that
#' represents dates and times of observations to be checked.
#' @param timeseq a date and time vector, indicating the standard reporting timestamps.
#' `timeseq` must have the same length with `data`.
#' @param freq a number, the standard reporting interval time, the unit is "second".
#' We typically use 10 (min) for forecast use, and 60 (min) for analysis use.
#' @param method the matching method, choosing from "average", "maximum", "nearest", or "angle.average".
#' "average": the average value of observations in each time interval with length `freq`.
#' "maximum": the maximum value of observations in each time interval with length `freq`.
#' "nearest": the nearest observation to `timeseq` in each time interval with length `freq`.
#' "angle.average": the circular average value of angle observations
#' in each time interval with length `freq`.
#' @param position the time window considered to match the standard 10-min timestamps:
#' "centre" means the time window is centred at the standard timestamps;
#' "past" means the time window is the past 10-min interval of the standard timestamps.
#' @return a new matched time series, with standard reporting time every `interval` minutes.
#' @import xts
#' @import tidyverse
#' @importFrom dplyr tibble
#' @importFrom stringr str_c
#' @export
#' @examples
#' library(tidyverse)
#' datetime = as.POSIXlt(seq(0,30000,300), origin = "2017-02-03 08:00:00")
#' datetime_seq_standard = as.POSIXlt(seq(0,30000,600), origin = "2017-02-03 08:00:00")
#' test = tibble(datetime = datetime,
#'               windspeed = c(rep(10.2,5),seq(10,15,0.1),rep(9.3,45)))
#' standard_ts = uniform_data(data = test, data.column = 'windspeed',
#'                            datetime.column = 'datetime',
#'                            timeseq = datetime_seq_standard,
#'                            method = "average")
#' str(standard_ts)
#' head(standard_ts)

uniform_data = function(data, data.column, datetime.column,
                        timeseq, freq = 600, method = "average", position = "past")
{
  stopifnot(is.data.frame(data))
  stopifnot(is.character(data.column), data.column %in% colnames(data))
  stopifnot(is.character(datetime.column), datetime.column %in% colnames(data))
  stopifnot(lubridate::is.POSIXt(timeseq))
  stopifnot(freq > 0)
  stopifnot(is.character(method),
            method %in% c("average", "maximum", "nearest", "angle.average"))
  stopifnot(is.character(position),
            position %in% c("past", "centre"))

  # # for test
  # data = wow_test_each[[1]]
  # data.column = 'windspeed_metrepersecond'
  # datetime.column = 'datetime'
  # timeseq = datetime_seq_test
  # freq = 600

  data = as.data.frame(data)
  obs.data = data[[data.column]]    # data[,data.column]
  obs.datetime = data[[datetime.column]]    # data[,datetime.column]
  attr(obs.datetime, 'tzone') = "GMT"

  time_series = xts(x = obs.data, order.by = obs.datetime)
  if (position == "centre") {
    standard_dt_seq = tibble('start' = timeseq - (freq/2),
                             'end' = timeseq + (freq/2 - 1) )
  } else {
    standard_dt_seq = tibble('start' = timeseq - freq + 1,
                             'end' = timeseq )
  }
  standard_dt_seq = standard_dt_seq[order(standard_dt_seq$start),]
  standard_dt_label = str_c(standard_dt_seq$start,'/',standard_dt_seq$end)

  # output_xts = xts(x = rep(NA, length(timeseq)), order.by = timeseq)

  if (method == "average")
  {
    output.data = unlist( lapply(standard_dt_label, FUN = function(x){
      ts_interval = time_series[x]
      interval.data = ts_interval[!is.na(ts_interval)]
      average.value = ifelse( length(interval.data) > 0, mean(interval.data), NA)
      return(average.value)
    }) )
  }
  else if (method == "maximum")
  {
    output.data = unlist( lapply(standard_dt_label, FUN = function(x){
      ts_interval = time_series[x]
      interval.data = ts_interval[!is.na(ts_interval)]
      max.value = ifelse( length(interval.data) > 0, max(interval.data), NA)
      return(max.value)
    }) )
  }
  else if (method == "nearest")
  {
    time_series_intervals = lapply(standard_dt_label, FUN = function(x){
      time_series[x] } )
    output.data = unlist( lapply(1:length(time_series_intervals), FUN = function(i){
      ts_interval = time_series_intervals[[i]]
      ts_interval = ts_interval[!is.na(ts_interval)]
      nearest.value = ifelse( length(ts_interval) > 0,
                              coredata(ts_interval)[which.min( abs( index(ts_interval) - timeseq[i]) )],
                              NA)
      # nearest_label = which.min( abs( index(ts_interval) - timeseq[i]) )
      # nearest.value = coredata(ts_interval)[nearest_label]
      return(nearest.value)
    }) )
  }
  else if (method == "angle.average")
  {
    output.data = unlist( lapply(standard_dt_label, FUN = function(x){
      ts_interval = time_series[x]
      interval.data = ts_interval[!is.na(ts_interval)]
      average.angle = ifelse( length(interval.data) > 0, angle_average(interval.data), NA)
      return(average.angle)
    }) )
  }

  output_xts = xts(x = output.data, order.by = timeseq)
  attr(output_xts,"tzone") = "GMT"

  return(output_xts)
}
jieyu97/QCwind documentation built on June 18, 2021, 3:37 a.m.