R/utilities.R

Defines functions expression_ym_range

# If dplyr::select is in a package, using .data also prevents R CMD check from
# giving a NOTE about undefined global variables (provided that @importFrom
# rlang .data is inserted). Using rlang::.data (without @importFrom rlang .data)
# is another option. See Wickham, Hadley, R Packages, O'Reilly, 1st Edition,
# 2015, p. 89 for details

#' @importFrom rlang .data
#' @importFrom magrittr "%>%"

expression_ym_range <- function(.from = NA, .to = NA) {

  expression_ym <- NULL

  if(is.na(.from) && is.na(.to)){
    expression_ym <- expression(. ~ .)
  }

  if(!is.na(.from) && is.na(.to)){
    expression_ym <- expression(from_ ~ .)
  }

  if(is.na(.from) && !is.na(.to)){
    expression_ym <- expression(. ~ to_)
  }

  if(!is.na(.from) && !is.na(.to)){
    expression_ym <- expression(from_ ~ to_)
  }

  return(expression_ym)
}


get_incomplete_record <- function(tbl, show = T, issue_warning = T){

  admissible_class <- c('tbl_ts', 'tbl_df', 'tbl', 'data.frame')
  arg_class <- attr(tbl, which = 'class')

  if(rlang::is_empty(base::intersect(arg_class, admissible_class)) == TRUE){
    stop('The argument class is not admissible.', call. = T)
  }

  incomplete_record <- dplyr::rowwise(data = tbl) %>%
    dplyr::mutate(hasNA = base::anyNA(dplyr::across())) %>%
    dplyr::filter(.data$hasNA == TRUE) %>%
    dplyr::select(-.data$hasNA)

  if(show == TRUE) {
    if(dim(incomplete_record)[1] > 0){
      if(issue_warning) {
        warning('The following records are incomplete and will be removed:',
                call. = T, immediate. = T)
      }
      print(incomplete_record, n = dim(incomplete_record)[1])
    }
  } else {
    return(incomplete_record)
  }

}


last_cal_day <- function(yyyy, mm){
  stopifnot(base::is.numeric(yyyy))
  stopifnot(base::is.numeric(mm))

  cal_day <- lubridate::ymd(
    base::paste0(yyyy, '-', mm, '-', lubridate::days_in_month(mm))
  )

  if(mm == 2 && (lubridate::leap_year(yyyy) == T)) {
    cal_day <- cal_day + 1
  }

  return(cal_day)
}


parseBuiltdHdl <- function(str){

  stopifnot(!base::is.na(str))
  stopifnot(base::is.character(str))

  psn_end <- 0
  psn_start <- 0

  psn_end <- stringr::str_locate(str, pattern = '_')[1]
  hdl_ <- stringr::str_sub(string = str, start = 1, end = psn_end - 1)

  str <- stringr::str_sub(string = str, start = psn_end + 1)

  psn_start <- max(stringr::str_locate_all(str, pattern = '_')[[1]])
  frequency_ <- stringr::str_sub(string = str, start = psn_start + 1)

  str <- stringr::str_sub(string = str, start = 1, end = psn_start - 1)
  if(stringr::str_detect(str, pattern = '_')){
    str <- stringr::str_extract(str, pattern = '__[A-Z,-]{1,30}')
  }
  region_ <- stringr::str_remove_all(str, pattern = '_')

  return( c(hdl = hdl_, region = region_, frequency = frequency_))
}


remove_year_month_from <- function(tbl){

  stopifnot(
    any(attr(tbl, which = 'class') %in% c('tbl_df', 'tbl', 'data.frame'))
  )

  str_control <- base::as.character("year_month")

  if(str_control %in% names(tbl)) {
    if(tsibble::is_yearmonth(tbl$year_month)){
      tbl <- tibble::as_tibble(x = tbl) %>%
        dplyr::select( -.data$year_month )
    } else {
      # -.data$ is REQUIRED, even with .data = tbl
      tbl <- dplyr::select(.data = tbl, -.data$year_month)
    }
  } else {
    return(tbl)
  }

}


stop_on_gap_duplicate <- function( tsbl, print_n = 500 ){

  if( tsibble::is_tsibble(tsbl) == FALSE) {
    stop('Argument is not a tsibble object', call. = T)
  }

  # Check time gaps in the tsibble object
  if( tsibble::has_gaps(tsbl) == TRUE){
    print(tsibble::count_gaps(.data = tsbl, .full = TRUE), print_n)
    stop("Gaps were detected in tsibble object",
         call. = T)
  }

  # Add duplicate check
  if( any(tsibble::are_duplicated(tsbl, index = .data$year_month) == TRUE) ) {
    stop("Duplicated index (year_month) were detected in tsibble object",
         call. = T)
  }
}


stop_on_ym_range <- function(.from = NA, .to = NA) {

  date_form <- stringr::str_glue('\n',
                                 '2020',
                                 '2020 Jan', '2020/Jan', '2020-Jan',
                                 '2020 jan', '2020/jan', '2020-jan',
                                 '2020 01', '2020/01', '2020-01',
                                 .sep = '\n')
  date_pattern <- c("[:digit:]{4}",
                    "[:digit:]{4}[:blank:]{1}[:upper:]{1}[:lower:]{2}",
                    "[:digit:]{4}[:punct:]{1}[:upper:]{1}[:lower:]{2}",
                    "[:digit:]{4}[:blank:]{1}[:lower:]{3}",
                    "[:digit:]{4}[:punct:]{1}[:lower:]{3}",
                    "[:digit:]{4}[:blank:]{1}[:digit:]{2}",
                    "[:digit:]{4}[:punct:]{1}[:digit:]{2}")
  date_range <- c(.from, .to)

  if( !base::is.na(.from) ){
    if( is.character(.from) == F ){
      stop('Argument .from must be a character',call. = T)
    } else {
      if(base::any(stringr::str_detect(.from, pattern = date_pattern)) == F) {
        stop(stringr::str_glue('Argument .from must have the following form: ',
                               date_form), call. = T)
      }
    }
  } else {
    stop('Argument .from must be specified. Currently set to NA',call. = T)
  }

  if( !base::is.na(.to) ){
    if( is.character(.to) == F ){
      stop('Argument .to must be a character',call. = T)
    } else {
      if(base::any(stringr::str_detect(.to, pattern = date_pattern)) == F) {
        stop(stringr::str_glue('Argument .to must have the following form: ',
                               date_form), call. = T)
      }
    }
  }

  NA_count <- length( which( is.na(date_range) ) )
  if( NA_count == 0) {
    if( tsibble::yearmonth(.from) >= tsibble::yearmonth(.to) ){
      stop("Date range error: incompatible '.from' and '.to'",call. = T)
    }
  }
}


to_tibble_time <- function(tbl) {

  stopifnot(any(attr(tbl,
                     which = 'class') %in% c('tbl_df', 'tbl', 'data.frame')))
  stopifnot( 'date' %in% names(tbl))
  tt <- tibbletime::as_tbl_time(x = tbl, .data$date)

  return(tt)
}


with_sub_Period <- function(obs, interval_type = c('Fama-Gibbons', 'n_month'),
                            wnd_sz = 60){

  interval_type <- match.arg(arg = interval_type)
  stopifnot( any(attr(obs, "class") %in% c("tbl_df", "tbl", "data.frame") ) )

  if( 'year_month' %in% names(obs) == F ) {
    stop("Variable 'year_month' is missing",call. = T)
  }
  stopifnot( tsibble::is_yearmonth(obs$year_month) == T )

  # ---------------------------------------------------------------------------
  # Sub-periods in Table 4, Fama-Gibbons (1984)
  if(interval_type == 'Fama-Gibbons') {

    int0 <- lubridate::interval(start = tsibble::yearmonth('1900-01'),
                                end = tsibble::yearmonth('1953-12'))

    int1 <- lubridate::interval(start = tsibble::yearmonth('1954-01'),
                                end = tsibble::yearmonth('1957-06'))

    int2 <- lubridate::interval(start = tsibble::yearmonth('1957-07'),
                                end = tsibble::yearmonth('1960-12'))

    int3 <- lubridate::interval(start = tsibble::yearmonth('1961-01'),
                                end = tsibble::yearmonth('1964-06'))

    int4 <- lubridate::interval(start = tsibble::yearmonth('1964-07'),
                                end = tsibble::yearmonth('1967-12'))

    int5 <- lubridate::interval(start = tsibble::yearmonth('1968-01'),
                                end = tsibble::yearmonth('1971-06'))

    int6 <- lubridate::interval(start = tsibble::yearmonth('1971-07'),
                                end = tsibble::yearmonth('1974-06'))

    int7 <- lubridate::interval(start = tsibble::yearmonth('1974-07'),
                                end = tsibble::yearmonth('1977-12'))

    int8 <- lubridate::interval(start = tsibble::yearmonth('1978-01'),
                                end = tsibble::yearmonth('2999-01'))

    obs <- obs %>%
      dplyr::mutate(
        sub_period = purrr::map_chr(.x = obs$year_month, .f = function(.){
          if( lubridate::`%within%`(base::as.Date(.), int0) == T ){
            period = 'Other'
          }
          if( lubridate::`%within%`(base::as.Date(.), int1) == T ){
            period = 'Sub-period 1'
          }
          if( lubridate::`%within%`(base::as.Date(.), int2) == T ){
            period = 'Sub-period 2'
          }
          if( lubridate::`%within%`(base::as.Date(.), int3) == T ){
            period = 'Sub-period 3'
          }
          if( lubridate::`%within%`(base::as.Date(.), int4) == T ){
            period = 'Sub-period 4'
          }
          if( lubridate::`%within%`(base::as.Date(.), int5) == T ){
            period = 'Sub-period 5'
          }
          if( lubridate::`%within%`(base::as.Date(.), int6) == T ){
            period = 'Sub-period 6'
          }
          if( lubridate::`%within%`(base::as.Date(.), int7) == T ){
            period = 'Sub-period 7'
          }
          if( lubridate::`%within%`(base::as.Date(.), int8) == T ){
            period = 'Other'
          }

          return(period)
        })
      )
  }


  # ---------------------------------------------------------------------------
  # n-month sub-periods
  if(interval_type == 'n_month') {

    start_obs <- dplyr::slice_head(obs)['year_month']
    end_obs <- dplyr::slice_tail(obs)['year_month']

    n_month <- base::as.integer( end_obs - start_obs )
    N <- (n_month %/% wnd_sz) + 1

    int_n <- purrr::map_dfr(.x = 1:N, .f = function(.){
      start = (start_obs + (((. - 1) * wnd_sz)))[[1]]
      end = (start_obs + (. * wnd_sz) - 1)[[1]]
      data.frame(int = lubridate::interval(start = start, end = end),
                 start = start, end = end,
                 lab = stringr::str_glue(as.character(start),
                                         '-',
                                         as.character(end)))
    }) %>%
      tibble::as_tibble()

    obs <- obs %>%
      dplyr::mutate(
        sub_period = purrr::map_chr(.x = obs$year_month, .f = function(.){

          hdl <- which(lubridate::`%within%`(as.Date(.), int_n$int) )
          len <- length(hdl)

          if(len == 0){
            period = 'Other'
          } else {
            period <- as.character(int_n$lab[hdl])
          }

          return(period)
        })
      )

  }

  return(obs)
}
fognyc/bindr documentation built on Dec. 4, 2020, 12:33 p.m.