R/split_interval.R

Defines functions split_interval

Documented in split_interval

#' Split date as character to starts and ends.
#' 
#' @param label target date character.
#' @import magrittr
#' @importFrom lubridate as_date ceiling_date floor_date days make_datetime ymd
#' @importFrom purrr as_vector keep flatten map
#' @importFrom rlang warn
#' @importFrom stringi stri_datetime_symbols stri_trans_general
#' @importFrom stringr str_extract str_replace str_split
#' @importFrom tibble data_frame
#' @return tibble. Contains input dates sllit to starts and end days.
#' @examples 
#' \dontrun{
#' split_interval("平成29年3月") # Determinate last day of month
#' split_interval("平成27年4月-5月")
#' split_interval("平成27年4~5月")
#' split_interval("平成27年4月から5月")
#' split_interval("平成27年4月より5月")
#' split_interval("平成1年11月から平成1年12月")
#' split_interval("平成20年~平成24年")
#' split_interval("国家公務員退職手当実態調査(平成28年度)")
#' split_interval("平成26年度")
#' split_interval("(平成28年1月~3月)")
#' split_interval("H20.12.31現在")
#' split_interval("平成17年(2005年)")
#' split_interval("平成7年")
#' 
#' require(magrittr); require(lubridate)
#' tibble::data_frame(
#' label = c("平成27年4月~5月",
#'          "平成29年3月",
#'          "平成1年11月から12月",
#'          "平成1年11月から平成1年12月")
#'          ) %>% 
#'          dplyr::mutate(out = purrr::pmap(., split_interval)) %>% 
#'          tidyr::unnest() %>% 
#'          dplyr::filter(start_date %within% lubridate::interval(ymd("2017-01-01"), 
#'                                            lubridate::ymd("2018-01-01")))
#' }
#' @export
split_interval <- function(label, ...) {
  
  fixed_label <- stringi::stri_trans_general(label, id = "nfkd") %>% 
    stringr::str_to_lower() %>% 
    fix_interval_connect_char() %>% 
    unique_jyr_label() %>% 
    stringr::str_replace("\\(年\\)", "")
  
  extract_jyr_doubt <- fixed_label %>% 
    stringr::str_extract("\\(.+\\)") %>% 
    stringr::str_replace_all(stringr::regex("(\\(|\\[|\\]|\\))"), "")

  if (is.na(extract_jyr_doubt) == FALSE)
    if (extract_jyr_doubt %>% is_jyr() == TRUE) fixed_label <- extract_jyr_doubt
  
  x <- fixed_label %>% convert_jyr()
  
  # Not jyr label
  if (is.na(x) == TRUE) {
    res <- tibble::data_frame(start_date = NA, end_date = NA)
  } else {
    # TODO: enhance fiscal year
    if (fixed_label %>% stringr::str_detect("[0-9]{1,2}年度") == TRUE) {
      # x <- fixed_label %>% convert_jyr()
      res <- tibble::data_frame(start_date = lubridate::make_date(x, 4, 1), end_date = lubridate::make_date(x + 1, 4, 1) - 1)
    } else {
      if (.is_interval(fixed_label) == FALSE) {
        fixed_label_tmp <- fixed_label %>%
          fix_interval_complement() %>% 
          stringr::str_split("-") 
    
        if (length(fixed_label_tmp[[1]]) == 1) {
          x <- fixed_label %>%
            stringr::str_split("-") %>%
            purrr::map(~ convert_jyr_date(.x) %>% as.character) %>% 
            purrr::flatten()
        } else {
          x <- suppressWarnings(fixed_label_tmp %>% 
                                  purrr::flatten() %>% 
                                  purrr::map(convert_jyr_date)
                                )
          
          tmp_res <- label %>% 
            extract_jyr()
          
          if (length(tmp_res) > 0 & stringr::str_detect(fixed_label, "-") == FALSE) {
            if (length(tmp_res) == 1) {
              x[[length(x)]][1]  <- x[[length(x)]][1] + 364
            } else if (identical_jyr(tmp_res[1], tmp_res[2]) == TRUE) {
              x[[length(x)]][1]  <- x[[length(x)]][1] + 364
            } 
          }
        
        }
        
        
        if (stringr::str_detect(x[[length(x)]], "1$") == TRUE) x[[length(x)]][1]  <- as.character(lubridate::ceiling_date(lubridate::ymd(x[[length(x)]]), unit = "month", change_on_boundary = TRUE) - lubridate::days(1))
        # if (stringr::str_detect(x[[length(x)]], "1$") == TRUE & length(unique(fixed_label_tmp)) > 1) x[[length(x)]][1]  <- as.character(lubridate::ceiling_date(lubridate::ymd(x[[length(x)]]), unit = "month", change_on_boundary = TRUE) - lubridate::days(1))
        # if (stringr::str_detect(x[[length(x)]], "1$") == TRUE & length(unlist(fixed_label_tmp)) == 2 & length(unique(fixed_label_tmp)) == 1) x[[length(x)]][1]  <- x[[length(x)]][1] + 364
          
        if (length(x) == 1) {
          rlang::warn("Does not contains interval elements")
          x[[2]] <- x[[1]]
          x[[1]] <- as.character(lubridate::floor_date(lubridate::ymd(x[[length(x)]][1]), unit = "month"))
        }
        
      } else {
        
        xx <- suppressWarnings(fixed_label %>% 
                                 stringr::str_extract_all(pattern = paste0(paste0("(", jyr %>% paste0("|西暦[0-9]{1,4}|[0-9]{1,4}年"), ")"),
                                                                           "|[0-9]{1,2}月|[0-9]{1,2}日")) %>% 
                                 purrr::flatten() %>% 
                                 purrr::keep(~ .x %>% stringr::str_detect(pattern = paste0("^(", jyr %>% paste0("|西暦[0-9]{1,4}|[0-9]{1,4}年"), ")"))))
        
        # fix: YY-YY
        if (length(xx) == 2) {
          x <- xx %>% purrr::map(~ convert_jyr_date(.x) %>% as.character) %>% 
            purrr::flatten()
          
          x[[1]][1] <- as.character(lubridate::make_date(lubridate::year(x[[1]][1]), lubridate::month(x[[1]][1]) - 11, lubridate::day(x[[1]][1])))
          
          x[[length(x)]][1]  <- as.character(lubridate::ceiling_date(lubridate::ymd(x[[length(x)]]), unit = "month", change_on_boundary = TRUE) - lubridate::days(1))
        } else {
          
          # NOTES:
          
          x_tmp <- fixed_label %>%
            stringr::str_replace("(\\u662d\\u548c|\\u5e73\\u6210)[0-9]{1,2}\\u5e74", "") %>% 
            stringr::str_extract(".+\\u6708") %>% 
            stringr::str_split("(-|~|\\u304b\\u3089|\\u3088\\u308a)") %>% 
            purrr::as_vector()
          
          elements <- x_tmp %>% 
            extract_interval_elements()
          
          if (length(elements) == 1) {
            x_tmp <- x_tmp %>% 
              stringr::str_replace(elements, "") %>% 
              stringr::str_c(., elements)
          }
          
          x <- x_tmp %>% 
            purrr::map(
              ~ lubridate::make_datetime(
                stringr::str_extract(fixed_label, "(\\u662d\\u548c|\\u5e73\\u6210)[0-9]{1,2}\\u5e74") %>% 
                  convert_jyr(),
                which(stringi::stri_datetime_symbols("ja_JP")$Month == .x),
                1
              )
            ) 
          # Collect last day of month
          x[[length(x)]][1] <- lubridate::ceiling_date(x[[length(x)]][1], unit = "month", change_on_boundary = TRUE) - lubridate::days(1)
        }
        
      }
      
      if (length(x) == 1) {
        x[[2]] <- x[[1]]
        x[[1]] <- lubridate::floor_date(x[[length(x)]][1], unit = "month")
      }
      
      res <- tibble::data_frame(start_date = lubridate::as_date(x[[1]]), end_date = lubridate::as_date(x[[length(x)]]))
      
    }    
  }
    
  return(res)
}
uribo/odkitchen documentation built on May 28, 2019, 3:13 p.m.