# tests/testthat/test-datetime.R

test_that("datetime parsing works", {
  # One expected column per column of the CSV literal below.
  expected <- tibble::tibble(
    date = as.Date(c("2018-01-01", "2019-01-01")),
    time = c(
      hms::hms(seconds = 1, minutes = 1, hours = 10),
      hms::hms(seconds = 3, minutes = 4, hours = 5)
    ),
    datetime = vctrs::vec_c(
      as.POSIXct("2018-01-01 10:01:01", tz = "UTC"),
      as.POSIXct("2019-01-01 05:04:03", tz = "UTC")
    )
  )

  # The literal starts at column 0 so no indentation leaks into the data.
  test_vroom(
"date,time,datetime
2018-01-01,10:01:01 AM,2018-01-01 10:01:01
2019-01-01,05:04:03 AM,2019-01-01 05:04:03
",
    delim = ",",
    equals = expected
  )
})

# Parsing ----------------------------------------------------------------------

# Reference parser built on base R: parse `x` with `strptime()` using format
# `fmt` in UTC and convert the result to POSIXct.
r_parse <- function(x, fmt) {
  broken_down <- strptime(x, format = fmt, tz = "UTC")
  as.POSIXct(broken_down)
}

test_that("%d, %m and %y", {
  # Every input spells the same day (year 10, month 02, day 03); only the
  # field order and separator change. Names are formats, values are inputs.
  feb_third <- utctime(2010, 2, 3, 0, 0, 0, 0)
  cases <- c(
    "%y-%m-%d" = "10-02-03",
    "%y-%d-%m" = "10-03-02",
    "%d/%m/%y" = "03/02/10",
    "%m/%d/%y" = "02/03/10"
  )

  for (fmt in names(cases)) {
    test_parse_datetime(cases[[fmt]], fmt, expected = feb_third)
  }
})

test_that("Compound formats work", {
  # %D, %F and %x are shorthand specifiers; each input below must resolve to
  # the same instant. Names are formats, values are inputs.
  feb_third <- utctime(2010, 2, 3, 0, 0, 0, 0)
  cases <- c(
    "%D" = "02/03/10",
    "%F" = "2010-02-03",
    "%x" = "10/02/03"
  )

  for (fmt in names(cases)) {
    test_parse_datetime(cases[[fmt]], fmt, expected = feb_third)
  }
})

test_that("%y matches R behaviour", {
  # Two-digit years 69 and 68 are compared against what base R's strptime()
  # (via r_parse()) produces for the same input.
  fmt <- "%d-%m-%y"

  for (input in c("01-01-69", "01-01-68")) {
    test_parse_datetime(input, fmt, expected = r_parse(input, fmt))
  }
})

test_that("%e allows leading space", {
  # The day field is " 1": a single digit padded with a space.
  first_of_october <- utctime(2010, 10, 1, 0, 0, 0, 0)

  test_parse_datetime("201010 1", "%Y%m%e", expected = first_of_october)
})

test_that("%OS captures partial seconds", {
  fmt <- "%Y-%m-%d %H:%M:%OS"

  # The fractional part of the seconds field must survive parsing.
  test_parse_datetime(
    "2001-01-01 00:00:01.125", fmt,
    expected = utctime(2001, 1, 1, 0, 0, 1, .125)
  )
  test_parse_datetime(
    "2001-01-01 00:00:01.133", fmt,
    expected = utctime(2001, 1, 1, 0, 0, 1, .133)
  )
})

test_that("%Y requires 4 digits", {
  fmt <- "%Y-%m-%d"
  na_date <- as.Date(NA)

  # Fewer than four year digits: rejected.
  test_parse_date("03-01-01", fmt, expected = na_date)
  test_parse_date("003-01-01", fmt, expected = na_date)

  # Exactly four digits: accepted, including leading zeros.
  test_parse_date("0003-01-01", fmt, expected = as.Date("0003-01-01"))

  # More than four digits: rejected.
  test_parse_date("00003-01-01", fmt, expected = na_date)
})

test_that("invalid dates return NA", {
  # February 30th matches the format but is not a real calendar day.
  na_utc <- .POSIXct(NA_real_, tz = "UTC")

  test_parse_datetime("2010-02-30", "%Y-%m-%d", expected = na_utc)
})

test_that("failed parsing returns NA", {
  # Non-numeric day, missing day, and wrong separator respectively.
  unparseable <- c("2010-02-ab", "2010-02", "2010/02/01")
  all_na <- .POSIXct(rep(NA_real_, length(unparseable)), tz = "UTC")

  test_parse_datetime(unparseable, "%Y-%m-%d", expected = all_na)
})

test_that("invalid specs returns NA", {
  # The format names the month twice and never names the day.
  na_utc <- .POSIXct(NA_real_, tz = "UTC")

  test_parse_datetime("2010-02-20", "%Y-%m-%m", expected = na_utc)
})

test_that("ISO8601 partial dates are not parsed", {
  # With an empty format, inputs that stop short of a full date must yield NA.
  na_utc <- .POSIXct(NA_real_, tz = "UTC")

  for (partial in c("20", "2001", "2001-01")) {
    test_parse_datetime(partial, "", expected = na_utc)
  }
})

test_that("Year only gets parsed", {
  # Fields absent from the format fall back to the first month / first day at
  # midnight, as the expected values show.
  start_of_2010 <- ISOdate(2010, 1, 1, hour = 0, tz = "UTC")
  start_of_june <- ISOdate(2010, 6, 1, hour = 0, tz = "UTC")

  test_parse_datetime("2010", "%Y", expected = start_of_2010)
  test_parse_datetime("2010-06", "%Y-%m", expected = start_of_june)
})

test_that("%p detects AM/PM", {
  # Build a UTC POSIXct from seconds since the epoch.
  utc <- function(secs) .POSIXct(secs, "UTC")

  iso_fmt <- "%F %I:%M %p"
  us_fmt <- "%m/%d/%Y %I:%M %p"

  # Upper- and lower-case markers give the same result.
  test_parse_datetime(
    c("2015-01-01 01:00 AM", "2015-01-01 01:00 am"), iso_fmt,
    expected = utc(rep(1420074000, 2))
  )
  test_parse_datetime(
    c("2015-01-01 01:00 PM", "2015-01-01 01:00 pm"), iso_fmt,
    expected = utc(rep(1420117200, 2))
  )

  # 12:01 AM is one minute past midnight, 12:01 PM one minute past noon.
  test_parse_datetime("12/31/1991 12:01 AM", us_fmt, expected = utc(694137660))
  test_parse_datetime("12/31/1991 12:01 PM", us_fmt, expected = utc(694180860))
  test_parse_datetime("12/31/1991 01:01 AM", us_fmt, expected = utc(694141260))

  # Hours 00 and 13 are not valid with %I.
  test_parse_datetime(
    c("12/31/1991 00:01 PM", "12/31/1991 13:01 PM"), us_fmt,
    expected = utc(rep(NA_real_, 2))
  )
})

test_that("%b and %B are case insensitive", {
  # Abbreviated and full month names, both given in upper case.
  new_year <- as.Date("2001-01-01")

  test_parse_date("2001 JAN 01", "%Y %b %d", expected = new_year)
  test_parse_date("2001 JANUARY 01", "%Y %B %d", expected = new_year)
})

test_that("%. requires a value", {
  fmt <- "%Y%.%m%.%d"

  # A character in each %. position is accepted ...
  test_parse_date("2001?01?01", fmt, expected = as.Date("2001-01-01"))

  # ... but leaving those positions out entirely fails.
  test_parse_date("20010101", fmt, expected = as.Date(NA))
})

test_that("%Z detects named time zones", {
  chicago <- locale(tz = "America/Chicago")
  chicago_time <- .POSIXct(1285912800, "America/Chicago")

  # Zone taken from the locale only.
  test_parse_datetime(
    "2010-10-01 01:00", "",
    expected = chicago_time, locale = chicago
  )

  # Zone spelled out in the input and matched by %Z.
  test_parse_datetime(
    "2010-10-01 01:00 America/Chicago", "%Y-%m-%d %H:%M %Z",
    locale = chicago, expected = chicago_time
  )
})

test_that("%s parses seconds since the Unix epoch", {
  # This test exercises %s, not %Z: the input is a raw epoch-seconds count and
  # the expected value is the UTC POSIXct built from the same number.
  epoch_seconds <- 1285912800
  ref <- .POSIXct(epoch_seconds, "UTC")

  test_parse_datetime("1285912800", "%s", expected = ref)
})

test_that("parse_date returns a double like as.Date()", {
  expected_date <- as.Date("2001-01-01")

  # Besides comparing values, check the storage type of the first element of
  # whatever the helper returns.
  parsed <- test_parse_date("2001-01-01", "", expected = expected_date)
  expect_type(parsed[[1]], "double")
})

test_that("parses NA/empty correctly", {
  na_utc <- .POSIXct(NA_real_, tz = "UTC")
  na_date <- as.Date(NA)

  # Read a header followed by a single empty row, keeping that row, with
  # column `x` forced to the given compact column type.
  read_blank <- function(type) {
    vroom(
      I("x\n\n"),
      delim = ",",
      col_types = list(x = type),
      skip_empty_rows = FALSE
    )
  }

  expect_equal(read_blank("T"), tibble::tibble(x = na_utc))
  expect_equal(read_blank("D"), tibble::tibble(x = na_date))

  # A user-supplied NA string is honoured by both parsers.
  test_parse_datetime("TeSt", "", na = "TeSt", expected = na_utc)
  test_parse_date("TeSt", "", na = "TeSt", expected = na_date)
})

## Locales -----------------------------------------------------------------

test_that("locale affects months", {
  french <- locale("fr")
  new_year <- as.Date("2010-01-01")

  # Abbreviated (%b) and full (%B) French month names.
  test_parse_date(
    "1 janv. 2010", "%d %b %Y",
    locale = french, expected = new_year
  )
  test_parse_date(
    "1 janvier 2010", "%d %B %Y",
    locale = french, expected = new_year
  )
})

test_that("locale affects day of week", {
  french <- locale("fr")
  input <- "Ven. 1 janv. 2010"
  fmt <- "%a %d %b %Y"

  # The expected instant is built in two ways: directly as a POSIXct, and
  # from the Date's day count scaled to seconds.
  from_posix <- as.POSIXct("2010-01-01", tz = "UTC")
  from_date <- .POSIXct(unclass(as.Date("2010-01-01")) * 86400, tz = "UTC")

  test_parse_datetime(input, fmt, locale = french, expected = from_posix)
  test_parse_datetime(input, fmt, locale = french, expected = from_date)
})

test_that("locale affects am/pm", {
  # The Korean input needs a UTF-8 session, and the test is not run on
  # Windows or Solaris.
  skip_on_os("windows")
  skip_on_os("solaris")
  skip_if_not(l10n_info()[["UTF-8"]])

  half_past_one_pm <- hms::hms(hours = 13, minutes = 30)

  # Default locale.
  test_parse_time("01:30 PM", "%H:%M %p", expected = half_past_one_pm)

  # Korean locale: the am/pm marker comes first.
  test_parse_time(
    "\UC624\UD6C4 01\UC2DC 30\UBD84", "%p %H\UC2DC %M\UBD84",
    expected = half_past_one_pm,
    locale = locale("ko")
  )
})

test_that("locale affects both guessing and parsing", {
  # No column type is given, so the locale's date_format is the only format
  # supplied for both guessing and parsing.
  month_first <- locale(date_format = "%m/%d/%Y")

  parsed <- vroom(
    I("01/02/2013\n"),
    delim = ",",
    col_names = FALSE,
    col_types = list(),
    locale = month_first
  )

  expect_equal(parsed[[1]][[1]], as.Date("2013-01-02"))
})

## Time zones ------------------------------------------------------------------

test_that("same times with different offsets parsed as same time", {
  # Examples from http://en.wikipedia.org/wiki/ISO_8601#Time_offsets_from_UTC
  # Each string is paired with the same expected instant.
  offsets <- c("18:30Z", "22:30+04", "1130-0700", "15:00-03:30")
  inputs <- paste("2010-02-03", offsets)
  instant <- utctime(2010, 2, 3, 18, 30, 0, 0)

  test_parse_datetime(
    inputs,
    format = "",
    expected = rep(instant, length(inputs))
  )
})

test_that("offsets can cross date boundaries", {
  # 20:00 at -05:00 on 31 January is 01:00 UTC on 1 February.
  one_am_utc <- as.POSIXct("2015-02-01 01:00:00Z", tz = "UTC")

  for (input in c("2015-01-31T2000-0500", "2015-02-01T0100Z")) {
    test_parse_datetime(input, expected = one_am_utc)
  }
})

test_that("unambiguous times with and without daylight savings", {
  # NOTE(review): the unconditional skip() means nothing below it runs,
  # including skip_on_cran(). Both are kept so the intent survives if the
  # first skip is ever lifted.
  skip("Not working on CI")
  skip_on_cran() # need to figure out why this fails

  noon_before_and_after <- c("2015-04-04 12:00:00", "2015-04-06 12:00:00")

  # Melbourne had daylight savings in 2015 that ended the morning of 2015-04-05
  melbourne <- locale(tz = "Australia/Melbourne")
  melbourne_noons <- .POSIXct(
    c(1428109200, 1428285600),
    "Australia/Melbourne"
  )
  test_parse_datetime(
    noon_before_and_after,
    locale = melbourne,
    expected = melbourne_noons
  )

  # Japan didn't have daylight savings in 2015
  japan_noons <- .POSIXct(c(1428116400, 1428289200), "Japan")
  japan <- locale(tz = "Japan")
  test_parse_datetime(
    noon_before_and_after,
    locale = japan,
    expected = japan_noons
  )
})

test_that("ambiguous times always choose the earliest time", {
  new_york <- locale(tz = "America/New_York")

  # The expected value pins the -0400 offset explicitly; the parsed input
  # carries no offset at all.
  offset_fmt <- "%Y-%m-%d %H:%M:%S%z"
  earliest <- as.POSIXct(
    "1970-10-25 01:30:00-0400",
    tz = "America/New_York",
    format = offset_fmt
  )

  test_parse_datetime(
    "1970-10-25 01:30:00",
    locale = new_york,
    expected = earliest
  )
})

test_that("nonexistent times return NA", {
  # Per the test name, this wall-clock time does not exist in New York; the
  # result must be an NA that still carries the New York zone.
  new_york <- locale(tz = "America/New_York")
  na_new_york <- .POSIXct(NA_real_, tz = "America/New_York")

  test_parse_datetime(
    "1970-04-26 02:30:00",
    locale = new_york,
    expected = na_new_york
  )
})

test_that("can use `tz = ''` for system time zone", {
  # Pin the session time zone for the duration of this test only.
  withr::local_timezone("Europe/London")

  # An empty tz should resolve to the zone set above.
  sys_locale <- locale(tz = "")
  london_epoch <- as.POSIXct("1970-01-01 00:00:00", tz = "Europe/London")

  test_parse_datetime(
    "1970-01-01 00:00:00",
    locale = sys_locale,
    expected = london_epoch
  )
})

test_that("can catch faulty system time zones", {
  # "foo" is not a real zone; resolving the system zone must fail loudly.
  withr::local_timezone("foo")

  expect_error(
    locale(tz = ""),
    "Unknown TZ foo"
  )
})

## Guessing ---------------------------------------------------------------------

test_that("DDDD-DD not parsed as date (i.e. doesn't trigger partial date match)", {
  # Year ranges such as "1989-90" must stay character when types are guessed.
  guessed <- vroom(
    I("1989-90\n1990-91\n"),
    delim = "\n",
    col_types = list()
  )

  expect_type(guessed[[1]], "character")
})

test_that("leading zeros don't get parsed as date without explicit separator", {
  # Read a single headerless column with guessed type and return it.
  guess_col <- function(text) {
    vroom(I(text), col_names = FALSE, delim = "\n", col_types = list())[[1]]
  }

  expect_type(guess_col("00010203\n"), "double")
  expect_s3_class(guess_col("0001-02-03\n"), "Date")
})

test_that("must have either two - or none", {
  # Read a single headerless column with guessed type and return it.
  guess_col <- function(text) {
    vroom(I(text), col_names = FALSE, delim = "\n", col_types = list())[[1]]
  }

  # Two separators: a date.
  expect_s3_class(guess_col("2000-10-10\n"), "Date")

  # Exactly one separator, in either position: left as text.
  expect_type(guess_col("2000-1010\n"), "character")
  expect_type(guess_col("200010-10\n"), "character")

  # No separators: a plain number.
  expect_type(guess_col("20001010\n"), "double")
})

test_that("times are guessed even without AM / PM", {
  # Without a marker, with AM, and with PM: all must guess a time collector.
  for (input in c("01:02:03", "01:02:03 AM", "01:02:03 PM")) {
    expect_s3_class(guess_type(input), "collector_time")
  }
})

test_that("subsetting works with both double and integer indexes", {
  parsed <- vroom(I("X1\n2020-01-01 01:01:01"), delim = ",", col_types = "T")
  stamp <- as.POSIXct("2020-01-01 01:01:01", tz = "UTC")
  missing_stamp <- .POSIXct(NA_real_, tz = "UTC")

  # Integer and double positions select the same element.
  expect_equal(parsed$X1[1L], stamp)
  expect_equal(parsed$X1[1], stamp)

  # NA indexes of either type give an NA datetime.
  expect_equal(parsed$X1[NA_integer_], missing_stamp)
  expect_equal(parsed$X1[NA_real_], missing_stamp)
})

test_that("malformed date / datetime formats cause R errors", {
  # "%/" is not a valid specifier; both collectors must raise an R error.
  # Both reads pass altrep = FALSE.
  for (collector in list(col_date, col_datetime)) {
    expect_error(
      vroom(
        I("x\n6/28/2016"),
        delim = ",",
        col_types = list(x = collector("%m/%/%Y")),
        altrep = FALSE
      ),
      "Unsupported format"
    )
  }
})

test_that("single digit dates and hours are parsed correctly (https://github.com/tidyverse/readr/issues/1276)", {
  # Month, day and hour are all given without zero padding.
  expected <- tibble::tibble(
    X = as.POSIXct("2021-04-09 14:18:25", tz = "UTC")
  )

  test_vroom(
    I("4/9/2021 2:18:25 PM"),
    col_types = list(col_datetime("%m/%d/%Y %H:%M:%S %p")),
    col_names = "X",
    delim = ",",
    equals = expected
  )
})

test_that("durations", {
  # Read `x` as a single headerless time column with the given format and
  # return that column. The read passes altrep = FALSE.
  parse_time <- function(x, format, ...) {
    parsed <- vroom(
      I(x),
      delim = ",",
      col_names = FALSE,
      col_types = list(col_time(format = format)),
      altrep = FALSE
    )
    parsed[[1]]
  }

  duration_fmt <- "%h:%M:%S"

  # %H: an hour of 25 produces a warning.
  expect_warning(parse_time("25:00:00", format = "%H:%M:%S"))

  # %h: hour counts of 25 and of one billion are accepted.
  expect_equal(
    parse_time("25:00:00", format = duration_fmt),
    hms::hms(hours = 25)
  )
  expect_equal(
    parse_time("1000000000:00:00", format = duration_fmt),
    hms::hms(hours = 1e9)
  )

  # Negative durations, with and without fractional seconds.
  expect_equal(
    parse_time("-1:23:45", format = duration_fmt),
    hms::as_hms(-hms::hms(45, 23, 1))
  )
  expect_equal(
    parse_time("-1:23:45.67", format = "%h:%M:%OS"),
    hms::as_hms(-hms::hms(45.67, 23, 1))
  )
})

# Try the vroom package in your browser

# Any scripts or data that you put into this service are public.

# vroom documentation built on Oct. 2, 2023, 5:07 p.m.