TileDB Dimensions: Date and Datetime Support

## Global knitr chunk options used by every chunk in this vignette.
knitr::opts_chunk$set(
  collapse = TRUE,
  class.output = NULL,
  class.message = NULL,
  class.warning = NULL,
  class.error = NULL,
  python.reticulate = TRUE  # can toggle to off
)
## Set the session time zone to UTC (presumably so that the printed
## datetimes do not depend on the local zone of the build machine).
Sys.setenv(TZ = "UTC")

Overview

TileDB supports a wide range of date and time types, as shown in the documentation for Datetimes.

The supported range of dates and times corresponds to the dates and times supported by the Python extension numpy, and is described in its documentation.

```{python pyex1} import numpy as np import datetime

day = "1971-02-03 04:05:06" # third day of second month of first year past epoch

[np.datetime64(day, 'Y').astype('int64'), # extract year, month, ... as an int64 np.datetime64(day, 'M').astype('int64'), np.datetime64(day, 'D').astype('int64'), np.datetime64(day, 'h').astype('int64'), np.datetime64(day, 'm').astype('int64'), np.datetime64(day, 's').astype('int64')]

R has date and datetime support built-in.  The `Date` class supports dates using an integer count
of days since the epoch, and matches the `D` value from the previous example.  Similarly, the
'compact' `POSIXct` representation of a `Datetime` uses the number of seconds since the epoch and
corresponds to the `s` value from the previous example.


```r
## Day count since the epoch -> Date.
as.Date(398, origin = "1970-01-01")  # conversion from numeric input requires 'origin'
## Second count since the epoch -> POSIXct, shown in UTC.
## NOTE(review): 'usetz' looks like a format() argument rather than an
## as.POSIXct() one; presumably it is ignored here -- confirm.
as.POSIXct(34401906, origin = "1970-01-01", tz = "UTC", usetz = TRUE)
```

R can reconstruct dates and times from the numpy representation using the epoch as a 'base' date along with time period calculations. This can be done using base R (adding to Date or POSIXct objects), by using the lubridate package for a number of intermediate formats and by using the nanotime package for higher-resolution periods and intervals.

## Epoch plus calendar periods, via lubridate.
suppressMessages(library(lubridate))
ymd("1970-01-01") + c(years(1), months(13), days(398))
ymd_hms("1970-01-01 00:00:00") + c(hours(9556), minutes(573365))

## Epoch plus a nanosecond-resolution duration, via nanotime.
suppressMessages(library(nanotime))
nanotime("1970-01-01T00:00:00+00:00") +
  nanoduration(hours = 2, minutes = 3, seconds = 4, nanoseconds = 5)

Python and R Interoperability

Coarsest: Year

Python

```{python pyex2} import numpy as np import sys import os import tiledb uri = "/tmp/tiledb/dt_year" dom = tiledb.Domain(tiledb.Dim(name="rows", domain=(np.datetime64('2001-01-01'), np.datetime64('2030-12-31')), tile=np.timedelta64(10, 'Y'), dtype=np.datetime64('', 'Y'))) schema = tiledb.ArraySchema(domain=dom, sparse=True, attrs=[tiledb.Attr(name="a", dtype=np.int32)]) if (os.path.isdir(uri)): tiledb.VFS().remove_dir(uri) tiledb.SparseArray.create(uri, schema)

with tiledb.SparseArray(uri, mode='w') as A: I = [np.datetime64('2001-01-01'),np.datetime64('2002-01-01'),np.datetime64('2003-01-01')] data = np.array(([1,2,3])) A[I] = data

#### R

```r
library(tiledb)
## Same location the Python chunk above wrote to.
uri <- "/tmp/tiledb/dt_year"
## Spell the argument out as 'return_as' instead of relying on partial
## matching of 'as'.
arr <- tiledb_array(uri, return_as = "data.frame")
arr[]
## we can also look at 'raw' int64 values:
datetimes_as_int64(arr) <- TRUE
arr[]
```

Day

Python

```{python pyex3} import numpy as np import sys import os import tiledb uri = "/tmp/tiledb/dt_day" dom = tiledb.Domain(tiledb.Dim(name="rows", domain=(np.datetime64('2001-01-01'), np.datetime64('2030-12-31')), tile=np.timedelta64(10, 'D'), dtype=np.datetime64('', 'D'))) schema = tiledb.ArraySchema(domain=dom, sparse=True, attrs=[tiledb.Attr(name="a", dtype=np.int32)]) if (os.path.isdir(uri)): tiledb.VFS().remove_dir(uri) tiledb.SparseArray.create(uri, schema)

with tiledb.SparseArray(uri, mode='w') as A: I = [np.datetime64('2001-01-01'),np.datetime64('2001-01-02'),np.datetime64('2001-01-03')] data = np.array(([1,2,3])) A[I] = data

#### R

```r
library(tiledb)
## Same location the Python chunk above wrote to.
uri <- "/tmp/tiledb/dt_day"
## Spell the argument out as 'return_as' instead of relying on partial
## matching of 'as'.
arr <- tiledb_array(uri, return_as = "data.frame")
arr[]
```

Minute

Python

```{python pyex4} import numpy as np import sys import os import tiledb uri = "/tmp/tiledb/dt_min" dom = tiledb.Domain(tiledb.Dim(name="rows", domain=(np.datetime64('2001-01-01'), np.datetime64('2030-12-31')), tile=np.timedelta64(10, 'm'), dtype=np.datetime64('', 'm'))) schema = tiledb.ArraySchema(domain=dom, sparse=True, attrs=[tiledb.Attr(name="a", dtype=np.int32)]) if (os.path.isdir(uri)): tiledb.VFS().remove_dir(uri) tiledb.SparseArray.create(uri, schema)

with tiledb.SparseArray(uri, mode='w') as A: I = [np.datetime64('2001-01-01 00:00'), np.datetime64('2001-01-02 00:01'), np.datetime64('2001-01-03 00:02')] data = np.array(([1,2,3])) A[I] = data

#### R

```r
library(tiledb)
## Same location the Python chunk above wrote to.
uri <- "/tmp/tiledb/dt_min"
## Spell the argument out as 'return_as' instead of relying on partial
## matching of 'as'.
arr <- tiledb_array(uri, return_as = "data.frame")
arr[]
```

Millisecond

Python

```{python pyex5a} import numpy as np import sys import os import tiledb uri = "/tmp/tiledb/dt_ms" dom = tiledb.Domain(tiledb.Dim(name="rows", domain=(np.datetime64('1969-01-01'), np.datetime64('2030-12-31')), tile=np.timedelta64(10, 'ms'), dtype=np.datetime64('', 'ms'))) schema = tiledb.ArraySchema(domain=dom, sparse=True, attrs=[tiledb.Attr(name="a", dtype=np.int32)]) if (os.path.isdir(uri)): tiledb.VFS().remove_dir(uri) tiledb.SparseArray.create(uri, schema)

with tiledb.SparseArray(uri, mode='w') as A: I = [np.datetime64('1970-01-01 00:00:00.001'), np.datetime64('1980-01-01 00:00:00.002'), np.datetime64('1990-01-01 00:00:00.003'), np.datetime64('2000-01-01 00:00:00.004'), np.datetime64('2010-01-01 00:00:00.005'), np.datetime64('2020-01-01 00:00:00.006')] data = np.array(([1,2,3,4,5,6])) A[I] = data

#### R

```r
library(tiledb)
## Same location the Python chunk above wrote to.
uri <- "/tmp/tiledb/dt_ms"
## Spell the argument out as 'return_as' instead of relying on partial
## matching of 'as'.
arr <- tiledb_array(uri, return_as = "data.frame")
arr[]
```

Microsecond

Python

```{python pyex5} import numpy as np import sys import os import tiledb uri = "/tmp/tiledb/dt_us" dom = tiledb.Domain(tiledb.Dim(name="rows", domain=(np.datetime64('1969-01-01'), np.datetime64('2030-12-31')), tile=np.timedelta64(10, 'us'), dtype=np.datetime64('', 'us'))) schema = tiledb.ArraySchema(domain=dom, sparse=True, attrs=[tiledb.Attr(name="a", dtype=np.int32)]) if (os.path.isdir(uri)): tiledb.VFS().remove_dir(uri) tiledb.SparseArray.create(uri, schema)

with tiledb.SparseArray(uri, mode='w') as A: I = [np.datetime64('1970-01-01 00:00:00.000001'), np.datetime64('1980-01-01 00:00:00.000002'), np.datetime64('1990-01-01 00:00:00.000003'), np.datetime64('2000-01-01 00:00:00.000004'), np.datetime64('2010-01-01 00:00:00.000005'), np.datetime64('2020-01-01 00:00:00.000006')] data = np.array(([1,2,3,4,5,6])) A[I] = data

#### R

```r
library(tiledb)
## Same location the Python chunk above wrote to.
uri <- "/tmp/tiledb/dt_us"
## Spell the argument out as 'return_as' instead of relying on partial
## matching of 'as'.
arr <- tiledb_array(uri, return_as = "data.frame")
arr[]
```

Nanosecond

Python

```{python pyex6} import numpy as np import sys import os import tiledb uri = "/tmp/tiledb/dt_ns" dom = tiledb.Domain(tiledb.Dim(name="rows", domain=(np.datetime64('1969-01-01'), np.datetime64('2030-12-31')), tile=np.timedelta64(10, 'ns'), dtype=np.datetime64('', 'ns'))) schema = tiledb.ArraySchema(domain=dom, sparse=True, attrs=[tiledb.Attr(name="a", dtype=np.int32)]) if (os.path.isdir(uri)): tiledb.VFS().remove_dir(uri) tiledb.SparseArray.create(uri, schema)

with tiledb.SparseArray(uri, mode='w') as A: I = [np.datetime64('1970-01-01 00:00:00.000000001'), np.datetime64('1980-01-01 00:00:00.000000002'), np.datetime64('1990-01-01 00:00:00.000000003'), np.datetime64('2000-01-01 00:00:00.000000004'), np.datetime64('2010-01-01 00:00:00.000000005'), np.datetime64('2020-01-01 00:00:00.000000006')] data = np.array(([1,2,3,4,5,6])) A[I] = data

#### R

```r
library(tiledb)
## Same location the Python chunk above wrote to.
uri <- "/tmp/tiledb/dt_ns"
## Open the array so that queries come back as a data.frame.
arr <- tiledb_array(uri, return_as = "data.frame")
arr[]
```

Use integer64 Directly

Sometimes we may want to access date or datetime values in their native integer64 format. To do so, we set a toggle when opening the array, as shown in the following example, which uses the array from the preceding example (at nanosecond resolution).

library(tiledb)
uri <- "/tmp/tiledb/dt_ns"
## 'datetimes_as_int64 = TRUE' is the toggle for raw integer64 coordinates.
## Spell the other argument out as 'return_as' instead of relying on
## partial matching of 'as'.
arr <- tiledb_array(uri, return_as = "data.frame", datetimes_as_int64 = TRUE)
arr[]

We can also write integer64 types. The following example adds two extra rows:

library(tiledb)
uri <- "/tmp/tiledb/dt_ns"
arr <- tiledb_array(uri, return_as = "data.frame", datetimes_as_int64 = TRUE)
## Write two cells whose 'rows' coordinates are given directly as integer64.
arr[] <- data.frame(
  rows = bit64::as.integer64(2:3),
  a = 102:103
)
## Read the array back to show the result of the write.
arr[]


Try the tiledb package in your browser

Any scripts or data that you put into this service are public.

tiledb documentation built on Sept. 27, 2023, 9:06 a.m.