tests/testthat/test-data.table_examples.R

test_that("dbi.table works on data.table help examples", {

  conns <- list(DBI::dbConnect(RSQLite::SQLite(), ":memory:"),
                DBI::dbConnect(duckdb::duckdb(), ":memory:"))

  for (conn in conns) {
    DT <- data.table(x = rep(c("b", "a", "c"), each = 3),
                     y = c(1, 3, 6),
                     v = 1:9)

    expect_no_error({
      DBIT <- as.dbi.table(conn, DT, type = "query")
    })

    expect_s3_class(DT, "data.table")
    expect_s3_class(DBIT, "dbi.table")

    # identical(dim(DT), dim(DF))    # TRUE
    # identical(DF$a, DT$a)          # TRUE

    expect_true(is.list(DT))
    expect_true(is.list(DBIT))

    expect_true(is.data.frame(DT))
    expect_false(is.data.frame(DBIT))

    # DT[2]                          # 2nd row
    # DT[3:2]                        # 3rd and 2nd row

    expect_true(reference.test(
      DBIT[order(x)],
      verbose = FALSE))

    expect_true(reference.test(
      DBIT[order(x), ],
      verbose = FALSE))

    expect_true(reference.test(
      DBIT[y > 2],
      verbose = FALSE))

    expect_true(reference.test(
      DBIT[y > 2 & v > 5],
      verbose = FALSE))

    # DT[!2:4]                       # all rows other than 2:4
    # DT[-(2:4)]                     # same

    # select|compute columns data.table way
    # DT[, v]                        # v column (as vector)

    expect_true(reference.test(
      DBIT[, list(v)],
      verbose = FALSE))

    expect_true(reference.test(
      DBIT[, .(v)],
      verbose = FALSE))

    # DT[, sum(v)]                   # sum of column v, returned as vector

    expect_true(reference.test(
      DBIT[, .(sum(v, na.rm = TRUE))],
      verbose = FALSE))

    expect_true(reference.test(
      DBIT[, .(sv = sum(v, na.rm = TRUE))],
      verbose = FALSE))

    expect_true(reference.test(
      DBIT[, .(v, v*2)],
      verbose = FALSE))


    # subset rows and select|compute data.table way
    # DT[2:3, sum(v)]                # sum(v) over rows 2 and 3, return vector
    # DT[2:3, .(sum(v))]             # same, but return data.table with column V1
    # DT[2:3, .(sv=sum(v))]          # same, but return data.table with column sv
    # DT[2:5, cat(v, "\n")]          # just for j's side effect


    # select columns the data.frame way
    expect_true(reference.test(
      DBIT[, 2],
      verbose = FALSE))

    expect_true(reference.test({
      colNum <- 2
      DBIT[, ..colNum]},
      verbose = FALSE))

    # DT[["v"]]                      # same as DT[, v] but faster if called in a loop


    # grouping operations - j and by

    expect_true(reference.test(
      DBIT[, sum(v, na.rm = TRUE), by = x],
      verbose = FALSE))

  # DT[, sum(v), keyby = x]              # same, but order the result on by cols
  # DT[, sum(v), by = x, keyby = TRUE]   # same, but using sorting flag

    expect_true(reference.test(
      DBIT[, sum(v, na.rm = TRUE), by = x][order(x)],
      ignore.row.order = FALSE,
      verbose = FALSE))

  # fast ad hoc row subsets (subsets as joins)
  # DT["a", on="x"]                # same as x == "a" but uses binary search (fast)
  # DT["a", on=.(x)]               # same, for convenience, no need to quote every column
  # DT[.("a"), on="x"]             # same

  # NOTE: not optimized in dbi.table but still useful as tests

    expect_true(reference.test(
      DBIT[x == "a"],
      verbose = FALSE))

    expect_true(reference.test(
      DBIT[x != "b" | y != 3],
      verbose = FALSE))

  # DT[.("b", 3), on=c("x", "y")]  # join on columns x,y of DT; uses binary search (fast)
  # DT[.("b", 3), on=.(x, y)]      # same, but using on=.()
  # DT[.("b", 1:2), on=c("x", "y")]             # no match returns NA
  # DT[.("b", 1:2), on=.(x, y), nomatch=NULL]   # no match row is not returned
  # DT[.("b", 1:2), on=c("x", "y"), roll=Inf]   # locf, nomatch row gets rolled by previous row
  # DT[.("b", 1:2), on=.(x, y), roll=-Inf]      # nocb, nomatch row gets rolled by next row
  # DT["b", sum(v*y), on="x"]                   # on rows where DT$x=="b", calculate sum(v*y)

  # all together now
  # DT[x!="a", sum(v), by=x]                    # get sum(v) by "x" for each i != "a"
  # DT[!"a", sum(v), by=.EACHI, on="x"]         # same, but using subsets-as-joins
  # DT[c("b","c"), sum(v), by=.EACHI, on="x"]   # same
  # DT[c("b","c"), sum(v), by=.EACHI, on=.(x)]  # same, using on=.()

  # joins as subsets
    expect_no_error({
      X <- data.table(x = c("c", "b"), v = 8:7, foo = c(4, 2))
      X <- as.dbi.table(conn, X, type = "query")
    })

    expect_true(reference.test(
      DBIT[X, on = "x"],
      verbose = FALSE))

    expect_true(reference.test(
      X[DBIT, on = "x"],
      verbose = FALSE))

    expect_true(reference.test(
      DBIT[X, on = "x", nomatch = NULL],
      verbose = FALSE))

    expect_true(reference.test(
      DBIT[!X, on = "x"],
      verbose = FALSE))

     if (!inherits(conn, "SQLiteConnection")) {
       expect_true(reference.test(
         DBIT[X, on = c(y = "v")],
         verbose = FALSE))

      expect_true(reference.test(
        DBIT[X, on = "y == v"],
        verbose = FALSE))
    }

    expect_true(reference.test(
      DBIT[X, on = .(y <= foo)],
      verbose = FALSE))

    expect_true(reference.test(
      DBIT[X, on = "y <= foo"],
      verbose = FALSE))

    expect_true(reference.test(
      DBIT[X, on = c("y <= foo")],
      verbose = FALSE))

    expect_true(reference.test(
      DBIT[X, on = .(y >= foo)],
      verbose = FALSE))

    expect_true(reference.test(
      DBIT[X, on = .(x, y <= foo)],
      verbose = FALSE))

    expect_true(reference.test(
      DBIT[X, .(x, y, x.y, v), on = .(x, y >= foo)],
      verbose = FALSE))

  # NOTE: mult and .EACHI not supported in version 1

  # DT[X, on="x", mult="first"]           # first row of each group
  # DT[X, on="x", mult="last"]            # last row of each group
  # DT[X, sum(v), by=.EACHI, on="x"]      # join and eval j for each row in i
  # DT[X, sum(v)*foo, by=.EACHI, on="x"]  # join inherited scope
  # DT[X, sum(v)*i.v, by=.EACHI, on="x"]  # 'i,v' refers to X's v column
  # DT[X, on=.(x, v>=v), sum(y)*foo, by=.EACHI] # NEW non-equi join with by=.EACHI (v1.9.8+)



  # setting keys
  # kDT = copy(DT)                        # (deep) copy DT to kDT to work with it.
  # setkey(kDT,x)                         # set a 1-column key. No quotes, for convenience.
  # setkeyv(kDT,"x")                      # same (v in setkeyv stands for vector)
  # v="x"
  # setkeyv(kDT,v)                        # same
  # haskey(kDT)                           # TRUE
  # key(kDT)                              # "x"


  # NOTE: keys are not supported

  # fast *keyed* subsets
  # kDT["a"]                              # subset-as-join on *key* column 'x'
  # kDT["a", on="x"]                      # same, being explicit using 'on=' (preferred)

  # all together
  # kDT[!"a", sum(v), by=.EACHI]          # get sum(v) for each i != "a"

  # multi-column key
  # setkey(kDT,x,y)                       # 2-column key
  # setkeyv(kDT,c("x","y"))               # same

  # fast *keyed* subsets on multi-column key
  # kDT["a"]                              # join to 1st column of key
  # kDT["a", on="x"]                      # on= is optional, but is preferred
  # kDT[.("a")]                           # same, .() is an alias for list()
  # kDT[list("a")]                        # same
  # kDT[.("a", 3)]                        # join to 2 columns
  # kDT[.("a", 3:6)]                      # join 4 rows (2 missing)
  # kDT[.("a", 3:6), nomatch=NULL]        # remove missing
  # kDT[.("a", 3:6), roll=TRUE]           # locf rolling join
  # kDT[.("a", 3:6), roll=Inf]            # same as above
  # kDT[.("a", 3:6), roll=-Inf]           # nocb rolling join
  # kDT[!.("a")]                          # not join
  # kDT[!"a"]                             # same

  # more on special symbols, see also ?"special-symbols"
  # DT[.N]                                  # last row

  # Note: .N only supported in j

    expect_no_error(DBIT[, .N]) # dbi.table returns dbi.table,
                                # data.table returns numeric

    expect_true(reference.test(
      DBIT[, .N, by = x],
      verbose = FALSE))

  # DT[, .SD, .SDcols=x:y]                  # select columns 'x' through 'y'
  # DT[ , .SD, .SDcols = !x:y]              # drop columns 'x' through 'y'
  # DT[ , .SD, .SDcols = patterns('^[xv]')] # select columns matching '^x' or '^v'
  # DT[, .SD[1]]                            # first row of all columns
  # DT[, .SD[1], by=x]                      # first row of 'y' and 'v' for each group in 'x'
  # DT[, c(.N, lapply(.SD, sum)), by=x]     # get rows *and* sum columns 'v' and 'y' by group
  # DT[, .I[1], by=x]                       # row number in DT corresponding to each group
  # DT[, grp := .GRP, by=x]                 # add a group counter column
  # DT[ , dput(.BY), by=.(x,y)]             # .BY is a list of singletons for each group
  # X[, DT[.BY, y, on="x"], by=x]           # join within each group
  # DT[, {
  #   # write each group to a different file
  #   fwrite(.SD, file.path(tempdir(), paste0('x=', .BY$x, '.csv')))
  # }, by=x]
  # dir(tempdir())

  # add/update/delete by reference (see ?assign)

    expect_true(reference.test({
      DBIT[, z := 42L]
      DBIT[]
      },
      verbose = FALSE))

    expect_true(reference.test({
      DBIT[, z := NULL]
      DBIT[]
      },
      verbose = FALSE))

  # print(DT["a", v:=42L, on="x"])        # subassign to existing v column by reference
  # print(DT["b", v2:=84L, on="x"])       # subassign to new column by reference (NA padded)

    expect_true(reference.test({
      DBIT[, m := mean(v), by = x]
      DBIT[]
      },
      verbose = FALSE))


    expect_no_error(rm(DBIT))


  # advanced usage
    expect_no_error({
      DT <- data.table(x = rep(c("b", "a", "c"), each = 3),
                      v = c(1, 1, 1, 2, 2, 1, 1, 2, 2),
                      y = c(1, 3, 6),
                      a = 1:9,
                      b = 9:1)
      DBIT <- as.dbi.table(conn, DT, type = "query")
    })

    expect_true(reference.test(
      DBIT[, sum(v, na.rm = TRUE), by = .(y %% 2)],
      verbose = FALSE))

    expect_true(reference.test(
      DBIT[, sum(v, na.rm = TRUE), by = .(bool = y %% 2)],
      verbose = FALSE))

  # DT[, .SD[2], by = x]                    # get 2nd row of each group
  # DT[, tail(.SD, 2), by = x]               # last 2 rows of each group
  # DT[, lapply(.SD, sum), by = x]          # sum of all (other) columns for each group
  # DT[, .SD[which.min(v)], by = x]         # nested query by group

    expect_true(reference.test(
      DBIT[, list(MySum = sum(v, na.rm = TRUE),
                  MyMin = min(v, na.rm = TRUE),
                  MyMax = max(v, na.rm = TRUE)),
          by = .(x, y %% 2)],
      verbose = FALSE))

  # DT[, .(a = .(a), b = .(b)), by=x]     # list columns
  # DT[, .(seq = min(a):max(b)), by=x]    # j is not limited to just aggregations

    expect_true(reference.test(
      DBIT[, sum(v, na.rm = TRUE), by = x][V1 < 20],
      verbose = FALSE))

    expect_true(reference.test(
      DBIT[, sum(v, na.rm = TRUE), by = x][order(-V1)],
      verbose = FALSE))

  # DT[, c(.N, lapply(.SD, sum)), by = x]    # get number of observations and sum per group
  # DT[, {tmp <- mean(y);
  #       .(a = a - tmp, b = b - tmp)
  #       }, by = x]                        # anonymous lambda in 'j', j accepts any valid
                                            # expression. TO REMEMBER: every element of
                                            # the list becomes a column in result.

    expect_no_error(rm(colNum, DBIT, DT, X))
    expect_no_error(DBI::dbDisconnect(conn))
  }
})

Try the dbi.table package in your browser

Any scripts or data that you put into this service are public.

dbi.table documentation built on April 3, 2025, 7:40 p.m.