## ---- echo = FALSE, message = FALSE---------------------------------------------------------------
require(data.table)
knitr::opts_chunk$set(
comment = "#",
error = FALSE,
tidy = FALSE,
cache = FALSE,
collapse = TRUE)
## ----echo = FALSE---------------------------------------------------------------------------------
options(width = 100L)
## -------------------------------------------------------------------------------------------------
flights <- fread("flights14.csv")
head(flights)
dim(flights)
## -------------------------------------------------------------------------------------------------
set.seed(1L)
DF = data.frame(ID1 = sample(letters[1:2], 10, TRUE),
ID2 = sample(1:3, 10, TRUE),
val = sample(10),
stringsAsFactors = FALSE,
row.names = sample(LETTERS[1:10]))
DF
rownames(DF)
## -------------------------------------------------------------------------------------------------
DF["C", ]
## ----eval = FALSE---------------------------------------------------------------------------------
# rownames(DF) = sample(LETTERS[1:5], 10, TRUE)
# # Warning: non-unique values when setting 'row.names': 'C', 'D'
# # Error in `.rowNamesDF<-`(x, value = value): duplicate 'row.names' are not allowed
## -------------------------------------------------------------------------------------------------
DT = as.data.table(DF)
DT
rownames(DT)
## -------------------------------------------------------------------------------------------------
setkey(flights, origin)
head(flights)
## alternatively we can provide character vectors to the function 'setkeyv()'
# setkeyv(flights, "origin") # useful to program with
## -------------------------------------------------------------------------------------------------
flights[.("JFK")]
## alternatively
# flights[J("JFK")] (or)
# flights[list("JFK")]
## ----eval = FALSE---------------------------------------------------------------------------------
# flights["JFK"] ## same as flights[.("JFK")]
## ----eval = FALSE---------------------------------------------------------------------------------
# flights[c("JFK", "LGA")] ## same as flights[.(c("JFK", "LGA"))]
## -------------------------------------------------------------------------------------------------
key(flights)
## -------------------------------------------------------------------------------------------------
setkey(flights, origin, dest)
head(flights)
## or alternatively
# setkeyv(flights, c("origin", "dest")) # provide a character vector of column names
key(flights)
## -------------------------------------------------------------------------------------------------
flights[.("JFK", "MIA")]
## -------------------------------------------------------------------------------------------------
key(flights)
flights[.("JFK")] ## or in this case simply flights["JFK"], for convenience
## -------------------------------------------------------------------------------------------------
flights[.(unique(origin), "MIA")]
## -------------------------------------------------------------------------------------------------
key(flights)
flights[.("LGA", "TPA"), .(arr_delay)]
## ----eval = FALSE---------------------------------------------------------------------------------
# flights[.("LGA", "TPA"), "arr_delay", with = FALSE]
## -------------------------------------------------------------------------------------------------
flights[.("LGA", "TPA"), .(arr_delay)][order(-arr_delay)]
## -------------------------------------------------------------------------------------------------
flights[.("LGA", "TPA"), max(arr_delay)]
## -------------------------------------------------------------------------------------------------
# get all 'hours' in flights
flights[, sort(unique(hour))]
## -------------------------------------------------------------------------------------------------
setkey(flights, hour)
key(flights)
flights[.(24), hour := 0L]
key(flights)
## -------------------------------------------------------------------------------------------------
flights[, sort(unique(hour))]
## -------------------------------------------------------------------------------------------------
setkey(flights, origin, dest)
key(flights)
## -------------------------------------------------------------------------------------------------
ans <- flights["JFK", max(dep_delay), keyby = month]
head(ans)
key(ans)
## -------------------------------------------------------------------------------------------------
flights[.("JFK", "MIA"), mult = "first"]
## -------------------------------------------------------------------------------------------------
flights[.(c("LGA", "JFK", "EWR"), "XNA"), mult = "last"]
## -------------------------------------------------------------------------------------------------
flights[.(c("LGA", "JFK", "EWR"), "XNA"), mult = "last", nomatch = NULL]
## ----eval = FALSE---------------------------------------------------------------------------------
# # key by origin,dest columns
# flights[.("JFK", "MIA")]
## ----eval = FALSE---------------------------------------------------------------------------------
# flights[origin == "JFK" & dest == "MIA"]
## ----eval = FALSE---------------------------------------------------------------------------------
# setkey(flights, NULL)
# flights[origin == "JFK" & dest == "MIA"]
## -------------------------------------------------------------------------------------------------
set.seed(2L)
N = 2e7L
DT = data.table(x = sample(letters, N, TRUE),
y = sample(1000L, N, TRUE),
val = runif(N))
print(object.size(DT), units = "Mb")
## -------------------------------------------------------------------------------------------------
key(DT)
## (1) Usual way of subsetting - vector scan approach
t1 <- system.time(ans1 <- DT[x == "g" & y == 877L])
t1
head(ans1)
dim(ans1)
## -------------------------------------------------------------------------------------------------
setkeyv(DT, c("x", "y"))
key(DT)
## (2) Subsetting using keys
t2 <- system.time(ans2 <- DT[.("g", 877L)])
t2
head(ans2)
dim(ans2)
identical(ans1$val, ans2$val)
## ----eval = FALSE---------------------------------------------------------------------------------
# 1, 5, 10, 19, 22, 23, 30
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.