In the following examples, we'll use a data set that has ten million rows and ten columns. The associated data frame is about 800 MB. To make things interesting, the data frame will include a mixture of numbers and missing values.
# Simulate the full-size benchmark data: ten million uniform draws, with one
# million of them (chosen at random, without replacement) set to missing.
x = runif(1e7)
x[sample(1e7, 1e6)] = NA # 10% NAs
# replicate() evaluates `x` ten times, so every column holds the same vector;
# the resulting 1e7 x 10 matrix is then converted to a data frame.
dd = as.data.frame(replicate(10, x))
# Small-scale version of the same data set: one thousand uniform draws, with
# one hundred of them (chosen at random, without replacement) set to missing.
# Note that this reassigns `x` and `dd`.
x = runif(1e3)
x[sample(1e3, 1e2)] = NA # 10% NAs
# Every column is the same vector `x`, giving a 1000 x 10 data frame.
dd = as.data.frame(replicate(10, x))
Base R: read.csv/write.csv
# Time a CSV round trip with base R. The commented figures are the recorded
# system.time() output (presumably for the ten-million-row data frame).
# write.csv() writes row names by default, so the file contains one more
# column than `dd` when it is read back.
system.time(write.csv(dd, "dd.csv"))
#    user  system elapsed
# 101.012   1.096 104.153
system.time(read.csv("dd.csv"))
#    user  system elapsed
# 151.568   0.728 152.134
Base R: readRDS/saveRDS
# Time base R's single-object binary serialisation. The commented figures are
# the recorded system.time() output.
# NOTE(review): ".RData" is conventionally the extension for save()/load()
# files, while ".rds" is usual for saveRDS() output. Confirm nothing else
# depends on this file name before renaming it.
system.time(saveRDS(dd, file = "dd.RData"))
#   user  system elapsed
# 101.77    0.28  102.69
system.time(readRDS(file = "dd.RData"))
#  user  system elapsed
# 5.052   0.104   5.148
The data.table package
# Time a CSV round trip with data.table's fwrite()/fread(). The commented
# figures are the recorded system.time() output. This overwrites "dd.csv".
library("data.table")
# Elapsed time is well below user time here, presumably because fwrite()
# ran on several threads. Confirm on the benchmark machine.
system.time(fwrite(dd, "dd.csv"))
#   user  system elapsed
# 28.776   1.600   4.128
system.time(fread("dd.csv"))
#  user  system elapsed
# 21.93    0.18   22.07
The readr package
# Time a CSV round trip with readr's write_csv()/read_csv(). The commented
# figures are the recorded system.time() output. This overwrites "dd.csv".
library("readr")
system.time(write_csv(dd, "dd.csv"))
#   user  system elapsed
# 189.44    1.54  190.77
system.time(read_csv("dd.csv"))
#   user  system elapsed
# 18.476   0.252  18.588
The feather package
# Time a round trip through the feather file format. The commented figures
# are the recorded system.time() output.
library("feather")
system.time(write_feather(dd, "dd.feather"))
#  user  system elapsed
# 0.620   0.540   1.319
system.time(read_feather("dd.feather"))
#  user  system elapsed
# 0.356   0.068   0.422
Personally, I think that feather looks very promising, but it is very new.
# Elapsed times in seconds, copied (rounded) from the system.time() results
# recorded in the sections above, in the order given by `n`.
writes = c(104, 102, 4.1, 190, 1.319)
reads = c(152, 5.1, 22.07, 18.58, 0.422)
# One label per method. The same labels are used on both charts, so they name
# the method/format rather than a read- or write-specific function.
n = c("base csv", "RDS", "data.table", "readr", "feather")

# Draw one bar chart of timings with white horizontal grid lines every 20
# seconds.
#
# times  numeric vector of timings in seconds, one per bar
# main   chart title (right-aligned)
# labels bar labels, same length as `times`
# y_max  upper limit of the y-axis; grid lines cover the full range
#
# Returns `times` invisibly. The graphical parameters changed here are
# restored on exit, so later plots are unaffected.
plot_times = function(times, main, labels = n, y_max = 200) {
  old_par = par(mar = c(3, 3, 2, 1), mgp = c(2, 0.4, 0), tck = -.01,
                cex.axis = 0.8, las = 1, xaxs = "i", yaxs = "i")
  on.exit(par(old_par), add = TRUE)

  barplot(times, names.arg = labels, col = "steelblue", border = NA,
          space = 0.05, ylim = c(0, y_max), ylab = "Time(s)")
  # White lines drawn over the bars act as a grid without extra ink.
  abline(h = seq(0, y_max, 20), col = "white")
  title(main, adj = 1, cex.main = 1, font.main = 2, col.main = "black")

  invisible(times)
}

plot_times(writes, "Write times")
plot_times(reads, "Read times")
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.