# Answer strings; the check_answers() call below is left disabled.
my_answers <- c(
  'S+',
  'C++',
  'Python',
  'Matlab',
  'Javascript'
)
# check_answers(my_answers)
Crie um dataframe com o código a seguir:
library(dplyr)

# Build the example tibble: a row index `x` and uniform random numbers `y`.
my_N <- 10000
my_df <- tibble(x = 1:my_N, y = runif(my_N))
Exporte o dataframe resultante para cada um dos quatro formatos: csv, rds, xlsx e fst. Qual dos formatos ocupou maior espaço em disco? Dica: a função `file.size` calcula o tamanho de arquivos dentro do próprio R.
library(tidyverse)
library(writexl)
library(fst)

# Write `df` to a temporary file with `write_fun` and return a list holding
# the resulting file size (bytes / 1e6) and the elapsed write time reported
# by system.time().
measure_export <- function(df, write_fun, fileext) {
  path <- tempfile(pattern = 'temp', fileext = fileext)
  # Delete the temporary file on exit, even if the writer fails.
  on.exit(unlink(path), add = TRUE)

  elapsed <- system.time({
    write_fun(df, path)
  })[['elapsed']]

  list(size = file.size(path) / 1000000, time = elapsed)
}

#' Compare file size and write time across export formats
#'
#' Builds a tibble with `my_N` rows (a row index `x` and uniform random
#' numbers `y`), writes it to temporary csv, rds, xlsx and fst files in that
#' order, prints the measured sizes and times, and returns them as a table.
#' The temporary files are removed once measured.
#'
#' @param my_N Number of rows of the test tibble.
#' @return A tibble with one row per format and the columns `Result` (format
#'   name), `Size` (file size in bytes divided by 1e6) and `Time` (elapsed
#'   write time).
do_tests <- function(my_N) {
  # seq_len() matches 1:my_N for my_N >= 1 but does not count backwards
  # (1, 0) when my_N is 0.
  my_df <- tibble(x = seq_len(my_N), y = runif(my_N))

  # One writer per format; the names double as file extensions and labels.
  writers <- list(
    csv = write_csv,
    rds = write_rds,
    xlsx = write_xlsx,
    fst = write_fst
  )

  results <- lapply(names(writers), function(fmt) {
    measure_export(my_df, writers[[fmt]], paste0('.', fmt))
  })
  sizes <- vapply(results, function(res) res$size, numeric(1))
  times <- vapply(results, function(res) res$time, numeric(1))

  print(sizes)
  print(times)

  tibble(Result = names(writers), Size = sizes, Time = times)
}

my_N <- 10000
tab <- do_tests(my_N)
print(tab)

my_msg <- paste0(
  'The format with largest disk space for N = ', my_N,
  ' is ', tab$Result[which.max(tab$Size)], '.'
)
message(my_msg)
extype: string
exsolution: `r mchoice2string(c(TRUE, FALSE, FALSE, FALSE, FALSE), single = TRUE)`
exname: "export file"
exshuffle: TRUE
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.