# Answer options as a character vector (presumably consumed by the
# commented-out check_answers() call below -- confirm against the exercise).
my_answers <- c(
  'S+',
  'C++',
  'Python',
  'Matlab',
  'Javascript'
)

#check_answers(my_answers)

Question

Crie um dataframe com o código a seguir:

library(dplyr)

# Number of rows of the example data
my_N <- 10000

# Two-column tibble: an integer index and uniform random draws
my_df <- tibble(
  x = seq_len(my_N),
  y = runif(my_N)
)

Exporte o dataframe resultante para cada um dos quatro formatos: csv, rds, xlsx e fst. Qual dos formatos ocupou maior espaço em disco? Dica: a função file.size calcula o tamanho de arquivos dentro do próprio R.

Solution

library(tidyverse)

#' Benchmark the file size and write time of four export formats
#'
#' Builds a tibble with `my_N` rows (an integer index `x` and uniform random
#' draws `y`), writes it to temporary csv, rds, xlsx and fst files, and
#' records each file's size and the elapsed time of the write call. The
#' temporary files are removed when the function exits.
#'
#' @param my_N Number of rows of the generated tibble (a single non-negative
#'   number).
#' @return A tibble with one row per format and the columns `Result` (format
#'   name), `Size` (file size in bytes divided by 1e6) and `Time` (the
#'   `elapsed` entry of `system.time()` for the write call).
do_tests <- function(my_N) {
  stopifnot(is.numeric(my_N), length(my_N) == 1L, !is.na(my_N), my_N >= 0)

  # seq_len() instead of 1:my_N so that my_N == 0 does not expand to c(1, 0)
  my_df <- tibble::tibble(x = seq_len(my_N),
                          y = runif(my_N))

  # One writer per format, all called as writer(data, path). Namespaced calls
  # avoid attaching packages to the caller's search path from inside a
  # function.
  writers <- list(
    csv  = readr::write_csv,
    rds  = readr::write_rds,
    xlsx = writexl::write_xlsx,
    fst  = fst::write_fst
  )
  formats <- names(writers)

  paths <- vapply(
    formats,
    function(ext) tempfile(pattern = 'temp', fileext = paste0('.', ext)),
    character(1)
  )
  # Always delete the temporary files, even if one of the writers fails
  on.exit(unlink(paths), add = TRUE)

  # Elapsed wall-clock time of each write; [[ ]] drops the 'elapsed' name so
  # the resulting tibble column carries no duplicated names
  times <- vapply(
    formats,
    function(fmt) {
      system.time({
        writers[[fmt]](my_df, paths[[fmt]])
      })[['elapsed']]
    },
    numeric(1)
  )

  # File sizes scaled from bytes by 1e6
  sizes <- setNames(file.size(paths) / 1000000, formats)

  print(sizes)
  print(times)

  tibble::tibble(Result = formats,
                 Size = unname(sizes),
                 Time = unname(times))
}

# Run the export benchmark for a data set with my_N rows
my_N <- 10000

tab <- do_tests(my_N)
print(tab)

# Report the format whose exported file is the largest
my_msg <- paste0(
  'The format with largest disk space for N = ',
  my_N,
  ' is ',
  tab[['Result']][which.max(tab[['Size']])],
  '.'
)
message(my_msg)

Meta-information

extype: string exsolution: r mchoice2string(c(TRUE, FALSE, FALSE, FALSE, FALSE), single = TRUE) exname: "export file" exshuffle: TRUE



msperlin/adfeR documentation built on March 26, 2021, 3:05 a.m.