#' Return readble file size.
#'
#' @export
#' @param file_path Path of the file to be evaluated
#' @examples
#' neat_file_size(10276) # 10.03MB
neat_file_size <- function(bytes=NULL, file_path=NULL) {
if (is.null(bytes) & !is.null(file_path)) bytes = file.size(file_path)
else if (is.null(bytes) & is.null(file_path)) stop('bytes and file_path cannot be both NULL')
if (bytes >= 1024^3) {
gbs = round(bytes/1024^3, 1)
sprintf('%s GB', gbs)
} else if (1024^2<bytes & bytes<=1024^3) {
mbs = round(bytes/1024^2, 1)
sprintf('%s MB', mbs)
} else if (1024<bytes & bytes<=1024^2) {
kbs = round(bytes/1024, 1)
sprintf('%s KB', kbs)
} else {
bs = round(bytes, 1)
sprintf('%s B', bs)
}
}
#' A simplified save function using saveRDS().
#'
#' @export
#' @param obj expression or char. The object to be saved, use its R object name, e.g., `sv(dt)` or `sv('data')`.
#' @param svname expression or char. The file name of the serialized object. Default to objname, e.g., sv(dt, dt_new_name)
#' @param compress Whether the object should be compressed (only when saved as `rds`)
#' @param path To which subfolder the data is saved. Default to './data'. If './', save to current directory.
#' @param svtype The extension of saved file. Currently only `rds` and `feather` are available. Default to `rds`.
#' @examples
#' sv(dt) # equals to sv('dt')
#' sv(dt, dt_new_name, path='../Rdata')
sv <- function(obj, svname=NULL, svtype=NULL, path = "./data") {
start <- Sys.time()
if (path=='nosync') path = './data/nosync'
# if folder not exists, stop
if (!file.exists(path)) {
# dir.create(path)
stop(sprintf('Directory "%s" not exists!', path))
}
# Determin svname
# - if char, use as it is
# - if not char, change to char
if (class(svname) == 'character') {
if (length(svname) != 1) {
stop(sprintf('Got svname: %s', svname))
}
} else if (is.null(substitute(svname))) {
svname = as.character(substitute(obj))
} else {
as.character(substitute(svname))
}
# Determin save type: "rds" or "feather"
# Default: 'data.table' -> 'feather'
# others -> 'rds'
if ('character' %in% class(obj) & length(class(obj))==1) {
obj_class = class(get(obj))
} else {
obj_class = class(obj)
}
auto_svtype = ifelse(any(c('data.table', 'data.frame') %in% obj_class),
'feather',
'rds')
svtype = ifelse(is.null(substitute(svtype)),
auto_svtype,
as.character(substitute(svtype)))
if (auto_svtype != svtype) {
warning(sprintf('We detect the data is in "%s" format, but you are trying to save as "%s". Be careful!', class(obj), svtype),
call.=F)
}
# Save with `write_feather` or `saveRDS`
svdir = sprintf('%s/%s.%s', path, svname, svtype)
if (svtype=='feather') {
if ('character' %in% class(obj) & length(class(obj))==1) {
arrow::write_feather(get(obj), svdir, version=2, compression='lz4')
} else {
arrow::write_feather(obj, svdir, version=2, compression='lz4')
}
} else if (svtype=='rds') {
if ('character' %in% class(obj) & length(class(obj))==1) {
saveRDS(get(obj), file = svdir, compress=T)
} else {
saveRDS(obj, file = svdir, compress=T)
}
}
# print result
sprintf('"%s" saved as "%s.%s" (%s)', as.character(substitute(obj)), svname, svtype, neat_file_size(file_path=svdir)) %>% cat()
end <- Sys.time()
gap <- end - start
sprintf(' (%s %s, %s)\n', round(gap, 2), units(gap), end) %>% cat()
}
#' Load file into R environment.
#'
#' Currently only support "rds" and "feather" format. If only one format exists, the function will load it automatically. Otherwise, it will stop and ask you to select which format to load.
#'
#' @export
#' @param filename expression or char. The file name to be loaded, e.g. ld(x) equals to load(file='dt.rds')
#' @param force Whether the object should be reloaded if it's already in the current environment.
#' @param path From which subfolder to read the data. Default to './data'. If './', load from current directory.
#' @examples
#' ld(dt)
#' ld(filename=dt, obj=dt_new, path='../Rdata')
ld <- function(filename, ldtype=NULL, col_select=NULL, path = './data') {
start <- Sys.time()
# check if ldtype is valid
# possible value: NULL, "rds", "feather"
if (!is.null(ldtype) && !(ldtype %in% c('rds', 'feather'))) {
stop('`ldtype` must be one of "rds", "feather" or "NULL"')
}
# format "path"
if (path=='nosync') path = './data/nosync'
# convert filename to string
filename = as.character(substitute(filename))
# verify file type: rds or feather
# if file doesn't exist, stop;
# if both exists, stop and ask for clarification;
# if only one exists, assign it to `lddir`
if (is.null(ldtype)) {
hit = list.files(path, pattern=sprintf('^%s\\.(rds|feather|RDS|Rds)$', filename))
} else {
hit = list.files(path, pattern=sprintf('^%s\\.%s$', filename, substitute(ldtype)))
}
hit_extensions = sapply(str_split(hit, '\\.'), tail, 1) %>% unique() %>%
str_c(collapse=', ')
if (length(hit)==0) {
if (is.null(ldtype)) {
stop(sprintf('Cannot find "%s" with possible extensions ("rds", "feather")', filename))
} else{
stop(sprintf('Cannot find "%s.%s"', filename, ldtype))
}
} else if (length(hit)==1) {
lddir <- sprintf('%s/%s', path, hit)
ldtype = str_split(hit, '\\.')[[1]] %>% tail(1)
filename_ext = hit
} else {
stop(sprintf('Multiple extensions (%s) of "%s" found, please clarify!', hit_extensions, filename))
}
# get file size before loading
file_size = neat_file_size(file_path=lddir)
# load the file
# first, load file as val
if (tolower(ldtype)=='feather') {
val = arrow::read_feather(lddir, col_select=col_select) %>% setDT()
}
else if (tolower(ldtype)=='rds') {
val = readRDS(lddir)
}
sprintf('"%s" (%s) loaded', filename_ext, file_size) %>% cat()
# output time elapsed
end <- Sys.time()
gap <- end - start
finish_time = Sys.time() %>% format('%Y-%m-%d %-I:%M %p')
sprintf(' (%s %s) (%s)\n', round(gap, 2), units(gap), finish_time) %>% cat()
# return data
return(val)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.