R/io.R

Defines functions ld sv neat_file_size

Documented in ld sv

#' Return readble file size.
#'
#' @export
#' @param file_path Path of the file to be evaluated
#' @examples
#' neat_file_size(10276) # 10.03MB
neat_file_size <- function(bytes=NULL, file_path=NULL) {
    if (is.null(bytes) & !is.null(file_path)) bytes = file.size(file_path)
    else if (is.null(bytes) & is.null(file_path)) stop('bytes and file_path cannot be both NULL')
    if (bytes >= 1024^3) {
        gbs = round(bytes/1024^3, 1)
        sprintf('%s GB', gbs)
    } else if (1024^2<bytes & bytes<=1024^3) {
        mbs = round(bytes/1024^2, 1)
        sprintf('%s MB', mbs)
    } else if (1024<bytes & bytes<=1024^2) {
        kbs = round(bytes/1024, 1)
        sprintf('%s KB', kbs)
    } else {
        bs = round(bytes, 1)
        sprintf('%s B', bs)
    }
}

#' A simplified save function using saveRDS().
#'
#' @export
#' @param obj expression or char. The object to be saved, use its R object name, e.g., `sv(dt)` or `sv('data')`.
#' @param svname expression or char. The file name of the serialized object. Default to objname, e.g., sv(dt, dt_new_name)
#' @param compress Whether the object should be compressed (only when saved as `rds`)
#' @param path To which subfolder the data is saved. Default to './data'. If './', save to current directory. 
#' @param svtype The extension of saved file. Currently only `rds` and `feather` are available. Default to `rds`. 
#' @examples
#' sv(dt) # equals to sv('dt')
#' sv(dt, dt_new_name, path='../Rdata')
sv <- function(obj, svname=NULL, svtype=NULL, path = "./data") {
    
    start <- Sys.time()

    if (path=='nosync') path = './data/nosync'

    # if folder not exists, stop
    if (!file.exists(path)) {
        # dir.create(path)
        stop(sprintf('Directory "%s" not exists!', path))
    }

    # Determin svname
    # - if char, use as it is
    # - if not char, change to char
    if (class(svname) == 'character') {
        if (length(svname) != 1) {
            stop(sprintf('Got svname: %s', svname))
        }
    } else if (is.null(substitute(svname))) {
        svname = as.character(substitute(obj))
    } else {
        as.character(substitute(svname))
    }    

    # Determin save type: "rds" or "feather"
    # Default: 'data.table' -> 'feather'
    #          others       -> 'rds'
    if ('character' %in% class(obj) & length(class(obj))==1) {
        obj_class = class(get(obj))
    } else {
        obj_class = class(obj)
    }
    auto_svtype = ifelse(any(c('data.table', 'data.frame') %in% obj_class),
                         'feather',
                         'rds')

    svtype = ifelse(is.null(substitute(svtype)),
                    auto_svtype,
                    as.character(substitute(svtype)))

    if (auto_svtype != svtype) {
        warning(sprintf('We detect the data is in "%s" format, but you are trying to save as "%s". Be careful!', class(obj), svtype),
                call.=F)
    }

    # Save with `write_feather` or `saveRDS`
    svdir = sprintf('%s/%s.%s', path, svname, svtype)

    if (svtype=='feather') {
        if ('character' %in% class(obj) & length(class(obj))==1) {
            arrow::write_feather(get(obj), svdir, version=2, compression='lz4')
        } else {
            arrow::write_feather(obj, svdir, version=2, compression='lz4')
        }
    } else if (svtype=='rds') {
        if ('character' %in% class(obj) & length(class(obj))==1) {
            saveRDS(get(obj), file = svdir, compress=T) 
        } else {
            saveRDS(obj, file = svdir, compress=T) 
        }

    }

    # print result
    sprintf('"%s" saved as "%s.%s" (%s)', as.character(substitute(obj)), svname, svtype, neat_file_size(file_path=svdir)) %>% cat()

    end <- Sys.time()
    gap <- end - start
    sprintf(' (%s %s, %s)\n', round(gap, 2), units(gap), end) %>% cat()
}


#' Load file into R environment.
#'
#' Currently only support "rds" and "feather" format. If only one format exists, the function will load it automatically. Otherwise, it will stop and ask you to select which format to load.
#'
#' @export
#' @param filename expression or char. The file name to be loaded, e.g. ld(x) equals to load(file='dt.rds')
#' @param force Whether the object should be reloaded if it's already in the current environment.
#' @param path From which subfolder to read the data. Default to './data'. If './', load from current directory.
#' @examples
#' ld(dt)
#' ld(filename=dt, obj=dt_new, path='../Rdata')
ld <- function(filename, ldtype=NULL, col_select=NULL, path = './data') {
    start <- Sys.time()
    # check if ldtype is valid
    # possible value: NULL, "rds", "feather"
    if (!is.null(ldtype) && !(ldtype %in% c('rds', 'feather'))) {
        stop('`ldtype` must be one of "rds", "feather" or "NULL"')
    }

    # format "path"
    if (path=='nosync') path = './data/nosync'

    # convert filename to string
    filename = as.character(substitute(filename))

    # verify file type: rds or feather
    # if file doesn't exist, stop;
    # if both exists, stop and ask for clarification;
    # if only one exists, assign it to `lddir`
    if (is.null(ldtype)) {
        hit = list.files(path, pattern=sprintf('^%s\\.(rds|feather|RDS|Rds)$', filename))
    } else {
        hit = list.files(path, pattern=sprintf('^%s\\.%s$', filename, substitute(ldtype)))
    }
    hit_extensions = sapply(str_split(hit, '\\.'), tail, 1)  %>% unique() %>% 
                     str_c(collapse=', ')

    if (length(hit)==0) {
        if (is.null(ldtype)) {
            stop(sprintf('Cannot find "%s" with possible extensions ("rds", "feather")', filename))
        } else{
            stop(sprintf('Cannot find "%s.%s"', filename, ldtype))
        }
    } else if (length(hit)==1) {
        lddir <- sprintf('%s/%s', path, hit)
        ldtype = str_split(hit, '\\.')[[1]] %>% tail(1)
        filename_ext = hit
    } else {
        stop(sprintf('Multiple extensions (%s) of "%s" found, please clarify!', hit_extensions, filename))
    }

    # get file size before loading
    file_size = neat_file_size(file_path=lddir)


    # load the file
    # first, load file as val
    if (tolower(ldtype)=='feather') {
        val = arrow::read_feather(lddir, col_select=col_select) %>% setDT()
    }       
    else if (tolower(ldtype)=='rds') {
        val = readRDS(lddir)
    } 

    sprintf('"%s" (%s) loaded', filename_ext, file_size) %>% cat()
        
    # output time elapsed
    end <- Sys.time()
    gap <- end - start
    finish_time = Sys.time() %>% format('%Y-%m-%d %-I:%M %p')
    sprintf(' (%s %s) (%s)\n', round(gap, 2), units(gap), finish_time) %>% cat()
    
    # return data
    return(val)
}
XiaomoWu/utilr documentation built on July 30, 2022, 11:26 a.m.