# R/file_utils.R

# Defines functions: load_data

# Documented in: load_data

#' Download (and, for ZIP archives, unzip) a document with cache support.
#'
#' Files are stored in the folder `/tmp/<documentId>/<documentId>`. Two hidden
#' flag files in that folder record what has already been done:
#' `.downloaded` (the document was fetched) and `.extracted` (the ZIP archive
#' was unpacked). When a flag is present the corresponding step is skipped,
#' unless `force_load` is `TRUE`.
#'
#' If the document name ends in `.zip` (case-insensitive) the archive is
#' unpacked into the cache folder and the ZIP file itself is deleted.
#' This function never clears the cache; that is left to the GarbageCollector.
#'
#' @param ctx Tercen context. `ctx$client$fileService$get()` and
#'   `ctx$client$fileService$download()` are called with `documentId`.
#' @param documentId Identifier for file to be loaded.
#' @param force_load Downloads and unzips data regardless of cache.
#' @return For a ZIP archive, a character vector with the full paths of all
#'   non-hidden files found (recursively) in the cache folder; this is
#'   `character(0)` if nothing was extracted. Otherwise, the full path of the
#'   downloaded file.
#' @keywords file
#' @export
#' @examples
#' \dontrun{
#' load_data(ctx, docId)
#' }
#' @import tools
load_data <- function(ctx, documentId, force_load = FALSE) {
  # Normalise once: accepts TRUE, 1 or "TRUE" as before, but NA / NULL now
  # mean "do not force" instead of making the `if` conditions fail.
  force_load <- isTRUE(as.logical(force_load))

  cache_dir <- file.path("/tmp", documentId, documentId)
  if (!dir.exists(cache_dir)) {
    dir.create(cache_dir, recursive = TRUE)
  }

  downloaded_flag <- file.path(cache_dir, ".downloaded")
  extracted_flag <- file.path(cache_dir, ".extracted")

  doc <- ctx$client$fileService$get(documentId)
  filename <- file.path(cache_dir, doc$name)

  if (force_load || !file.exists(downloaded_flag)) {
    writeBin(ctx$client$fileService$download(documentId), filename)
    file.create(downloaded_flag)
  }

  # Anchored, escaped pattern: only a real ".zip" extension counts. The former
  # pattern ".zip" matched any character followed by "zip" anywhere in the
  # name (e.g. "unzipped_data.csv").
  is_zip <- isTRUE(grepl("\\.zip$", doc$name, ignore.case = TRUE))

  if (!is_zip) {
    return(filename)
  }

  if (force_load || !file.exists(extracted_flag)) {
    unzip(filename, exdir = cache_dir, overwrite = TRUE)
    unlink(filename) # ZIP file will no longer be needed
    file.create(extracted_flag)
  }

  # A single recursive listing covers archives with sub-folders, without
  # sub-folders, or with a mix of both, and yields character(0) for an empty
  # archive. The hidden flag files are skipped because all.files = FALSE.
  list.files(
    cache_dir,
    full.names = TRUE, all.files = FALSE,
    recursive = TRUE
  )
}
# tercen/tim documentation built on May 3, 2023, 10:43 p.m.