R/inventory.R

Defines functions inventory_transfer inventory_check inventory_find inventory_exists inventory_show inventory_make inventory_get inventory_rm inventory_add inventory_save_and_add

Documented in inventory_add inventory_check inventory_exists inventory_find inventory_get inventory_make inventory_rm inventory_save_and_add inventory_show inventory_transfer

## -------------------------------------------------------------------------------------------------------------------------------------------------------
#' Wrapper for inventory_add that also saves tables and R objects.
#'
#' @param object object gets saved by function save_method with extension file_type.    
#' @param inv_location Path to the inventory you want to create or modify. If possible, this arg
#'  defaults to the parent of the last destination given to `freeze`.
#' @param tag identifier for an inventory record that you want to add.
#' @param filename Path for the file that you want to save. Absolute or relative to inv_location.
#' @param extra Any character string without tabs. Meant for notes to describe the saved data.
#' @param force Bump any existing entry with the given tag, moving it to a backup with a similar name. Default is \code{TRUE}.
#'
#' @export
#'
inventory_save_and_add = function(
  object,
  tag = deparse(substitute(object)), 
  file_type   = if( is.data.frame(object)){ "csv"     } else { "Rdata" },
  save_method = if( is.data.frame(object)){ write.csv } else { saveRDS },
  filename = file.path(Sys.getenv("FREEZR_DESTINATION"), paste0(tag, ".", file_type)),
  extra, 
  inv_location = NULL,
  verbose = T, 
  ...
){
  if(verbose){
    cat(sep = "",
      "\n File: ", filename, 
      "\n Save method: ", deparse(substitute(save_method)),
      "\n To retrieve: inventory_get(tag ='", tag, "')", 
      "\n Notes:", extra
    )
  }
  save_method( object, filename, ... )
  inventory_add( tag = tag, 
                 filename = filename,
                 extra = extra, 
                 inv_location = inv_location )
}

#' Keep track of important items from previous analyses.
#'
#' @export
#'
#' @param inv_location Path to the inventory you want to create or modify. If possible, this arg
#'  defaults to the parent of the last destination given to `freeze`.
#' @param tag identifier for an inventory record that you want to add.
#' @param parent_tag identifier for a file that this analysis depends on.
#' @param filename relative path from \code{inv_location} for a file that you want to add to the inventory.
#' @param extra Any character string without tabs. Meant for metadata to be associated with the given \code{tag}
#'  and \code{filename}.
#'  This may just be notes, or you could include an underscore- and pipe-delimited list of key-value pairs;
#'  the sky's the limit.
#' @param force Bump any existing entry with the given tag, moving it to a backup with a similar name. Default is \code{TRUE}.
#'
#' @details \code{inventory_*} functions help track data as it passes through multiple stages of analysis.
#'  The central data structure is a table with the filename \code{.inventory.txt}. It has five
#'  columns: \code{tag}, \code{parent_tag}, \code{date_modified}, \code{filename}, and \code{extra}.
#'
#' \code{inventory_add} looks for this table at the \code{inv_location} you specify.
#' \code{inventory_add} will add a row with the given tag. If that tag is present already and
#'  \code{force==TRUE}, the record will be altered. If the tag is present but \code{force==FALSE},
#'  the tag will be altered via \code{make.unique} and a new record will be created.
#'
#'
inventory_add = function( tag = NULL, inv_location = NULL, filename = NULL,
                          extra = "", parent_tag = "",
                          force = TRUE ){
  filename = path.expand(filename)
  
  if( !is.null( tag ) ) { assertthat::assert_that( tag!="" ) }
  if( length( grep( x=extra, pattern = "\t", fixed = T ) ) > 0 ){
    stop("Sorry, inventories are tab-delimited. `extra` field may not contain tabs.")
  }

  inv = inventory_show( inv_location )
  inventory_path = inventory_find( inv_location ) 
  inv_location = dirname(inventory_path)

  # # Check whether filename exists.
  absolute = R.utils::isAbsolutePath(filename)
  if( absolute ){
    filename_full = filename
  } else {
    filename_full = file.path( inv_location, filename )
  }
  if( !file.exists( filename_full ) ){
    warning( paste0("The file you're adding, ", filename_full, ", does not exist! Adding it anyway.\n") )
  }

  # if tag already present
  if( tag %in% inv$tag ){
    bump_tag = rev( make.unique( c( inv$tag, tag ) ) )[1]
    if( force ) {
      warning( paste0( "Displacing an old row. It will now have the tag:     ", bump_tag ) )
    } else {
      warning( paste0( "That tag is already taken. Using ", bump_tag, " instead." ) )
    }
    inv[which(inv$tag==tag), "tag"] = ifelse( force, bump_tag,      tag)
    row_add = data.frame(     tag   = ifelse( force, tag,      bump_tag),
                          parent_tag=parent_tag,
                          date_modified = format( Sys.time(), "%Y_%b_%d|%H_%M_%S"),
                          filename=filename,
                          extra=extra )
  # if tag not present
  } else {
    row_add = data.frame( tag=tag,
                          parent_tag=parent_tag,
                          date_modified = format( Sys.time(), "%Y_%b_%d|%H_%M_%S"),
                          filename=filename,
                          extra=extra )
  }
  inv = rbind( inv, row_add )

  utils::write.table( inv, inventory_path, quote = F, row.names = F, col.names = T, sep = "\t" )
}




#' Remove no-longer important items from the inventory.
#'
#' @export
#' @param inv_location Path to the inventory you want to create or modify. If possible, this arg
#'  defaults to the parent of the last destination given to `freeze`.
#' @param tag identifier for an inventory record that you want to add.
#' @details \code{inventory_*} functions help track data as it passes through multiple stages of analysis.
#'  The central data structure is a table with the filename \code{.inventory.txt}. It has five
#'  columns: \code{tag}, \code{parent_tag}, \code{date_modified}, \code{filename}, and \code{extra}.
#'
#' \code{inventory_rm} will remove the row with the given tag.
#'
#' @export 
#'
inventory_rm = function( tag = NULL, inv_location = NULL ){

  if( !is.null( tag ) ) { assertthat::assert_that( tag!="" ) }

  inventory_path = inventory_find( inv_location )
  inv_location = dirname( inventory_path )
  inv = inventory_show( inv_location, make_new = F )

  given_tag = tag
  if( ! given_tag %in% inv$tag ){
    warning("Tag not present. No action taken.")
  }
  inv = subset( inv, tag != given_tag)
  utils::write.table( inv, inventory_path, quote = F, row.names = F, col.names = T, sep = "\t" )
}


#' Retrieve (paths to) important items from previous analyses.
#'
#' @export
#' @param inv_location Path to the inventory you want to access. If possible, this arg
#'  defaults to the parent of the last destination given to `freeze`.
#' @param tag identifier for an inventory record that you want to access.
#' @param return_all_fields When retrieving data, return the whole inventory record instead of just the \code{filename}.
#' @details \code{inventory_*} functions help organize data as it passes through multiple stages of analysis.
#'  The central data structure is a table with the filename \code{.inventory.txt}. It has five
#'  columns: \code{tag}, \code{parent_tag}, \code{date_modified}, \code{filename}, and \code{extra}.
#'
#' \code{inventory_get} looks for this table at the \code{inv_location} you specify. Any record matching
#'  the tag you give will be returned.
#'
inventory_get = function( tag = NULL, inv_location = NULL, return_all_fields = FALSE ){
  inventory_path = inventory_find( inv_location )
  inv = inventory_show( inv_location )

  ii = which(inv$tag==tag)
  if( length(ii) == 0 ){
    warning( "That tag is not present. Quitting." )
    return()
  } else if ( length(ii) > 1 ){
    warning( paste0( "Duplicate tag detected! This is not supposed to happen. ",
                     "If you can reproduce this issue without altering `.inventory.txt` by hand, ",
                     "please contact the package maintainer." ) )
  }

  myrow = inv[ii, ]
  relative = substring(myrow$filename, 1, 1)[[1]]!=.Platform$file.sep
  if(relative){
    myrow$filename = file.path( dirname( inventory_path ), myrow$filename )
  }

  if( return_all_fields ){
    return( myrow )
  } else {
    return( myrow$filename )

  }
}


#' Make a new inventory.
#'
#' @export
#' @param inv_location Path to the inventory you want to create, access, or modify. If possible, this arg
#'  defaults to the parent of the last destination given to `freeze`.
#' @details \code{inventory_*} functions help organize data as it passes through multiple stages of analysis.
#'  The central data structure is a table with the filename \code{.inventory.txt}. It has five
#'  columns: \code{tag}, \code{parent_tag}, \code{date_modified}, \code{filename}, and \code{extra}.
#'
#' \code{inventory_make} will make an inventory at the specified location unless there's one already.
#'
inventory_make = function( inv_location ){
  if( length(inv_location) == 0)(stop("There is no default location for inventory_make; you must supply one explicitly."))
   if( inventory_find( inv_location = inv_location, return_existence_logical = TRUE ) ){
    stop(paste0("There is already an inventory at that location (or a superdirectory).\n",
                "Nested inventories are not supported, and they may never be.\n"))
  }
  else {
    inv = data.frame( tag=as.Date( character() ),
                      filename=character(),
                      parent_tag=character(),
                      date_modified=character(),
                      extra=character(),
                      stringsAsFactors=FALSE )
    if(!dir.exists(inv_location)) {
      dir.create(inv_location, recursive = T)
      assertthat::assert_that(dir.exists(inv_location))
    }
    utils::write.table( inv, file.path(inv_location, ".inventory.txt"), quote = F, row.names = F, col.names = T, sep = "\t" ) 
  }
  return()
}

#' Return an entire inventory.
#'
#' @export
#' @param make_new Deprecated; use inventory_make instead. 
#' @param inv_location Path to the inventory you want to create, access, or modify. If possible, this arg
#'  defaults to the parent of the last destination given to `freeze`.
#' @details \code{inventory_*} functions help organize data as it passes through multiple stages of analysis.
#'  The central data structure is a table with the filename \code{.inventory.txt}. It has five
#'  columns: \code{tag}, \code{parent_tag}, \code{date_modified}, \code{filename}, and \code{extra}.
#'
#' \code{inventory_show} will return the whole table.
#'
inventory_show = function( inv_location = NULL, make_new = FALSE ){

  if(make_new){ 
    warning("The make_new argument of inventory_show is deprecated. Use inventory_make instead.")
    inventory_make(inv_location)
  }
  
  inventory_path = inventory_find( inv_location ) 
  inv = utils::read.table( inventory_path, header = T, sep = "\t", stringsAsFactors = F )
  return( inv )
}


#' Locate your inventory.
#'
#' @export
#' @param inv_location Path to the inventory you think might exist, or to its parent folder, or to
#'  any folder above that (as long as none of them contain other inventories). When possible, this arg
#'  defaults to the parent of the last destination given to `freeze`.
#' @details \code{inventory_*} functions help organize data as it passes through multiple stages of analysis.
#'  The central data structure is a table with the filename \code{.inventory.txt}. It has five
#'  columns: \code{tag}, \code{parent_tag}, \code{date_modified}, \code{filename}, and \code{extra}.
#'
#' \code{inventory_exists} assesses whether an inventory is accessible from the specified location.
#'
inventory_exists = function( inv_location = NULL ){
  inventory_find( inv_location = inv_location, return_existence_logical = TRUE)
}
  
#' Locate your inventory.
#'
#' @export
#' @param return_existence_logical If TRUE, return a bool indicating whether the inventory can be found.
#' @param inv_location Path to the inventory you want to create, access, or modify, or to its parent folder, or to
#'  any folder above that (as long as none of them contain other inventories). When possible, this arg
#'  defaults to the parent of the last destination given to `freeze`.
#' @details \code{inventory_*} functions help organize data as it passes through multiple stages of analysis.
#'  The central data structure is a table with the filename \code{.inventory.txt}. It has five
#'  columns: \code{tag}, \code{parent_tag}, \code{date_modified}, \code{filename}, and \code{extra}.
#'
#' \code{inventory_find} helps locate the inventory.
#'
inventory_find = function( inv_location = NULL, return_existence_logical = FALSE ){
  
  # Deal with input. Postcondition: look_in is a file that exists.
  if( is.null( inv_location ) ){
    if("FREEZR_DESTINATION" %in% names(Sys.getenv())){
      look_in = Sys.getenv()[["FREEZR_DESTINATION"]] 
    } else {
      if(return_existence_logical){return(FALSE)}
      stop("Grumble! Please enter inv_location; default not available.\n")
    }
  } else {
    look_in = c( inv_location, dirname( inv_location ) )
    if( !any( dir.exists( look_in ) ) ){
      if(return_existence_logical){return(FALSE)}
      stop("Grumble! For inv_location, the folder you specified does not exist.\n")
    } else {
      look_in = inv_location
    }
  }
  
  # # Searches from last inv_location given to freezr::freeze, descending the file tree.
  while(TRUE){
    inventory_path = file.path( look_in, ".inventory.txt" )
    if( file.exists( inventory_path ) ){
      if(return_existence_logical){return(TRUE)}
      return( inventory_path )
    } else if( look_in == dirname(look_in) ) {
      if(return_existence_logical){return(FALSE)}
      stop("Uh oh! No inventory found in any superdirectory of the inv_location you specified. Try inventory_make. \n" )
    } else {
      look_in = dirname(look_in)
    }
  }
  if(return_existence_logical){return(FALSE)}
  stop("Please enter inv_location; default not available.")
}


#' Make sure everything in the inventory is actually present
#'
#' @param inv_location Where to look for the inventory.
#' 
#' This function issues warnings if the inventory or any file it points to is absent. 
#' It returns a dataframe with details, unless the inventory is absent, in which case
#' it returns NULL.
#'
#' @export
#'
inventory_check = function( inv_location = NULL ){

  # inventory_show will throw an error if the inventory's not there, but we want a warning instead.
  inv = tryCatch( inventory_show( inv_location, make_new = F ) , 
                  error = function(e){
                    if( conditionMessage(e)=="There is no inventory at that location."){
                      warning("There is no inventory at that location.")
                      return(NULL)
                    } else {
                      stop(paste0("Unexpected error in inventory_show:\n", print(e)))
                    }
                  }  )
  if(is.null(inv)){ return() }
  
  inventory_path = inventory_find( inv_location )
  inv_location = dirname( inventory_path )
  full_paths = sapply(inv$tag, inventory_get, inv_location = inv_location)
  results = data.frame( tag = inv$tag,
                        exists = file.exists(full_paths) | dir.exists(full_paths),
                        full_path = full_paths ) 
  if( !all(results$exists)){
    warning("Some of your inventory items cannot be found! \n")
  } else {
    cat("Everything in your inventory seems to exist. Congratulations.\n")
  }
  return( results )
}

#' Copy all files listed in the inventory to the specified target location.
#'
#' @param inv_location Where to look for the inventory.
#' @param target_location The stuff gets put in <target_location>/inventory. 
#' To avoid copying ridiculous long file paths, files are renamed as "tag.ext" so that 
#' the new filename is the tag but the old extension (anything after the last period) is preserved. 
#' For folders or for files with no '.' in the name, only the tag is used.
#' @param overwrite Passed to file.copy and also checked before (over)writing the new .inventory.txt file.
#' @param verbose Print paths as files get copied?
#'
#' @export
#'
inventory_transfer = function( inv_location = NULL, target_location, overwrite = F, verbose = F ){
  inventory_check(inv_location)
  new_inv_location = file.path(target_location, "transferred_files")
  suppressWarnings( dir.create( new_inv_location, recursive = T ) ) 
  old_inv = new_inv = inventory_show( inv_location )
  extension = ifelse( grepl( pattern = "\\.", x = old_inv$filename ),
                      gsub(x = old_inv$filename, pattern = "^.*\\.", replacement = ""), 
                      "" )
  new_inv$filename = ifelse( nchar(extension) > 0, 
                             paste(old_inv$tag, extension, sep = "."), 
                             old_inv$tag )
  full_path_temp  = file.path( new_inv_location, basename(old_inv$filename) )
  full_path_final = file.path( new_inv_location, new_inv$filename )
  
  if(verbose){
    cat("Copying:\n")
  }
  for( i in seq_along( new_inv$tag )){
    if(verbose){
      cat("  ", old_inv$filename[i], "\n   to   \n", full_path_temp[i], "\n\n" )
    }
    # Use inventory_get just to compute the absolute path.
    transfer_worked = file.copy( from = inventory_get( old_inv$tag[i], inv_location = inv_location ), 
                                 to = new_inv_location, 
                                 overwrite = overwrite, copy.mode = T, recursive = T )
    if(verbose){
      cat("  ", full_path_temp[i], "\n   to   \n", full_path_final[i], "\n\n" )
    }
    rename_worked = file.rename(from = full_path_temp[i], to = full_path_final[i] )
    
    
    if( !transfer_worked ){
      warning( paste0("file.copy returned FALSE on", 
                      " tag = ", old_inv$tag[i], 
                      " file = ", old_inv$filename[i], 
                      "\n") )
    }
    if( !rename_worked ){
      warning( paste0("file.rename returned FALSE on", 
                      " tag = ", old_inv$tag[i], 
                      " file = ", old_inv$filename[i], 
                      " . freezr may not have permission to overwrite.",
                      "\n") )
    }
  }
  new_inv_path = file.path(new_inv_location, ".inventory.txt")
  if(file.exists(new_inv_path)){
    my_msg = paste0(new_inv_path, " already exists.\n")
    if( overwrite ){
      warning( my_msg )
    } else {
      stop( my_msg )
    }
  }
  utils::write.table( new_inv, new_inv_path, 
               quote = F, row.names = F, col.names = T, sep = "\t" )
  return()
}
ekernf01/freezr documentation built on Feb. 8, 2022, 5:22 a.m.