# R/filename_functions.R

# Pattern for a file extension: a literal dot followed by one or more
# word characters (`\w`), anchored at the end of the string.
.FILE_EXTENSION_REGEX <- "\\.\\w+$"

# Short alias for stringr::str_trim; the functions in this file apply it
# to their input before any pattern matching.
.trim <- stringr::str_trim

#' Get the file name component of each path.
#'
#' Each input is trimmed, then the run of characters following the last
#' forward slash or backslash is taken to be the file name. An input
#' with no separator is returned (trimmed) as-is.
#'
#' @param paths Character vector of paths/filenames.
#'
#' @return Character vector of file names, one element per input.
#'
#' @export
#'
fileName <- function(paths) {
  trimmed <- .trim(paths)

  # one or more non-separator characters at the end of the string
  last_component <- "[^\\/\\\\]+$"

  stringr::str_extract(trimmed, pattern = last_component)
}


#' Get the extension of each path/filename.
#'
#' The extension is whatever matches the package's extension pattern:
#' a dot followed by one or more word characters at the very end of
#' the (trimmed) input. A name that merely ends with a dot therefore
#' has no extension.
#'
#' @param paths Character vector of paths/filenames.
#'
#' @return Character vector of extensions without the leading dot.
#'   Elements with no extension (including those ending in a bare dot)
#'   are returned as empty strings.
#'
#' @export
#'
fileExtension <- function(paths) {
  dotted <- stringr::str_extract(.trim(paths), .FILE_EXTENSION_REGEX)

  # str_extract gives NA where nothing matched; report that as ""
  dotted <- replace(dotted, is.na(dotted), "")

  # drop the dot that starts each matched extension
  stringr::str_replace(dotted, "^\\.", "")
}


#' Check which paths/filenames end with an extension.
#'
#' Uses the same definition of an extension as the other functions in
#' this file: a dot followed by one or more word characters at the
#' very end of the (trimmed) input. A name ending in a bare dot does
#' not count as having an extension.
#'
#' @param paths Character vector of paths/filenames.
#'
#' @return Logical vector, one element per input.
#'
#' @export
#'
fileHasExtension <- function(paths) {
  trimmed <- .trim(paths)
  stringr::str_detect(trimmed, pattern = .FILE_EXTENSION_REGEX)
}

#' Strip the extension from each path/filename.
#'
#' Inputs are trimmed and the first match of the extension pattern
#' (a dot followed by word characters at the end of the string) is
#' replaced with nothing. Only the final extension is removed.
#'
#' @param paths Character vector of paths/filenames.
#' @param removeTrailingDot If \code{TRUE} (the default), a bare dot at
#'   the end of a name is removed as well.
#'
#' @return Character vector of paths/filenames with the extension (and,
#'   optionally, a trailing dot) removed.
#'
#' @export
#'
fileRemoveExtension <- function(paths, removeTrailingDot = TRUE) {
  pattern <- .FILE_EXTENSION_REGEX

  if (removeTrailingDot) {
    # widen the pattern so a lone dot at the end of the name also matches
    pattern <- stringr::str_c("(\\.$)|(", pattern, ")")
  }

  stringr::str_replace(.trim(paths), pattern, "")
}

#' Splits input paths into path and filename.
#'
#' This function ASSUMES that the last component in an input
#' path is a filename, regardless of whether it has an extension
#' or not. Inputs are trimmed and split on forward slashes and
#' backslashes; the leading components are re-joined with "/", so
#' backslash separators in the input appear as "/" in the returned path.
#'
#' @param paths Input paths/filenames (character).
#'
#' @return A character matrix with paths in column 1 ("path") and
#'   filenames (including extensions if any) in column 2 ("file"), one
#'   row per input. An input with no separator gets an empty path.
#'   Zero-length input gives a matrix with zero rows and two columns.
#'
#' @export
#'
fileSplitPath <- function(paths) {
  parts.list <- stringr::str_split(.trim(paths), "[\\/\\\\]")

  # With no inputs rbind has nothing to bind and returns NULL, on which
  # setting colnames fails; return an empty two-column matrix instead.
  if (length(parts.list) == 0) {
    empty <- matrix(character(0), nrow = 0, ncol = 2,
                    dimnames = list(NULL, c("path", "file")))
    return(empty)
  }

  # Turn the components of one path into c(path, file).
  fn <- function(parts) {
    n <- length(parts)
    if (n < 2) {
      # no separator present: the whole input is the filename
      c("", parts)
    } else {
      c(stringr::str_c(parts[seq_len(n - 1)], collapse = "/"), parts[n])
    }
  }

  out <- do.call(rbind, lapply(parts.list, fn))
  colnames(out) <- c("path", "file")
  out
}
# mbedward/cermbTools documentation built on May 22, 2019, 12:19 p.m.