R/vroom_write.R

Defines functions output_column.factor output_column.character output_column.POSIXt output_column.double output_column.default output_column vroom_write_lines vroom_format vroom_write_opts get_vroom_write_opts vroom_write

Documented in output_column vroom_format vroom_write vroom_write_lines

#' Write a data frame to a delimited file
#'
#' @inheritParams vroom
#' @param x A data frame or tibble to write to disk.
#' @param file File or connection to write to.
#' @param escape The type of escape to use when quotes are in the data.
#'   - `double` - quotes are escaped by doubling them.
#'   - `backslash` - quotes are escaped by a preceding backslash.
#'   - `none` - quotes are not escaped.
#' @param quote How to handle fields which contain characters that need to be
#'   quoted.
#'   - `needed` - Values are only quoted if needed: if they contain a delimiter,
#'     quote, or newline.
#'   - `all` - Quote all fields.
#'   - `none` - Never quote fields.
#' @param bom If `TRUE` add a UTF-8 BOM at the beginning of the file. This is
#'   recommended when saving data for consumption by excel, as it will force
#'   excel to read the data with the correct encoding (UTF-8)
#' @param delim Delimiter used to separate values. Defaults to `\t` to write
#'   tab separated value (TSV) files.
#' @param eol The end of line character to use. Most commonly either `"\n"` for
#'   Unix style newlines, or `"\r\n"` for Windows style newlines.
#' @param na String used for missing values. Defaults to 'NA'.
#' @param col_names If `FALSE`, column names will not be included at the top of
#'   the file. If `TRUE`, column names will be included. If not specified,
#'   `col_names` will take the opposite value given to `append`.
#' @param append If `FALSE`, will overwrite existing file. If `TRUE`,
#'   will append to existing file. In both cases, if the file does not exist, a
#'   new file is created.
#' @export
#' @examples
#' # If you only specify a file name, vroom_write() will write
#' # the file to your current working directory.
#' out_file <- tempfile(fileext = "csv")
#' vroom_write(mtcars, out_file, ",")
#'
#' # You can also use a literal filename
#' # vroom_write(mtcars, "mtcars.tsv")
#'
#' # If you add an extension to the file name, write_()* will
#' # automatically compress the output.
#' # vroom_write(mtcars, "mtcars.tsv.gz")
#' # vroom_write(mtcars, "mtcars.tsv.bz2")
#' # vroom_write(mtcars, "mtcars.tsv.xz")
vroom_write <- function(
  x,
  file,
  delim = '\t',
  eol = "\n",
  na = "NA",
  col_names = !append,
  append = FALSE,
  quote = c("needed", "all", "none"),
  escape = c("double", "backslash", "none"),
  bom = FALSE,
  num_threads = vroom_threads(),
  progress = vroom_progress()
) {
  input <- x

  quote <- arg_match(quote)
  escape <- arg_match(escape)

  opts <- get_vroom_write_opts(quote, escape, bom)

  file <- connection_or_filepath(file, write = TRUE)

  if (NCOL(x) == 0) {
    if (!append && !inherits(file, "connection")) {
      # if file already exists, it is overwritten with an empty file!
      file.create(file)
    }
    return(invisible(input))
  }

  # This seems to work ok in practice
  buf_lines <- max(
    as.integer(Sys.getenv(
      "VROOM_WRITE_BUFFER_LINES",
      nrow(x) / 100 / num_threads
    )),
    1
  )

  # Run `output_column()` before `vroom_convert()` to ensure that any ALTREP
  # vectors created by `output_column()` will be fully materialized (#389)
  x[] <- lapply(x, output_column)

  # We need to convert any altrep vectors to normal vectors otherwise we can't fill the
  # write buffers from other threads. This should be the last manipulation done
  # to `x` before writing to ensure that no new altrep vectors are created.
  x <- vroom_convert(x)

  if (inherits(file, "connection")) {
    vroom_write_connection_(
      x,
      file,
      delim,
      eol,
      na_str = na,
      col_names = col_names,
      options = opts,
      num_threads = num_threads,
      progress = progress,
      buf_lines = buf_lines,
      is_stdout = file == stdout(),
      append = append
    )
  } else {
    vroom_write_(
      x,
      file,
      delim,
      eol,
      na_str = na,
      col_names = col_names,
      append = append,
      options = opts,
      num_threads = num_threads,
      progress = progress,
      buf_lines = buf_lines
    )
  }

  invisible(input)
}


get_vroom_write_opts <- function(quote, escape, bom) {
  v_opts <- vroom_write_opts()
  bitwOr(
    v_opts[paste0("quote_", quote)],
    bitwOr(
      v_opts[paste0("escape_", escape)],
      if (bom) v_opts["bom"] else 0
    )
  )
}

vroom_write_opts <- function() {
  c(
    "quote_none" = 0L,
    "escape_none" = 0L,
    "quote_needed" = 1L,
    "quote_all" = 2L,
    "escape_double" = 4L,
    "escape_backslash" = 8L,
    "bom" = 16L
  )
}

#' Convert a data frame to a delimited string
#'
#' This is equivalent to [vroom_write()], but instead of writing to
#' disk, it returns a string. It is primarily useful for examples and for
#' testing.
#'
#' @inheritParams vroom_write
#' @export
vroom_format <- function(
  x,
  delim = "\t",
  eol = "\n",
  na = "NA",
  col_names = TRUE,
  escape = c("double", "backslash", "none"),
  quote = c("needed", "all", "none"),
  bom = FALSE,
  num_threads = vroom_threads()
) {
  check_data_frame(x)

  if (NCOL(x) == 0) {
    return("")
  }

  quote <- arg_match(quote)
  escape <- arg_match(escape)

  opts <- get_vroom_write_opts(quote, escape, bom)

  # This seems to work ok in practice
  buf_lines <- max(
    as.integer(Sys.getenv(
      "VROOM_WRITE_BUFFER_LINES",
      nrow(x) / 100 / num_threads
    )),
    1
  )

  x[] <- lapply(x, output_column)
  vroom_format_(
    x,
    delim = delim,
    eol = eol,
    na_str = na,
    col_names = col_names,
    append = FALSE,
    options = opts,
    num_threads = vroom_threads(),
    progress = vroom_progress(),
    buf_lines = buf_lines
  )
}

#' Write lines to a file
#'
#' @param x A character vector.
#' @inheritParams vroom_write
#' @export
vroom_write_lines <- function(
  x,
  file,
  eol = "\n",
  na = "NA",
  append = FALSE,
  num_threads = vroom_threads()
) {
  check_character(x)

  x <- list(X1 = x)
  class(x) <- "data.frame"
  attr(x, "row.names") <- c(NA_integer_, -length(x[[1]]))

  vroom_write(
    x,
    file = file,
    delim = "",
    col_names = FALSE,
    eol = eol,
    na = na,
    append = append,
    quote = "none",
    escape = "none",
    num_threads = num_threads
  )
}

#' Preprocess column for output
#'
#' This is a generic function that applied to each column before it is saved
#' to disk. It provides a hook for S3 classes that need special handling.
#'
#' @keywords internal
#' @param x A vector
#' @examples
#' # Most types are returned unchanged
#' output_column(1)
#' output_column("x")
#'
#' # datetimes are formatted in ISO 8601
#' output_column(Sys.Date())
#' output_column(Sys.time())
#' @export
output_column <- function(x) {
  UseMethod("output_column")
}

#' @export
output_column.default <- function(x) {
  if (!is.object(x)) {
    return(x)
  }
  as.character(x)
}

#' @export
output_column.double <- function(x) {
  x
}

#' @export
output_column.POSIXt <- function(x) {
  format(x, "%Y-%m-%dT%H:%M:%OSZ", tz = "UTC", justify = "none")
}

#' @export
output_column.character <- function(x) {
  x
}

#' @export
output_column.factor <- function(x) {
  # TODO: look into doing writing directly in C++
  as.character(x)
}

Try the vroom package in your browser

Any scripts or data that you put into this service are public.

vroom documentation built on Jan. 27, 2026, 5:09 p.m.