R/zlib.R

Defines functions decompress compress decompressobj compressobj publicEval .onLoad

Documented in compress compressobj decompress decompressobj .onLoad publicEval

# Placeholder for the zlib enrionment
zlib <- NULL

#' The following 'zlib' enrivonment is generated by the .onLoad Behavior for R packages.
#'
#' The .onLoad function is automatically called when the package is loaded using
#' `library()` or `require()`. It initializes the an environment,
#' which can be reached from anywhere and is unique (i.e. cannot be ovwerwritten),
#' including defining a variety of constants / methods related to the zlib compression
#' library.
#'
#' Specifically, the function assigns a new environment named "zlib" containing
#' constants such as `DEFLATED`, `DEF_BUF_SIZE`, `MAX_WBITS`,
#' and various flush and compression strategies like `Z_FINISH`,
#' `Z_BEST_COMPRESSION`, etc.
#'
#' @seealso [publicEval()] for the method used to set up the public environment.
#' @seealso [zlib_constants()] for the method used to set up the constants in the environment. https://www.zlib.net/manual.html#Constants
#' @name zlib
#' @title zlib
#' @return No return value, called for side effect. An environment containing the zlib constants created onLoad.
#' @examples
#' # Load the package
#' library(zlib)
#' # Create a temporary file
#' temp_file <- tempfile(fileext = ".txt")
#'
#' # Generate example data and write to the temp file
#' example_data <- "This is an example string. It contains more than just 'hello, world!'"
#' writeBin(charToRaw(example_data), temp_file)
#'
#' # Read data from the temp file into a raw vector
#' file_con <- file(temp_file, "rb")
#' raw_data <- readBin(file_con, "raw", file.info(temp_file)$size)
#' close(file_con)
#' # Create a Compressor object gzip
#' compressor <- zlib$compressobj(zlib$Z_DEFAULT_COMPRESSION, zlib$DEFLATED, zlib$MAX_WBITS + 16)
#'
#' # Initialize variables for chunked compression
#' chunk_size <- 1024
#' compressed_data <- raw(0)
#'
#' # Compress the data in chunks
#' for (i in seq(1, length(raw_data), by = chunk_size)) {
#'    chunk <- raw_data[i:min(i + chunk_size - 1, length(raw_data))]
#'    compressed_chunk <- compressor$compress(chunk)
#'    compressed_data <- c(compressed_data, compressed_chunk)
#' }
#'
#' # Flush the compressor buffer
#' compressed_data <- c(compressed_data, compressor$flush())
#'
#'
#' # Create a Decompressor object for gzip
#' decompressor <- zlib$decompressobj(zlib$MAX_WBITS + 16)
#'
#' # Initialize variable for decompressed data
#' decompressed_data <- raw(0)
#'
#' # Decompress the data in chunks
#' for (i in seq(1, length(compressed_data), by = chunk_size)) {
#'   chunk <- compressed_data[i:min(i + chunk_size - 1, length(compressed_data))]
#'   decompressed_chunk <- decompressor$decompress(chunk)
#'   decompressed_data <- c(decompressed_data, decompressed_chunk)
#' }
#'
#' # Flush the decompressor buffer
#' decompressed_data <- c(decompressed_data, decompressor$flush())
#'
#' # Comporess / Decompress data in a single step
#'
#' original_data <- charToRaw("some data")
#' compressed_data <- zlib$compress(original_data,
#'                                  zlib$Z_DEFAULT_COMPRESSION,
#'                                  zlib$DEFLATED,
#'                                  zlib$MAX_WBITS + 16)
#' decompressed_data <- zlib$decompress(compressed_data, zlib$MAX_WBITS + 16)
#'
#' @details
#' @title What My Package Offers
#'
#' @section Methods:
#' * `compressobj(...)`: Create a compression object.
#' * `decompressobj(...)`: Create a decompression object.
#' * `compress(data, ...)`: Compress data in a single step.
#' * `decompress(data, ...)`: Decompress data in a single step.
#'
#' @section Constants:
#' * `DEFLATED`: The compression method, set to 8.
#' * `DEF_BUF_SIZE`: The default buffer size, set to 16384.
#' * `DEF_MEM_LEVEL`: Default memory level, set to 8.
#' * `MAX_WBITS`: Maximum size of the history buffer, set to 15.
#' * `Z_BEST_COMPRESSION`: Best compression level, set to 9.
#' * `Z_BEST_SPEED`: Best speed for compression, set to 1.
#' * `Z_BLOCK`: Block compression mode, set to 5.
#' * `Z_DEFAULT_COMPRESSION`: Default compression level, set to -1.
#' * `Z_DEFAULT_STRATEGY`: Default compression strategy, set to 0.
#' * `Z_FILTERED`: Filtered compression mode, set to 1.
#' * `Z_FINISH`: Finish compression mode, set to 4.
#' * `Z_FULL_FLUSH`: Full flush mode, set to 3.
#' * `Z_HUFFMAN_ONLY`: Huffman-only compression mode, set to 2.
#' * `Z_NO_COMPRESSION`: No compression, set to 0.
#' * `Z_NO_FLUSH`: No flush mode, set to 0.
#' * `Z_PARTIAL_FLUSH`: Partial flush mode, set to 1.
#' * `Z_RLE`: Run-length encoding compression mode, set to 3.
#' * `Z_SYNC_FLUSH`: Synchronized flush mode, set to 2.
#' * `Z_TREES`: Tree block compression mode, set to 6.
#'
#' @description
#' What My Package Offers
#'
#' This package provides several key features:
#'
#' \describe{
#'   \item{\strong{Robustness:}}{Built to handle even corrupted or incomplete gzip data efficiently without causing system failures.}
#'   \item{Demonstration:}{
#'   \preformatted{
#'   compressed_data <- memCompress(charToRaw(paste0(rep("This is an example string. It contains more than just 'hello, world!'", 1000), collapse = ", ")))
#'   decompressor <- zlib$decompressobj(zlib$MAX_WBITS)
#'   rawToChar(c(decompressor$decompress(compressed_data[1:300]), decompressor$flush()))  # Still working
#'   }}
#'   \item{\strong{Compliance:}}{Strict adherence to the GZIP File Format Specification, ensuring compatibility across systems.}
#'   \item{Demonstration:}{
#'   \preformatted{
#'   compressor <- zlib$compressobj(zlib$Z_DEFAULT_COMPRESSION, zlib$DEFLATED, zlib$MAX_WBITS + 16)
#'   c(compressor$compress(charToRaw("Hello World")), compressor$flush())  # Correct 31 wbits (or custom wbits you provide)
#'   # [1] 1f 8b 08 00 00 00 00 00 00 03 f3 48 cd c9 c9 57 08 cf 2f ca 49 01 00 56 b1 17 4a 0b 00 00 00
#'   }}
#'   \item{\strong{Flexibility:}}{Ability to manage Gzip streams from REST APIs without the need for temporary files or other workarounds.}
#'   \item{Demonstration:}{
#'   \preformatted{
#'     # Byte-Range Request and decompression in chunks
#'
#'     # Initialize the decompressor
#'     decompressor <- zlib$decompressobj(zlib$MAX_WBITS + 16)
#'
#'     # Define the URL and initial byte ranges
#'     url <- "https://example.com/api/data.gz"
#'     range_start <- 0
#'     range_increment <- 5000  # Adjust based on desired chunk size
#'
#'     # Placeholder for the decompressed content
#'     decompressed_content <- character(0)
#'
#'     # Loop to make multiple requests and decompress chunk by chunk
#'     for (i in 1:5) {  # Adjust the loop count based on the number of chunks you want to retrieve
#'       range_end <- range_start + range_increment
#'
#'       # Make a byte-range request
#'       response <- httr::GET(url, httr::add_headers(`Range` = paste0("bytes=", range_start, "-", range_end)))
#'
#'       # Check if the request was successful
#'       if (httr::http_type(response) != "application/octet-stream" || httr::http_status(response)$category != "Success") {
#'         stop("Failed to retrieve data.")
#'       }
#'
#'       # Decompress the received chunk
#'       compressed_data <- httr::content(response, "raw")
#'       decompressed_chunk <- decompressor$decompress(compressed_data)
#'       decompressed_content <- c(decompressed_content, rawToChar(decompressed_chunk))
#'
#'       # Update the byte range for the next request
#'       range_start <- range_end + 1
#'     }
#'
#'     # Flush the decompressor after all chunks have been processed
#'     final_data <- decompressor$flush()
#'     decompressed_content <- c(decompressed_content, rawToChar(final_data))
#'   }}
#' }
#'
#' In summary, while R’s built-in methods could someday catch up in functionality, the zlib package for now fills an important gap by providing a more robust and flexible way to handle compression and decompression tasks.
#' @keywords internal
.onLoad <- function(libname, pkgname) {
  assign("zlib", publicEval({
    # constants
    list2env(zlib_constants(), envir=environment())
    compressobj <- compressobj
    decompressobj <- decompressobj
    compress <- compress
    decompress <- decompress
  }, name="zlib"), envir = getNamespace(pkgname))
}

#' Evaluate Expression with Public and Private Environments
#'
#' `publicEval` creates an environment hierarchy consisting of
#' public, self, and private environments. The expression `expr` is
#' evaluated within these nested environments, allowing for controlled
#' variable scope and encapsulation.
#'
#' @section Environments:
#' * Public: Variables in this environment are externally accessible.
#' * Self: Inherits from Public and also contains Private and Public as children.
#' * Private: Variables are encapsulated and are not externally accessible.
#'
#' @param expr An expression to evaluate within the constructed environment hierarchy.
#' @param parentEnv The parent environment for the new 'public' environment. Default is the parent frame.
#' @param name Optional name attribute to set for the public environment.
#'
#' @return Returns an invisible reference to the public environment.
#'
#' @usage publicEval(expr, parentEnv = parent.frame(), name = NULL)
#'
#' @keywords internal
#'
#' @examples
#' publicEnv <- publicEval({
#'   private$hidden_var <- "I am hidden"
#'   public_var <- "I am public"
#' }, parentEnv = parent.frame(), name = "MyEnvironment")
#'
#' print(exists("public_var", envir = publicEnv))  # Should return TRUE
#' print(exists("hidden_var", envir = publicEnv))  # Should return FALSE
#'
#' @rdname publicEval
#' @name publicEval
publicEval <- function(expr, parentEnv = parent.frame(), name=NULL){
  public <- new.env(parent = parentEnv)
  self <- new.env(parent = public)
  private <- new.env(parent = self)
  self$self <- self
  self$public <- public
  self$private <- private

  eval(substitute(expr), envir = self, enclos = .Primitive('baseenv')())

  object_names <- names(self)
  object_names <- object_names[!(object_names %in% c("public","private","self"))]

  if(length(object_names))
    invisible(
      mapply(assign, object_names, mget(object_names, self), list(public),
             SIMPLIFY = FALSE, USE.NAMES = FALSE) )

  if(!is.null(name)&&is.character(name)) attr(public, "name") <- name
  return(invisible(public))
}

#' Create a Compression Object
#'
#' `compressobj` initializes a new compression object with specified parameters
#' and methods. The function makes use of `publicEval` to manage scope and encapsulation.
#'
#' @section Methods:
#' * `compress(data)`: Compresses a chunk of data.
#' * `flush()`: Flushes the compression buffer.
#'
#' @param level Compression level, default is -1.
#' @param method Compression method, default is `zlib$DEFLATED`.
#' @param wbits Window bits, default is `zlib$MAX_WBITS`.
#' @param memLevel Memory level, default is `zlib$DEF_MEM_LEVEL`.
#' @param strategy Compression strategy, default is `zlib$Z_DEFAULT_STRATEGY`.
#' @param zdict Optional predefined compression dictionary as a raw vector.
#'
#' @return Returns an environment containing the public methods `compress` and `flush`.
#'
#' @usage compressobj(
#'              level = -1,
#'              method = zlib$DEFLATED,
#'              wbits = zlib$MAX_WBITS,
#'              memLevel = zlib$DEF_MEM_LEVEL,
#'              strategy = zlib$Z_DEFAULT_STRATEGY,
#'              zdict = NULL
#'          )
#'
#' @examples
#' compressor <- compressobj(level = 6)
#' compressed_data <- compressor$compress(charToRaw("some data"))
#' compressed_data <- c(compressed_data, compressor$flush())
#'
#' @rdname compressobj
#' @name compressobj
#' @export
compressobj <- function(level=-1, method=zlib$DEFLATED, wbits=zlib$MAX_WBITS, memLevel=zlib$DEF_MEM_LEVEL, strategy=zlib$Z_DEFAULT_STRATEGY, zdict=NULL){
  return(publicEval({
    private$pointer <- create_compressor(level = level, method = method, wbits = wbits, memLevel = memLevel, strategy = strategy, zdict=zdict)
    compress <- function(data){
      return(compress_chunk(private$pointer, data))
    }
    flush <- function(mode = zlib$Z_FINISH){
      return(flush_compressor_buffer(private$pointer, mode = mode))
    }
  }))
}

#' Create a new decompressor object
#'
#' Initializes a new decompressor object for zlib-based decompression.
#'
#' @section Methods:
#' * `decompress(data)`: Compresses a chunk of data.
#' * `flush()`: Flushes the compression buffer.
#'
#' @param wbits The window size bits parameter. Default is 0.
#' @return A decompressor object with methods for decompression.
#'
#' @details
#' The returned decompressor object has methods for performing chunk-wise
#' decompression on compressed data using the zlib library.
#'
#' @examples
#' compressor <- zlib$compressobj(zlib$Z_DEFAULT_COMPRESSION, zlib$DEFLATED, zlib$MAX_WBITS + 16)
#' compressed_data <- compressor$compress(charToRaw("some data"))
#' compressed_data <- c(compressed_data, compressor$flush())
#' decompressor <- decompressobj(zlib$MAX_WBITS + 16)
#' decompressed_data <- c(decompressor$decompress(compressed_data), decompressor$flush())
#'
#' @export
decompressobj <- function(wbits = 0) {
  return(publicEval({
    private$pointer <- create_decompressor(wbits = wbits)
    decompress <- function(data) {
      return(decompress_chunk(private$pointer, data))
    }
    flush <- function(length = 256L)  {
      return(flush_decompressor_buffer(private$pointer, length = length))
    }
  }))
}

#' Single-step compression of raw data
#'
#' Compresses the provided raw data in a single step.
#'
#' @param data Raw data to be compressed.
#' @param level Compression level, default is -1.
#' @param method Compression method, default is `zlib$DEFLATED`.
#' @param wbits Window bits, default is `zlib$MAX_WBITS`.
#' @param memLevel Memory level, default is `zlib$DEF_MEM_LEVEL`.
#' @param strategy Compression strategy, default is `zlib$Z_DEFAULT_STRATEGY`.
#' @param zdict Optional predefined compression dictionary as a raw vector.
#'
#' @return A raw vector containing the compressed data.
#'
#' @details
#' The `compress` function simplifies the compression process by encapsulating
#' the creation of a compression object, compressing the data, and flushing the buffer
#' all within a single call. This is particularly useful for scenarios where the user
#' wants to quickly compress data without dealing with the intricacies of compression
#' objects and buffer management. The function leverages the `compressobj` function
#' to handle the underlying compression mechanics.
#'
#' @examples
#' compressed_data <- compress(charToRaw("some data"))
#'
#' @export
compress <- function(data, level=-1, method=zlib$DEFLATED, wbits=zlib$MAX_WBITS, memLevel=zlib$DEF_MEM_LEVEL, strategy=zlib$Z_DEFAULT_STRATEGY, zdict=NULL) {
  compressor <- compressobj(level, method, wbits, memLevel, strategy, zdict)
  compressed_data <- compressor$compress(data)
  compressed_data <- c(compressed_data, compressor$flush())
  return(compressed_data)
}

#' Single-step decompression of raw data
#'
#' Decompresses the provided compressed raw data in a single step.
#'
#' @param data Compressed raw data to be decompressed.
#' @param wbits The window size bits parameter. Default is 0.
#'
#' @return A raw vector containing the decompressed data.
#'
#' @details
#' The `decompress` function offers a streamlined approach to decompressing
#' raw data. By abstracting the creation of a decompression object, decompressing
#' the data, and flushing the buffer into one function call, it provides a hassle-free
#' way to retrieve original data from its compressed form. This function is designed
#' to work seamlessly with data compressed using the `compress` function or
#' any other zlib-based compression method.
#'
#' @examples
#' original_data <- charToRaw("some data")
#' compressed_data <- compress(original_data)
#' decompressed_data <- decompress(compressed_data)
#'
#' @export
decompress <- function(data, wbits = 0) {
  decompressor <- decompressobj(wbits)
  decompressed_data <- decompressor$decompress(data)
  decompressed_data <- c(decompressed_data, decompressor$flush())
  return(decompressed_data)
}

Try the zlib package in your browser

Any scripts or data that you put into this service are public.

zlib documentation built on Oct. 19, 2023, 1:13 a.m.