# Generated by using Rcpp::compileAttributes() -> do not edit by hand
# Generator token: 10BE3573-1514-4C36-9D1C-5A225CD40393
# Invokes the native routine 'warc_int_create_cdx_from_warc' registered in the
# 'warc' package, forwarding all four arguments unchanged. The value produced
# by the native call is returned invisibly.
int_create_cdx_from_warc <- function(warc_path, warc_record_types, field_spec, cdx_path) {
  result <- .Call(
    "warc_int_create_cdx_from_warc",
    warc_path, warc_record_types, field_spec, cdx_path,
    PACKAGE = "warc"
  )
  invisible(result)
}
#' Open a gzip file for reading or writing
#'
#' @param path path to the file
#' @param mode one of "\code{write}", "\code{append}" or "\code{read}"
#' @return handle to the file
#' @export
gz_open <- function(path, mode) {
  handle <- .Call("warc_gz_open", path, mode, PACKAGE = "warc")
  handle
}
#' Return the current raw compressed offset in the file
#'
#' @param gzfile file handle
#' @return offset position (integer)
#' @export
gz_offset <- function(gzfile) {
  position <- .Call("warc_gz_offset", gzfile, PACKAGE = "warc")
  position
}
#' Return the current raw uncompressed offset in the file
#'
#' @param gzfile file handle
#' @return offset position (integer)
#' @export
gz_tell <- function(gzfile) {
  position <- .Call("warc_gz_tell", gzfile, PACKAGE = "warc")
  position
}
#' Set the starting position, in the compressed stream, for the next
#' \code{gz_read()} or \code{gz_write()}
#'
#' @param gzfile file handle
#' @param offset a number of bytes in the compressed data stream
#' @param from one of "\code{start}", "\code{end}" or "\code{current}"
#' @return \code{TRUE} if successful
#' @export
gz_fseek <- function(gzfile, offset, from) {
  status <- .Call("warc_gz_fseek", gzfile, offset, from, PACKAGE = "warc")
  status
}
#' Set the starting position, in the uncompressed stream, for the next
#' \code{gz_read()} or \code{gz_write()}
#'
#' @param gzfile file handle
#' @param offset a number of bytes in the uncompressed data stream
#' @param from either "\code{start}" or "\code{current}"
#' @return the resulting offset location, measured in bytes from the beginning
#'   of the uncompressed stream, or \code{-1} in case of error, in particular
#'   if the file is opened for writing and the new starting position would be
#'   before the current position.
#' @export
gz_seek <- function(gzfile, offset, from) {
  new_offset <- .Call("warc_gz_seek", gzfile, offset, from, PACKAGE = "warc")
  new_offset
}
#' Read from a gz file into a raw vector
#'
#' @param gzfile file handle
#' @param len number of characters to read
#' @return the data read, as a raw vector
#' @export
gz_read_raw <- function(gzfile, len) {
  chunk <- .Call("warc_gz_read_raw", gzfile, len, PACKAGE = "warc")
  chunk
}
#' Read from a gz file into a character vector
#'
#' @param gzfile file handle
#' @param len number of characters to read
#' @return the data read, as a character vector
#' @export
gz_read_char <- function(gzfile, len) {
  chunk <- .Call("warc_gz_read_char", gzfile, len, PACKAGE = "warc")
  chunk
}
#' Test whether the end of the file has been reached
#'
#' @param gzfile file handle
#' @export
gz_eof <- function(gzfile) {
  at_end <- .Call("warc_gz_eof", gzfile, PACKAGE = "warc")
  at_end
}
#' Read a line from a gz file
#'
#' @param gzfile file handle
#' @note The line buffer holds at most 8,192 characters. This function is
#'   intended for use on well-known formats.
#' @export
gz_gets <- function(gzfile) {
  line <- .Call("warc_gz_gets", gzfile, PACKAGE = "warc")
  line
}
#' Read a line from a gz file
#'
#' @param gzfile file handle
#' @note The line buffer holds at most 8,192 characters. This function is
#'   intended for use on well-known formats.
#' @export
gz_gets_raw <- function(gzfile) {
  line <- .Call("warc_gz_gets_raw", gzfile, PACKAGE = "warc")
  line
}
#' Write a raw vector to a gz file
#'
#' The result of the underlying native call is returned invisibly.
#'
#' @param gzfile file handle
#' @param buffer raw vector to write
#' @export
gz_write_raw <- function(gzfile, buffer) {
  result <- .Call("warc_gz_write_raw", gzfile, buffer, PACKAGE = "warc")
  invisible(result)
}
#' Write an atomic character vector to a file
#'
#' The result of the underlying native call is returned invisibly.
#'
#' @param gzfile file handle
#' @param buffer atomic character vector to write
#' @export
gz_write_char <- function(gzfile, buffer) {
  result <- .Call("warc_gz_write_char", gzfile, buffer, PACKAGE = "warc")
  invisible(result)
}
#' Flush the current gzip stream
#'
#' Flushes all zlib output buffers for the current file and terminates the
#' gzip stream. The next \code{gz_write()} will start a new gzip stream.
#'
#' @param gzfile file handle
#' @export
gz_flush <- function(gzfile) {
  result <- .Call("warc_gz_flush", gzfile, PACKAGE = "warc")
  invisible(result)
}
#' Close the gz file
#'
#' @param gzfile file handle
#' @note To properly flush the buffers and correctly terminate a gzip stream
#'   you \emph{must} call \code{gz_flush()} before closing the file.
#' @export
gz_close <- function(gzfile) {
  result <- .Call("warc_gz_close", gzfile, PACKAGE = "warc")
  invisible(result)
}
#' Inflate a gzip stream from a file
#'
#' Given a gzip file that was built from concatenated individual gzip streams,
#' this function expands the contents of one stream into a \code{raw} vector
#' and returns it.
#'
#' @param path path to a WARC file compressed as individual gzip streams
#' @param raw_stream_pos position in the raw file at \code{path} (not the
#'   "gzip stream position")
#' @return a \code{raw} vector holding the expanded contents of the stream
#' @note Since this works with compressed files, the memory size of the
#'   returned value may be quite large.
#' @export
gzip_inflate_from_pos <- function(path, raw_stream_pos) {
  inflated <- .Call(
    "warc_gzip_inflate_from_pos",
    path, raw_stream_pos,
    PACKAGE = "warc"
  )
  inflated
}
# Invokes the native routine 'warc_gzuncompress' registered in the 'warc'
# package, forwarding r_source and r_guess_size unchanged, and returns
# whatever the native call produces.
gzuncompress <- function(r_source, r_guess_size) {
  inflated <- .Call(
    "warc_gzuncompress",
    r_source, r_guess_size,
    PACKAGE = "warc"
  )
  inflated
}
#' Find the first occurrence (if any) of a sequence of raw bytes
#' (\code{pattern}) in \code{buffer}.
#'
#' @param buffer vector to search in
#' @param pattern sequence of bytes to look for
#' @return index in \code{buffer}, or \code{-1} if \code{pattern} is not found
#' @export
find_sequence <- function(buffer, pattern) {
  match_index <- .Call("warc_find_sequence", buffer, pattern, PACKAGE = "warc")
  match_index
}
# Add the following code to your website.
# For more information on customizing the embed code, read Embedding Snippets.