Nothing
# Copyright 2011-2012, 2014 Jan van der Laan
#
# This file is part of LaF.
#
# LaF is free software: you can redistribute it and/or modify it under the terms
# of the GNU General Public License as published by the Free Software
# Foundation, either version 3 of the License, or (at your option) any later
# version.
#
# LaF is distributed in the hope that it will be useful, but WITHOUT ANY
# WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR
# A PARTICULAR PURPOSE. See the GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License along with
# LaF. If not, see <http://www.gnu.org/licenses/>.
#' Create a connection to a comma separated value (CSV) file.
#'
#' A connection to the file filename is created. Column types have to be
#' specified. These are not determined automatically as for example read.csv
#' does. This has been done to increase speed.
#'
#' After the connection is created data can be extracted using indexing (as in a
#' normal data.frame) or methods such as \code{\link{read_lines}} and
#' \code{\link{next_block}} can be used to read in blocks. For processing the
#' file in blocks the convenience function \code{\link{process_blocks}} can be
#' used.
#'
#' @param filename character containing the filename of the CSV-file
#' @param column_types character vector containing the types of data in each of
#' the columns. Valid types are: double, integer, categorical and string.
#' @param column_names optional character vector containing the names of the
#' columns.
#' @param sep optional character specifying the field separator used in the
#' file.
#' @param dec optional character specifying the decimal mark.
#' @param trim optional logical specifying whether or not white space at the end
#' of factor levels or character strings should be trimmed.
#' @param skip optional numeric specifying the number of lines at the beginning
#' of the file that should be skipped.
#' @param ignore_failed_conversion ignore (set to \code{NA}) fields that could
#' not be converted.
#'
#' @details
#' The CSV-file should not contain headers. Use the \code{skip} option to skip
#' any headers.
#'
#' @return
#' Object of type \code{\linkS4class{laf}}. Values can be extracted from this
#' object using indexing, and methods such as \code{\link{read_lines}},
#' \code{\link{next_block}}.
#'
#' @seealso
#' See \code{\link{read.csv}} for conventional access of CSV files, and
#' \code{\link{detect_dm_csv}} to automatically determine the column types.
#'
#' @examples
#' # Create temporary filename
#' tmpcsv <- tempfile(fileext=".csv")
#'
#' # Generate test data
#' ntest <- 10
#' column_types <- c("integer", "integer", "double", "string")
#' testdata <- data.frame(
#' a = 1:ntest,
#' b = sample(1:2, ntest, replace=TRUE),
#' c = round(runif(ntest), 13),
#' d = sample(c("jan", "pier", "tjores", "corneel"), ntest, replace=TRUE)
#' )
#' # Write test data to csv file
#' write.table(testdata, file=tmpcsv, row.names=FALSE, col.names=FALSE, sep=',')
#'
#' # Create LaF-object
#' laf <- laf_open_csv(tmpcsv, column_types=column_types)
#'
#' # Read from file using indexing
#' first_column <- laf[ , 1]
#' first_row <- laf[1, ]
#'
#' # Read from file using blockwise operators
#' begin(laf)
#' first_block <- next_block(laf, nrows=2)
#' second_block <- next_block(laf, nrows=2)
#'
#' # Cleanup
#' file.remove(tmpcsv)
#'
#' @export
laf_open_csv <- function(filename, column_types,
    column_names = paste("V", seq_len(length(column_types)), sep=""),
    sep = ",", dec = '.', trim = FALSE, skip = 0,
    ignore_failed_conversion = FALSE) {
  # Validates every argument, opens the file through the native
  # "laf_open_csv" routine and wraps the returned handle in a "laf" object.
  #
  # Arguments that must be scalar are reduced to their first element. Missing
  # values are rejected here with an explicit message, so that neither a
  # cryptic `if` failure nor an NA handed to native code can occur.

  # check filename
  if (!is.character(filename))
    stop("filename should be of type character.")
  filename <- path.expand(filename[1])
  if (!.file_readable(filename))
    stop("Can not access file '", filename, "'.")

  # check column_types (.laf_to_typecode is defined elsewhere in the package;
  # presumably it validates the type names and maps them to type codes)
  types <- .laf_to_typecode(column_types)

  # check column_names
  if (!is.character(column_names))
    stop("column_names should be of type character.")
  if (length(column_names) != length(column_types))
    stop("Lengths of column_names and column_types do not match.")
  # make the names syntactically valid and unique
  column_names <- make.names(column_names, unique = TRUE)

  # check sep
  if (!is.character(sep))
    stop("sep should be of type character")
  sep <- sep[1]
  # is.na() is tested first: nchar(NA_character_) is NA, which would make the
  # `if` itself fail. This also covers zero-length input, as sep[1] is then NA.
  if (is.na(sep) || nchar(sep) != 1)
    stop("The number of characters in sep is not equal to one.")

  # check dec (same reasoning as for sep)
  if (!is.character(dec))
    stop("dec should be of type character")
  dec <- dec[1]
  if (is.na(dec) || nchar(dec) != 1)
    stop("The number of characters in dec is not equal to one.")

  # check trim
  if (!is.logical(trim))
    stop("trim should be of type logical")
  trim <- trim[1]
  if (is.na(trim))
    stop("trim should be either TRUE or FALSE")

  # check skip
  if (!is.numeric(skip))
    stop("skip should be of type numeric")
  skip <- as.integer(skip[1])
  # as.integer() yields NA for NA, non-finite or out-of-range input
  if (is.na(skip) || skip < 0L)
    stop("skip should be a non-negative number")

  # check ignore_failed_conversion
  if (!is.logical(ignore_failed_conversion))
    stop("ignore_failed_conversion should be of type logical")
  ignore_failed_conversion <- ignore_failed_conversion[1]
  if (is.na(ignore_failed_conversion))
    stop("ignore_failed_conversion should be either TRUE or FALSE")

  # open file; the value returned by the native routine is stored (as integer)
  # in the file_id slot below
  p <- .Call("laf_open_csv", PACKAGE = "LaF", filename, types, sep, dec,
    trim, skip, ignore_failed_conversion)

  # create laf-object; CSV files have no fixed column widths, hence integer(0)
  new(Class = "laf",
    file_id = as.integer(p),
    filename = filename,
    file_type = "csv",
    column_types = types,
    column_names = column_names,
    column_widths = integer(0),
    options = list(
      sep = sep,
      dec = dec,
      skip = skip,
      trim = trim)
  )
}
#' Create a connection to a fixed width file.
#'
#' A connection to the file filename is created. Column types have to be
#' specified. These are not determined automatically as for example
#' read.fwf does. This has been done to increase speed.
#'
#' After the connection is created data can be extracted using indexing (as in a
#' normal data.frame) or methods such as read_lines and next_block can be used
#' to read in blocks. For processing the file in blocks the (faster) convenience
#' function process_blocks can be used.
#'
#' @param filename character containing the filename of the fixed width file.
#' @param column_types character vector containing the types of data in each of
#' the columns. Valid types are: double, integer, categorical and string.
#' @param column_widths numeric vector containing the width, in number of
#' characters, of each of the columns.
#' @param column_names optional character vector containing the names of the
#' columns.
#' @param dec optional character specifying the decimal mark.
#' @param trim optional logical specifying whether or not whitespace at the end
#' of factor levels or character strings should be trimmed.
#' @param ignore_failed_conversion ignore (set to \code{NA}) fields that could
#' not be converted.
#'
#' @details
#' Only use \code{ignore_failed_conversion} when you are sure that the column
#' specification is correct. Otherwise, this option can hide an incorrect
#' specification.
#'
#' @return
#' Object of type \code{\linkS4class{laf}}. Values can be extracted from this object
#' using indexing, and methods such as \code{\link{read_lines}}, \code{\link{next_block}}.
#'
#' @seealso
#' See \code{\link{read.fwf}} for conventional access of fixed width files.
#'
#' @export
laf_open_fwf <- function(filename, column_types, column_widths,
    column_names = paste("V", seq_len(length(column_types)), sep=""),
    dec = ".", trim = TRUE, ignore_failed_conversion = FALSE) {
  # Validates every argument, opens the file through the native
  # "laf_open_fwf" routine and wraps the returned handle in a "laf" object.
  #
  # Arguments that must be scalar are reduced to their first element. Missing
  # values are rejected here with an explicit message, so that neither a
  # cryptic `if` failure nor an NA handed to native code can occur.

  # check filename
  if (!is.character(filename))
    stop("filename should be of type character.")
  filename <- path.expand(filename[1])
  if (!.file_readable(filename))
    stop("Can not access file '", filename, "'.")

  # check column_types (.laf_to_typecode is defined elsewhere in the package;
  # presumably it validates the type names and maps them to type codes)
  types <- .laf_to_typecode(column_types)

  # check column widths
  if (!is.numeric(column_widths))
    stop("column_widths should be of type numeric.")
  if (length(column_widths) != length(column_types))
    stop("Lengths of column_widths and column_types do not match.")
  column_widths <- as.integer(column_widths)
  # as.integer() yields NA for NA, non-finite or out-of-range input; a
  # negative width cannot describe a column either
  if (any(is.na(column_widths)) || any(column_widths < 0L))
    stop("column_widths should not contain missing or negative values.")

  # check column_names
  if (!is.character(column_names))
    stop("column_names should be of type character.")
  if (length(column_names) != length(column_types))
    stop("Lengths of column_names and column_types do not match.")
  # make the names syntactically valid and unique
  column_names <- make.names(column_names, unique = TRUE)

  # check dec
  if (!is.character(dec))
    stop("dec should be of type character")
  dec <- dec[1]
  # is.na() is tested first: nchar(NA_character_) is NA, which would make the
  # `if` itself fail. This also covers zero-length input, as dec[1] is then NA.
  if (is.na(dec) || nchar(dec) != 1)
    stop("The number of characters in dec is not equal to one.")

  # check trim
  if (!is.logical(trim))
    stop("trim should be of type logical")
  trim <- trim[1]
  if (is.na(trim))
    stop("trim should be either TRUE or FALSE")

  # check ignore_failed_conversion
  if (!is.logical(ignore_failed_conversion))
    stop("ignore_failed_conversion should be of type logical")
  ignore_failed_conversion <- ignore_failed_conversion[1]
  if (is.na(ignore_failed_conversion))
    stop("ignore_failed_conversion should be either TRUE or FALSE")

  # open file; the value returned by the native routine is stored (as integer)
  # in the file_id slot below
  p <- .Call("laf_open_fwf", PACKAGE = "LaF", filename, types, column_widths,
    dec, trim, ignore_failed_conversion)

  # create laf-object
  new(Class = "laf",
    file_id = as.integer(p),
    filename = filename,
    file_type = "fwf",
    column_types = types,
    column_names = column_names,
    column_widths = column_widths,
    options = list(
      dec = dec,
      trim = trim)
  )
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.