Nothing
########################################################################/**
# @RdocDefault readTable
#
# @title "Reads a file in table format"
#
# @synopsis
#
# \description{
# @get "title" and creates a data frame from it, with cases corresponding
# to lines and variables to fields in the file.
#
# \emph{WARNING: This method is very much in an alpha stage.
# Expect it to change.}
#
# This method is an extension to the default @see "utils::read.table"
# function in \R. It is possible to specify a column name to column class
# map such that the column classes are automatically assigned from the
# column header in the file.
#
# In addition, it is possible to read any subset of rows.
# The method is optimized such that only columns and rows that are of
# interest are parsed and read into \R's memory. This minimizes memory
# usage at the same time as it speeds up the reading.
# }
#
# \arguments{
# \item{file}{A @connection or a filename. If a filename, the path
# specified by \code{path} is added to the front of the
# filename. Unopened files are opened and closed at the end.}
# \item{colClasses}{Either a named or an unnamed @character @vector.
# If unnamed, it specified the column classes just as used by
# @see "utils::read.table".
# If it is a named vector, \code{names(colClasses)} are used to match
# the column names read (this requires that \code{header=TRUE}) and
# the column classes are set to the corresponding values.
# }
# \item{isPatterns}{If @TRUE, the matching of \code{names(colClasses)} to
# the read column names is done by regular expressions matching.}
# \item{defColClass}{If the column class map specified by a named
# \code{colClasses} argument does not match some of the read column
# names, the column class is by default set to this class. The
# default is to read the columns in an "as is" way.}
# \item{header}{If @TRUE, column names are read from the file.}
# \item{skip}{The number of lines (commented or non-commented) to skip
# before trying to read the header or alternatively the data table.}
# \item{nrows}{The number of rows to read of the data table.
# Ignored if \code{rows} is specified.}
# \item{rows}{An row index @vector specifying which rows of the table
# to read, e.g. row one is the row following the header.
# Non-existing rows are ignored. Note that rows are returned in
# the same order they are requested and duplicated rows are also
# returned.}
# \item{col.names}{Same as in \code{read.table()}.}
# \item{check.names}{Same as in \code{read.table()}, but default value
# is @FALSE here.}
# \item{path}{If \code{file} is a filename, this path is added to it,
# otherwise ignored.}
# \item{...}{Arguments passed to @see "utils::read.table" used internally.}
# \item{stripQuotes}{If @TRUE, quotes are stripped from values before
# being parse.
# This argument is only effective when \code{method=="readLines"}.
# }
# \item{method}{If \code{"readLines"}, \code{(readLines())} is used
# internally to first only read rows of interest, which is then
# passed to \code{read.table()}.
# If \code{"intervals"}, contiguous intervals are first identified in
# the rows of interest. These intervals are the read one by one
# using \code{read.table()}.
# The latter methods is faster and especially more memory efficient
# if the intervals are not too many, where as the former is preferred
# if many "scattered" rows are to be read.}
# \item{verbose}{A @logical or a @see "Verbose" object.}
# }
#
# \value{
# Returns a @data.frame.
# }
#
# @author
#
# \seealso{
# @see "readTableIndex".
# @see "utils::read.table".
# @see "colClasses".
# }
#
# @keyword IO
#*/#########################################################################
setMethodS3("readTable", "default", function(file, colClasses=NULL, isPatterns=FALSE, defColClass=NA, header=FALSE, skip=0, nrows=-1, rows=NULL, col.names=NULL, check.names=FALSE, path=NULL, ..., stripQuotes=TRUE, method=c("readLines", "intervals"), verbose=FALSE) {
# Argument 'file' and 'path':
if (inherits(file, "connection")) {
} else if (is.character(file)) {
pathname <- Arguments$getReadablePathname(file, path=path, mustExist=TRUE)
file <- file(pathname)
} else {
throw("Unknown data type of argument 'file': ", mode(file))
}
# Argument 'colClasses':
# colClasses <- Arguments$getCharacters(colClasses)
# Argument 'isPatterns':
isPatterns <- Arguments$getLogical(isPatterns)
# Argument 'defColClass':
defColClass <- Arguments$getCharacter(defColClass, asGString=FALSE)
# Argument 'skip':
skip <- Arguments$getInteger(skip, range=c(0,Inf))
# Argument 'nrows':
nrows <- Arguments$getInteger(nrows)
# Argument 'rows':
if (!is.null(rows))
rows <- Arguments$getIntegers(rows, range=c(1,Inf))
# Argument 'col.names':
if (!is.null(col.names))
col.names <- Arguments$getCharacters(col.names)
# Argument 'stripQuotes':
stripQuotes <- Arguments$getLogical(stripQuotes)
# Argument 'method':
method <- match.arg(method)
# Argument 'verbose':
verbose <- Arguments$getVerbose(verbose)
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
# If file is not open, open it and close it when done.
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
if (!isOpen(file)) {
open(file, open="r")
on.exit(close(file), add=TRUE)
}
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
# Skip lines at the beginning?
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
if (skip > 0) {
readLines(file, n=skip)
verbose && cat(verbose, "Skipped the first ", skip, " lines.")
skip <- 0
}
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
# Get the formals of read.table()
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
formals <- formals(read.table)
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
# Read the header
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
if (header) {
sep <- list(...)$sep
if (is.null(sep))
sep <- formals$sep
quote <- list(...)$quote
if (is.null(quote))
quote <- formals$quote
colnames <- scan(file=file, what=character(0), sep=sep, quote=quote, nlines=1, quiet=TRUE)
# colnames <- readLines(file, n=1)
# colnames <- unlist(strsplit(colnames, split=split))
colnames <- trim(colnames)
# if (!is.null(quote) && nchar(quote) > 0) {
# }
names <- paste("'", colnames, "'", sep="")
verbose && cat(verbose, "Read ", length(colnames), " column names: ",
paste(names, collapse=", "))
}
if (!is.null(col.names))
colnames <- col.names
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
# Create colClasses?
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
isMap <- !is.null(names(colClasses))
if (!is.null(colClasses) && isMap) {
# Should colClasses be found using regular expression
# patterns or as is?
if (isPatterns) {
colClasses2 <- rep(NA_character_, times=length(colnames))
for (kk in seq_along(colClasses)) {
pattern <- names(colClasses)[kk]
colClass <- colClasses[kk]
# Find matching column names and assign the current column
# class to those columns.
incl <- (regexpr(pattern, colnames) != -1)
colClasses2[incl] <- colClass
}
colClasses <- colClasses2
} else {
colClasses <- colClasses[colnames]
}
colClasses[is.na(colClasses)] <- defColClass
verbose && cat(verbose, "Column classes: ")
verbose && print(verbose, colClasses)
}
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
# Read full data table?
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
if (is.null(rows)) {
verbose && enter(verbose, "Reading the complete data table")
df <- read.table(file, colClasses=colClasses, header=FALSE,
skip=0, nrows=nrows, check.names=check.names,
col.names=colnames, ...)
verbose && str(verbose, df)
verbose && exit(verbose)
# Return table
return(df)
}
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
# Read only certain rows?
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
if (method == "readLines") {
# Skip certain lines
if (skip > 0) {
readLines(file, n=skip)
verbose && cat(verbose, "Skipped the first ", skip, " lines.")
}
# Read all lines
verbose && enter(verbose, "Reading lines of interest")
t <- system.time({
lines <- readLines(file, n=max(rows))
}, gcFirst = FALSE)
verbose && printf(verbose, "Read %d lines in %.2f seconds.\n", length(lines), t[3])
# Did we try to read non-existing rows?
keep <- (rows <= length(lines))
rows <- rows[keep]
if (verbose && any(!keep)) {
verbose && cat(verbose, "Skipped ", sum(!keep), " non-existing rows.")
}
keep <- NULL; # Not needed anymore
# Keep only those of interest
lines <- lines[rows]
verbose && cat(verbose, "Kept ", length(lines), " lines.")
verbose && exit(verbose)
if (stripQuotes) {
quote <- list(...)$quote
if (is.null(quote))
quote <- formals$quote
if (nchar(quote) > 0) {
verbose && enter(verbose, "Stripping quotes from all lines: ", quote)
quotes <- unlist(strsplit(quote, split=""))
for (kk in seq_along(quotes))
lines <- gsub(quotes[kk], "", lines, fixed=TRUE)
verbose && exit(verbose)
}
}
verbose && enter(verbose, "Re-reading the lines as a data table")
con <- textConnection(lines)
tryCatch({
t <- system.time({
df <- read.table(con, colClasses=colClasses, header=FALSE,
skip=skip, nrows=nrows, check.names=check.names,
col.names=colnames, ...)
}, gcFirst = FALSE)
verbose && printf(verbose, "Read a %dx%d table in %.2f seconds.\n", nrow(df), ncol(df), t[3])
}, finally = {
close(con)
})
lines <- NULL; # Not needed anymore
verbose && exit(verbose)
} else if (method == "intervals") {
remap <- TRUE
rows2 <- unique(rows)
if (identical(rows, rows2)) {
rows2 <- sort(rows2)
if (identical(rows, rows2))
remap <- FALSE
}
# Get contiguous intervals of rows indices.
intervals <- seqToIntervals(rows2)
rows2 <- NULL; # Not needed anymore
verbose && cat(verbose, "Reading row intervals: ")
verbose && print(verbose, intervals)
nextRow <- 1
df <- NULL
rownames <- NULL
ready <- FALSE
for (ii in seq_len(nrow(intervals))) {
from <- intervals[ii,"from"]
to <- intervals[ii,"to"]
verbose && cat(verbose, "Interval [", from, ",", to, "]")
# Skip to the next row
skip <- (from - nextRow)
# Read 'nrows' from there on.
nrows <- (to-from+1)
tryCatch({
dfI <- read.table(file, colClasses=colClasses, header=FALSE,
skip=skip, nrows=nrows, check.names=check.names,
col.names=colnames, ...)
}, error = function(ex) {
# Ignore non-existing rows => we're done.
ready <<- (regexpr("no lines available", ex$message) != -1)
if (!ready)
signalCondition(ex)
})
if (ready)
break
# Did we read that many rows?
to <- min(to, from+nrow(dfI)-1)
# Assign rows names
rownames(dfI) <- from:to
rownames <- c(rownames, from:to)
if (is.null(df)) {
df <- dfI
} else {
df <- rbind(df, dfI)
}
dfI <- NULL; # Not needed anymore
nextRow <- to+1
}
# Finally, if 'rows' where not an order sets of unique row numbers,
# return a table with rows in the same order as the requested ones.
if (remap) {
idx <- match(rows, rownames)
idx <- idx[!is.na(idx)]
df <- df[idx,]
rownames <- rownames[idx]
idx <- NULL; # Not needed anymore
}
}
verbose && str(verbose, df)
# Return table
df
})
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.