tests/TabularTextFileSet.R

source("incl/start.R")

message("*** TabularTextFileSet ...")

# Setup a file set consisting of all *.dat tab-delimited files
# in a particular directory
pathA <- system.file("exData", "dataSetA,original", package="R.filesets")
ds <- TabularTextFileSet$byPath(pathA, pattern="[.]dat$")
print(ds)


# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
# Extract one column with a particular name (one per file)
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
# Read column 'y' and a subset of the rows from each of the
# tab-delimited files and combine into a matrix
rows <- c(3:5, 8, 2)
data <- extractMatrix(ds, column="y", colClasses="integer", rows=rows, drop=TRUE)
print(data)


# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
# Read data frames from each of the files
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
dataList <- lapply(ds, FUN=readDataFrame)
print(dataList)

rows <- c(3:5, 8, 2)
dataList <- lapply(ds, FUN=readDataFrame, rows=rows)
print(dataList)



# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
# Read common columns and stack into one data frame
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
colNames <- Reduce(intersect, lapply(ds, getColumnNames))
cat("Common column names:\n")
print(colNames)

# Read the *common* columns "as is" (hence 'NA')
colClasses <- rep(NA, times=length(colNames))
names(colClasses) <- colNames
cat("Column class patterns:\n")
print(colClasses)

data <- readDataFrame(ds, colClasses=colClasses, verbose=TRUE)
print(data)


# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
# Translate column names on the fly
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
lapply(ds, FUN=setColumnNamesTranslator, function(names, ...) toupper(names))
data <- readDataFrame(ds, colClasses=c("(X|Y)"="integer", "CHAR"="character"))
print(data)



# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
# ADVANCED: Translation of fullnames
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
message("- Translation of fullnames")

## Extra sanity checks to troubleshoot stall on CRAN MS Windows servers
path <- getPath(ds)
cat(sprintf("Data set path: %s\n", sQuote(path)))
stopifnot(length(path) > 0)
pattern <- ",fullnames[.]txt$"
cat(sprintf("Pattern: %s\n", sQuote(path)))
files <- dir(pattern = pattern, path = path, full.names = TRUE, all.files = TRUE)
cat(sprintf("Data set files [n = %d]:\n", length(files)))
print(files)
stopifnot(length(files) > 0)

fnts <- TabularTextFileSet$byPath(path, pattern = pattern)
print(fnts)
str(as.list(fnts))

cat("Data set before applying fullname translator:\n")
print(ds)
appendFullNamesTranslator(ds, as.list(fnts))

cat("Data set after applying fullname translator:\n")
print(ds)

cat("Default fullnames:\n")
print(head(getFullNames(ds, translate=FALSE)))
cat("Translated fullnames:\n")
print(head(getFullNames(ds)))

cat("Default fullnames:\n")
print(getFullNames(ds, translate=FALSE))
cat("Translated fullnames:\n")
print(getFullNames(ds))

message("*** TabularTextFileSet ... DONE")

source("incl/end.R")
HenrikBengtsson/R.filesets documentation built on Jan. 31, 2024, 8:11 a.m.