Nothing
##***********************************************************************
## $Id: timeseries-io.R 159 2012-03-20 10:13:01Z mariotomo $
##
## this file is part of the R library delftfews. delftfews is free
## software: you can redistribute it and/or modify it under the terms
## of the GNU General Public License as published by the Free Software
## Foundation, either version 3 of the License, or (at your option)
## any later version.
##
## delftfews is distributed in the hope that it will be useful, but
## WITHOUT ANY WARRANTY; without even the implied warranty of
## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
## General Public License for more details.
##
## You should have received a copy of the GNU General Public License
## along with the nens libraray. If not, see
## <http://www.gnu.org/licenses/>.
##
## Purpose : input-output functions relative to timeseries
##
## initial programmer : Mario Frasca
## contributors: Mario Frasca, Michèl van Leeuwen,
##
## initial date : 20091120
##
EPOCH <- as.POSIXct("1970-01-01 00:00:00", "%Y-%m-%d %H:%M:%S", tz="UTC")
require("XML")
require("logging")
saveTimestampedInput <- function(origin) {
## stores a copy of the input file.
## the target file is named as the input file itself, but is placed
## in a directory next to the input file and named as the current
## timestamp, cut to the whole hour.
parts <- strsplit(origin, "/", fixed=TRUE)[[1]]
dirName <- format(Sys.time(), "%Y%m%d%H00")
parts <- c(parts[1:(length(parts) - 1)], dirName, parts[length(parts)])
destination <- paste(parts, collapse="/")
dir.create(paste(parts[1:(length(parts) - 1)], collapse="/"), showWarnings=FALSE)
file.copy(from=origin, to=destination)
}
read.PI <- function(filename, step.seconds=NA, na.action=na.fill, parameterId, is.irregular=FALSE, filter.timestamp, skip.short.lived=NA, base) {
## creates a data.frame containing all data in matrix form.
isToBeFiltered <- !missing(filter.timestamp)
groupByStep <- function(seconds, values, step.seconds, flags=NA, missVal=NA, keepThese=TRUE) {
## groups the values by seconds, one each step.
##
## a value may be NA, or be flagged as NA (flag 9), or be equivalent
## to NA (equal to missVal)
## if more values are given in one step, the last is returned
if(!any(is.na(missVal))) # missing value was specified
values[values == missVal] <- NA
if(!any(is.na(flags))) # a flags array was provided
values[flags == 9] <- NA
if(length(!is.na(values)) > 0) {
## start counting steps from first second, not from 1970-01-01
seconds <- seconds - base
result <- aggregate(values[keepThese], by=list(ceiling(seconds[keepThese]/step.seconds)*step.seconds), function(x) tail(x, n=1))
colnames(result) <- c('s', "v")
result$s <- result$s + base
} else {
result <- structure(list(s = numeric(0), v = numeric(0)), .Names = c("s", "v"), class = "data.frame")
}
return (result)
}
## first scans document to find all necessary timestamps, then
## produces empty data.frame with all timestamps found, then adds
## columns corresponding to each series nodes.
## Read XML file
doc <- XmlDoc$new(filename)
if(exists("doSaveInput") && get("doSaveInput"))
saveTimestampedInput(filename)
## time offset in seconds from the timeZone element (which is in hours)
timeOffset <- as.double(doc$getText("/TimeSeries/timeZone")) * 60 * 60
## if missing as actual argument, base is the earliest timestamp in
## the whole file.
if(missing(base)) {
events <- doc$getAttribute(c("date", "time"), "/TimeSeries/series/event")
if(is.matrix(events))
base <- as.seconds(as.POSIXct(min(apply(events, 1, function(x) paste(x, collapse=" "))),
"%Y-%m-%d %H:%M:%S", tz='UTC'))
else
## well, if there are no events, then base is 1970-01-01
base <- 0
}
## we only operate on the "series" nodes.
if(missing(parameterId)) {
seriesNodes <- doc$.getNodeSet("/TimeSeries/series")
headerNodes <- doc$.getNodeSet("/TimeSeries/series/header")
} else {
seriesNodes <- doc$.getNodeSet(paste("/TimeSeries/series[", paste(sprintf("./header/parameterId='%s'", parameterId), collapse=" or "), "]", sep=""))
headerNodes <- doc$.getNodeSet(paste("/TimeSeries/series/header[", paste(sprintf("./parameterId='%s'", parameterId), collapse=" or "), "]", sep=""))
}
## reset the name of the elements in seriesNodes from the general
## "series" to "lp".location.parameter
lp <- sapply(c("locationId", "parameterId"),
function(name) sapply(headerNodes, function(el) xmlValue(xmlElementsByTagName(el, name)[[1]])),
simplify=FALSE)
names(seriesNodes) <- paste("lp", lp$locationId, lp$parameterId, sep=".")
## scan the document to find the first and the last (step-valid)
## timestamps. use them to initialize the result data.frame.
getElementSeconds <- function(node, tagname) {
## returns the timestamps in seconds since EPOCH for the given node
eventNodeList <- xmlElementsByTagName(node, tagname)
dates <- sapply(eventNodeList, xmlGetAttr, name = "date")
times <- sapply(eventNodeList, xmlGetAttr, name = "time")
timestamps <- as.POSIXct(paste(dates, times), "%Y-%m-%d %H:%M:%S", tz="UTC")
as.numeric(difftime(timestamps, EPOCH, tz="UTC"), units="secs")
}
## finally extract all the timestamped values and fill in the blanks
getValues <- function(node, as.zoo=FALSE) {
## get the values as a named column
header <- xmlElementsByTagName(node, "header")[[1]]
missVal <- xmlElementsByTagName(header, 'missVal')[[1]]
missVal <- as.numeric(xmlValue(missVal))
eventNodeList <- xmlElementsByTagName(node, "event")
dates <- sapply(eventNodeList, xmlGetAttr, name = "date")
times <- sapply(eventNodeList, xmlGetAttr, name = "time")
values <- as.numeric(sapply(eventNodeList, xmlGetAttr, name = "value"))
flags <- as.numeric(sapply(eventNodeList, xmlGetAttr, name = "flag"))
timestamps <- as.POSIXct(paste(dates, times), "%Y-%m-%d %H:%M:%S", tz="UTC")
seconds <- as.numeric(difftime(timestamps, EPOCH, tz="UTC"), units="secs")
if(!is.na(skip.short.lived)) {
grouped <- groupByStep(seconds, values, step.seconds, flags, missVal, c(diff(seconds) > skip.short.lived, TRUE))
} else {
grouped <- groupByStep(seconds, values, step.seconds, flags, missVal)
}
## 3106 - this is the place for filtering the data. we just read
## it, modified according to the settings, but we have not yet
## stored it in the partial result.
if(isToBeFiltered) {
old.length <- length(grouped$v)
if(old.length > 0) {
validTimestamps <- sapply(EPOCH + grouped$s - timeOffset, filter.timestamp)
logfinest("surviving timestamps: %s", validTimestamps)
grouped <- grouped[validTimestamps, ]
}
logdebug("filtering, %d/%d values survive", length(grouped$v), old.length)
}
if(as.zoo) {
result <- zoo(cbind(grouped$v), order.by=EPOCH + grouped$s - timeOffset)
} else {
column <- rep(NA, length(result.index))
column[as.seconds(result.index) %in% (grouped$s - timeOffset)] <- grouped$v
result <- na.action(column)
}
return(result)
}
seconds <- sapply(seriesNodes, getElementSeconds, tagname='event')
if(is.irregular) {
if (is.na(step.seconds)) { # here step.seconds means granularity and must be set.
## no granularity means keep data precise to the second.
step.seconds <- 1
}
result <- zoo(order.by=structure(numeric(0), class=c("POSIXct", "POSIXt")))
dim(result) <- c(0, 0)
for(name in names(seriesNodes)) {
logdebug("doing column '%s'", name)
item <- getValues(seriesNodes[[name]], as.zoo=TRUE)
if(nrow(item) == 0) {
item <- zoo(cbind(v=NA), order.by=index(result))
}
## TODO_01: this is a workaround on a zoo problem. only two
## lines ought to be needed and are clearly marked in this
## block.
if(dim(result)[1] == 0) {
cn <- c(colnames(result), name)
dim(result)[2] <- dim(result)[2] + 1
colnames(result) <- cn
if(dim(item)[1] != 0) {
result <- zoo(order.by=index(item))
for(n in cn)
result <- cbind(result, NA)
colnames(result) <- cn
result[, name] <- item
}
} else {
colnames(item) <- name # THIS LINE SURVIVES AFTER BUG SOLVING
if(dim(item)[1] == 0) {
actually <- item
item <- result[, 1]
faking <- TRUE
} else {
faking <- FALSE
}
result <- cbind(result, item) # THIS LINE SURVIVES AFTER BUG SOLVING
if(faking)
result[, ncol(result)] <- NA
}
## END_01
}
} else {
if(all(lapply(seconds, length) == 0)) {
## we have only series without events, so we make a fictive
## `seconds` list from startDate, endDate and `step.seconds`.
if (is.na(step.seconds))
stop("no events, we really need `step.seconds`.")
getHeaderSeconds <- function(node, tagname) {
headerNodeList <- xmlElementsByTagName(node, 'header')
startDate <- sapply(headerNodeList, getElementSeconds, tagname='startDate')
endDate <- sapply(headerNodeList, getElementSeconds, tagname='endDate')
seq(from=startDate, to=endDate, by=step.seconds)
}
seconds <- sapply(seriesNodes, getHeaderSeconds)
}
if (is.na(step.seconds)) {
step.seconds <- get.step(seconds)
}
startsAndEnds <- range(seconds)
first <- startsAndEnds[1]
last <- first + ceiling(diff(startsAndEnds) / step.seconds) * step.seconds
## result zoo is indexed on timestamps. you can retrieve them using the `index` function.
result.index <- EPOCH + seq(from=first, to=last, by=step.seconds) - timeOffset
if(isToBeFiltered)
result.index <- result.index[sapply(result.index, filter.timestamp)]
## column-bind the timestamps to the collected values. column names are taken from names(seriesNodes).
result <- zoo(cbind(mapply(getValues, seriesNodes)), order.by=result.index, frequency=1.0/step.seconds)
}
return(result)
}
write.PI <- function(data, data.description, filename, global.data)
## generic function, saves a timeseriesset to a file
UseMethod('write.PI')
write.PI.data.frame <- function(data, data.description, filename, global.data=NA) {
data <- zoo(data[-1], order.by=data$timestamps)
return(write.PI.zoo(data, data.description, filename, global.data=global.data))
}
write.PI.zoo <- function(data, data.description, filename, global.data=NA) {
## exports parts of the data data.frame to the XML filename
## data.description is a data.frame, looking like this:
##-----------------------------------------------------------
## type locationId parameterId missVal units column
##1 instantaneous 155 Ai2 -999.0 m^3/min result
##2 instantaneous 156 Ai3 -999.0 mmHg check
if(missing(data.description)) {
data.description <- data.frame(column=colnames(data),
type='instantaneous',
locationId=sapply(strsplit(colnames(data), '.', fixed=TRUE), function(x) x[2]),
parameterId=sapply(strsplit(colnames(data), '.', fixed=TRUE), function(x) paste(x[-(1:2)], collapse='.')),
units='-',
InfVal=-999,
stationName='-',
longName='-')
}
timeStep <- get.step(data)
looksLikeNULL <- function(object, name) {
if(!(name %in% names(object)))
return(TRUE)
if(is.na(object[[name]]))
return(FALSE)
if(is.null(object[[name]]))
return(TRUE)
if(object[[name]] == 'NULL')
return(TRUE)
return(FALSE)
}
CreateSeriesNode <- function(item) {
## gets a configuration item (item is a row of data.description)
## and returns the corresponding 'series' node, filled with
## 'event' nodes
## cut uninteresting columns
actualdata <- data.frame(seconds = as.seconds(index(data)))
actualdata$column <- as.vector(data[, item[['column']]])
## cut rows that generate no 'event' node.
if(looksLikeNULL(item, 'missVal')) {
actualdata <- subset(actualdata, !is.na(column))
}
if(looksLikeNULL(item, 'InfVal')) {
actualdata <- subset(actualdata, !is.infinite(column))
}
## what are the true start and end instants of this series?
if(nrow(actualdata) != 0) {
start <- EPOCH + min(actualdata$seconds)
end <- EPOCH + max(actualdata$seconds)
} else {
start <- end <- EPOCH
if('startDate' %in% names(item))
start <- EPOCH + as.numeric(item[['startDate']]) * 60
if('endDate' %in% names(item))
end <- EPOCH + as.numeric(item[['endDate']]) * 60
}
tsDate <- function(x) { format.POSIXct(x, format="%Y-%m-%d") }
tsTime <- function(x) { format.POSIXct(x, format="%H:%M:%S") }
CreateSeriesEventNode <- function(i) {
## gets a timestamped value and returns an xmlNode
## it is called by an `apply` by row, so gets arguments as strings.
ts <- EPOCH + as.numeric(i[1])
value <- as.numeric(i[2])
missingValueNode <- function(name) {
if(looksLikeNULL(item, name) || is.na(item[[name]]) || is.nan(item[[name]]))
## write 0 and flag the value to "9"
xmlNode(name = 'event', attrs=c(date=tsDate(ts), time=tsTime(ts), value=0, flag=9))
else
## use the given special missing value
xmlNode(name = 'event', attrs=c(date=tsDate(ts), time=tsTime(ts), value=item[[name]], flag=8))
}
if(is.infinite(value))
missingValueNode('InfVal')
else if(is.na(value))
missingValueNode('missVal')
else
xmlNode(name = 'event', attrs=c(date=tsDate(ts), time=tsTime(ts), value=value, flag=0))
}
## timeStep
## Time step for typical profile if variable to be defined for the historical event.
## Attributes:
## -unit: enumeration of: second, minute, hour, day, week, nonequidistant
## -multiplier: defines the number of units given above in a time step (not relevant for nonequidistant time steps)
## -divider: same function as the multiplier, but defines fraction of units in time step.
if(nrow(data) != nrow(actualdata)) # we removed rows
timeStepNode <- xmlNode('timeStep', attrs=c(unit="nonequidistant"))
else
timeStepNode <- xmlNode('timeStep', attrs=c(unit="second", multiplier=timeStep))
## NOTA BENE: elements in the header must respect the order
## specified in the xsd. FEWS will not read the file otherwise.
headerNode <- xmlNode('header',
xmlNode('type', item[['type']]),
xmlNode('locationId', item[['locationId']]),
xmlNode('parameterId', item[['parameterId']]),
timeStepNode,
xmlNode('startDate', attrs=c(date=tsDate(start), time=tsTime(start))),
xmlNode('endDate', attrs=c(date=tsDate(end), time=tsTime(end)))
)
if(looksLikeNULL(item, 'missVal') && !looksLikeNULL(item, 'InfVal') && !is.na(item['InfVal']))
headerNode <- addChildren(headerNode, kids=list(xmlNode('missVal', item[['InfVal']])))
for (name in c('missVal', 'longName','stationName', 'units'))
if (!looksLikeNULL(item, name) & !is.na(item[name]))
headerNode <- addChildren(headerNode, kids=list(xmlNode(name, item[[name]])))
for (name in colnames(global.data))
if (!is.na(global.data[1, name]) & !sum(grep('\\.', name)))
headerNode <- addChildren(headerNode, kids=list(xmlNode(name, global.data[1, name])))
seriesNode <- xmlNode('series', headerNode)
seriesNode <- addChildren(seriesNode, kids=apply(actualdata, 1, CreateSeriesEventNode))
return(seriesNode)
}
TimeSeriesNode <- xmlNode('TimeSeries',
attrs = c(
'xmlns' = "http://www.wldelft.nl/fews/PI",
'xmlns:xsi' = "http://www.w3.org/2001/XMLSchema-instance",
'xsi:schemaLocation' = "http://www.wldelft.nl/fews/PI http://fews.wldelft.nl/schemas/version1.0/pi-schemas/pi_timeseries.xsd",
'version' = "1.2"),
xmlNode('timeZone', list("0.0")))
if(any(!(data.description$column %in% names(data)))) {
logwarn("removing data.descriptions rows not available in the data")
data.description <- subset(data.description, column %in% names(data))
}
TimeSeriesNode <- addChildren(TimeSeriesNode, kids=apply(data.description, 1, CreateSeriesNode))
saveXML(TimeSeriesNode, file=filename, prefix = '<?xml version="1.0" encoding="UTF-8"?>\n')
}
read.BfG <- function(filename, column="value") {
## creates a data.frame containing all data in matrix form.
## Read data file
Q <- read.table(filename, skip = 4)
## assign names to columns
names(Q) <- c("year", "month", "day", "hour", column)
dates <- paste(Q$year, sprintf("%02.0f",Q$month), sprintf("%02.0f",Q$day), sep = "-")
times <- paste(sprintf("%02.0f", Q$hour), "00", "00", sep = ":")
## create the columns expected in our format
Q$timestamps <- as.POSIXct(paste(dates, times), "%Y-%m-%d %H:%M:%S", tz="UTC")
## remove the original first four columns, set 'timestamps' as the first column.
return(Q[column], Q$timestamps)
}
splitToNumeric <- function(x) {
## translates a string configuration submatrix to numeric
## not exported, tested
ow <- options("warn")
options(warn=-1)
result <- apply(x[,2:ncol(x), drop=FALSE], 1:2, as.numeric)
options(ow)
if (any(is.na(result)))
return(as.data.frame(x, stringsAsFactors=FALSE))
rownames(result) <- x[,1]
result <- as.data.frame(result, stringsAsFactors=FALSE)
names(attributes(result)$row.names) <- NULL
return(result)
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.