## metaclipR.DatasetSubset Construct a directed graph for DatasetSubset metadata encoding
##
## Copyright (C) 2017 Predictia (http://www.predictia.es)
##
## This program is free software: you can redistribute it and/or modify
## it under the terms of the GNU General Public License as published by
## the Free Software Foundation, either version 3 of the License, or
## (at your option) any later version.
##
## This program is distributed in the hope that it will be useful,
## but WITHOUT ANY WARRANTY; without even the implied warranty of
## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
## GNU General Public License for more details.
##
## You should have received a copy of the GNU General Public License
## along with this program. If not, see <http://www.gnu.org/licenses/>.
#' @title Directed metadata graph construction for DatasetSubsets
#' @description Build a directed metadata graph from climate4R grids that are subsets of Datasets
#' @param package package
#' @param version version
#' @param graph A previously initialized metaclipR output list ($graph + $parentnodename). Default to \code{NULL}, which starts a new empty graph
#' @param fun function name (default, and only value accepted is \code{"subsetGrid"})
#' @param arg.list arg.list. See details
#' @param output A climate4R grid, resulting from the application of \code{\link[transformeR]{subsetGrid}}.
#' @param disable.command Better not to touch. For internal usage only (used to re-use most of the code in other functions, but skipping command tracking)
#' @details This function takes as reference the semantics defined in the Data Source and Transformation ontology
#' defined in the Metaclip Framework (\url{http://metaclip.predictia.es/}).
#'
#' @return A named list with the updated graph in element \code{"graph"} and the parent node name,
#' sometimes needed for linking subsequent operations.
#'
#' \strong{Argument list}
#'
#' A list of arguments passed to \code{\link{subsetGrid}} is required. The different
#' arguments are explained in the the help menu of \code{subsetGrid}.
#' The following list need to be specified. The default values are also indicated:
#'
#' \itemize{
#' \item \code{var = NULL}
#' \item \code{lonLim = NULL}
#' \item \code{latLim = NULL}
#' \item \code{members = NULL}
#' \item \code{years = NULL}
#' \item \code{outside = FALSE}
#' \item \code{drop = TRUE}
#' }
#'
#' @references
#' \href{https://docs.google.com/presentation/d/1CQyxVIj501N7VylMR9i_T_XwFDId6MDNvpPnuaLXgnI/present}{Visual schema of the data transformation ontology}
#'
#' \href{http://www.meteo.unican.es/en/climate4r}{Climate4R page at University of Cantabria}
#' @export
#' @importFrom igraph make_empty_graph add_vertices add_edges
#' @importFrom transformeR getShape getSeason getRefDates getGrid
#' @importFrom utils tail
#' @author D. San MartÃn, J. Bedia
#' @family subsetting
#' @examples
#' data(S4_tas_iberia)
#' require(transformeR)
#' require(igraph)
#' # An example subset from S4_tas_iberia
#' arg.list <- list(grid = S4_tas_iberia,
#' members = 1:3, # First three members (out of 15)
#' season = NULL,
#' years = 1991:1995, # Period 1991-1995
#' lonLim = c(-5,3), # longitudinal extent
#' latLim = c(37,43),
#' drop = FALSE) # latitudinal extent
#' # Dataset subset is applied using subsetGrid:
#' dataset.subset <- do.call("subsetGrid", args = arg.list)
#' # Just for illustration, visualization of the climatology:
#' plotClimatology(climatology(dataset.subset, by.member = FALSE),
#' backdrop.theme = "countries")
#' # Encoding provenance:
#' dsgraph <- metaclipR.DatasetSubset(package = "transformeR",
#' version = "1.1.1",
#' fun = "subsetGrid",
#' arg.list = arg.list,
#' output = "dataset.subset")
#' # The Graph containing the metadata has been created.
#' # This is a bit congested, but note that it is not conceived to be visualized in R
#' plot(dsgraph$graph, vertex.size = 5, edge.arrow.size=0.1)
metaclipR.DatasetSubset <- function(package = "transformeR",
version = "1.3.2",
graph = NULL,
fun = NULL,
arg.list = NULL,
output = NULL,
disable.command = FALSE) {
# First, an empty graph is created
emptygraph <- TRUE
if (is.null(graph)) {
graph <- make_empty_graph(directed = TRUE)
} else {
parent.node <- graph$parentnodename
graph <- graph$graph
if (class(graph) != "igraph") {
stop("The input graph has not a valid format")
}
emptygraph <- FALSE
}
if (is.character(output)) output <- get(output)
output.shape <- getShape(output)
output$Data <- NULL
# dataset <- attr(output, "dataset")
# DSname <- paste(dataset, "subset", sep = ".")
DatasetSubset.nodename <- paste0("DatasetSubset.", randomName())
graph <- add_vertices(graph,
nv = 1,
name = DatasetSubset.nodename,
label = "DatasetSubset",
className = "ds:DatasetSubset")
# Link with parent node if existing
if (!emptygraph) {
graph <- add_edges(graph,
c(getNodeIndexbyName(graph, parent.node),
getNodeIndexbyName(graph, DatasetSubset.nodename)),
label = paste0("ds:hadDatasetSubset"))
}
# Variable ---------------------
var.name <- output$Variable$varName
units <- attr(output$Variable, "units")
vlevel <- ifelse(is.null(output$Variable$level), -9999, output$Variable$level)
grd <- getGrid(output)
var.nodename <- paste("Variable", randomName(), sep = ".")
graph <- add_vertices(graph,
nv = 1,
name = var.nodename,
label = var.name,
className = "ds:Variable",
description = "Encode metadata of a Variable",
attr = list("ds:withUnits" = units,
"ds:hasVerticalLevel" = vlevel,
"ds:hasHorizontalResX" = attr(grd, "resX"),
"ds:hasHorizontalResY" = attr(grd, "resY")))
graph <- add_edges(graph,
c(getNodeIndexbyName(graph, DatasetSubset.nodename),
getNodeIndexbyName(graph, var.nodename)),
label = "ds:hasVariable")
# TemporalResolution ---------------------
# hasTimeStep
time.step <- getHasTimeStep(output)
# hasCellMethod - Recovers the cell method from the last aggregation level
ind <- tail(grep("cellfun", names(attributes(output$Variable))), 1)
cell.method <- if (!is.null(ind)) {
attributes(output$Variable)[[ind]]
} else {
"undefined"
}
timeres.nodename <- paste("TemporalResolution", randomName(), sep = ".")
graph <- add_vertices(graph,
nv = 1,
name = timeres.nodename,
label = "TemporalResolution",
className = "ds:TemporalResolution",
description = "Encode metadata of temporal resolution",
attr = list("ds:hasTimeStep" = time.step,
"ds:hasCellMethod" = cell.method))
graph <- add_edges(graph,
c(getNodeIndexbyName(graph, DatasetSubset.nodename),
getNodeIndexbyName(graph, timeres.nodename)),
label = "ds:hasTemporalResolution")
# VariableStandardDefinition --------------------------------
# This is controlled by the dictionary
if (attr(output$Variable, "use_dictionary")) {
main.url <- "http://cfconventions.org/standard-names.html"
version.tag <- "v46"
} else {
main.url <- "NULL"
version.tag <- "NULL"
}
varstdef.nodename <- paste("VarStdDef", randomName(), sep = ".")
graph <- add_vertices(graph,
nv = 1,
name = varstdef.nodename,
label = "StandardDef",
className = "ds:VariableStandardDefinition",
description = "Encode metadata of Variable Standard Definition (if any)",
attr = list("ds:hasMainURL" = main.url,
"ds:withVersionTag" = as.character(version.tag)))
graph <- add_edges(graph,
c(getNodeIndexbyName(graph, var.nodename),
getNodeIndexbyName(graph, varstdef.nodename)),
label = "ds:hasStandardDefinition")
# SpatialExtent (Horizontal) -------------------------------------
hext.nodename <- paste("HorizontalExtent", randomName(), sep = ".")
graph <- add_vertices(graph,
nv = 1,
name = hext.nodename,
label = "HorizontalExtent",
className = "ds:SpatialExtent",
description = "Encode metadata of (Horizontal) Spatial Extents",
attr = list("ds:xmin" = grd$x[1],
"ds:ymin" = grd$y[1],
"ds:xmax" = grd$x[2],
"ds:ymax" = grd$y[2])
)
graph <- add_edges(graph,
c(getNodeIndexbyName(graph, DatasetSubset.nodename),
getNodeIndexbyName(graph, hext.nodename)),
label = "ds:hasHorizontalExtent")
# Spatial Extent (Vertical) --------------------------------------
vextent.nodename <- paste("VerticalExtent", randomName(), sep = ".")
graph <- add_vertices(graph,
nv = 1,
name = vextent.nodename,
label = "VerticalExtent",
className = "ds:SpatialExtent",
description = "Encode metadata of (Vertical) Spatial Extents")
graph <- add_edges(graph,
c(getNodeIndexbyName(graph, DatasetSubset.nodename),
getNodeIndexbyName(graph, vextent.nodename)),
label = "ds:hasVerticalExtent")
# Realization ----------------------------------------------------
if ("member" %in% names(output.shape)) {
n.mem <- output.shape["member"]
member.node.names <- vector("list", n.mem)
for (i in 1:n.mem) {
mem <- output$Members[i]
if (is.null(mem)) mem <- i
member.node.names[[i]] <- paste("Realization", i, randomName(), sep = ".")
graph <- add_vertices(graph,
nv = 1,
name = member.node.names[[i]],
label = mem,
className = "ds:Realization",
description = "Encode metadata of Realizations",
attr = list("ds:hasMember" = mem))
graph <- add_edges(graph,
c(getNodeIndexbyName(graph, DatasetSubset.nodename),
getNodeIndexbyName(graph, member.node.names[[i]])),
label = "ds:hasRealization")
}
# TemporalInstant (initialization) -----------------------------------------------
# First check if initialization times are defined for the dataset
if (!is.null(output$InitializationDates)) {
n.inits <- length(output$InitializationDates[[1]])
for (i in 1:n.mem) {
node.orig <- member.node.names[[i]]
for (j in 1:n.inits) {
node.dest <- paste0(node.orig, "_init.", j)
init.date <- output$InitializationDates[[i]][[j]] %>% as.POSIXlt(tz = "GMT") %>%
as.POSIXct() %>% format(format = "%Y-%m-%d %H:%M:%S", usetz = TRUE)
graph <- add_vertices(graph,
nv = 1,
name = node.dest,
label = init.date, #paste0("InitTime_", j),
className = "ds:TemporalInstant",
description = "Encode metadata for Initialization Times",
attr = list("prov:generatedAtTime" = init.date))
graph <- add_edges(graph,
c(getNodeIndexbyName(graph, node.orig),
getNodeIndexbyName(graph, node.dest)),
label = "ds:hasInitializationTime")
}
}
}
}
# TemporalPeriod -----------------------------------------------
filter.month <- as.list(getSeason(output))
names(filter.month) <- paste("ds:filterMonth", 1:length(filter.month), sep = ".")
startTime <- getRefDates(output)[1] %>% as.POSIXlt(tz = "GMT") %>%
as.POSIXct() %>% format(format = "%Y-%m-%d %H:%M:%S", usetz = TRUE)
endTime <- tail(getRefDates(output, "end"), 1) %>% as.POSIXlt(tz = "GMT") %>%
as.POSIXct() %>% format(format = "%Y-%m-%d %H:%M:%S", usetz = TRUE)
timeper.nodename <- paste("TemporalPeriod", randomName(), sep = ".")
graph <- add_vertices(graph,
nv = 1,
name = timeper.nodename,
label = "TemporalPeriod",
className = "ds:TemporalPeriod",
description = "Encode metadata of Temporal Periods",
attr = c(
list("prov:startedAtTime" = startTime,
"prov:endedAtTime" = endTime),
filter.month))
graph <- add_edges(graph,
c(getNodeIndexbyName(graph, DatasetSubset.nodename),
getNodeIndexbyName(graph, timeper.nodename)),
label = "ds:hasValidTemporalExtent")
# ARGUMENT - COMMAND METADATA ----------------------------------------------
if (!disable.command) {
if ("grid" %in% names(arg.list)) arg.list <- arg.list[-grep("grid", names(arg.list))]
graph <- metaclip.graph.Command(graph, package, version, fun, arg.list, DatasetSubset.nodename)
}
return(list("graph" = graph, "parentnodename" = DatasetSubset.nodename))
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.