#require(stringr)
#require("Rgraphviz")
#require(CodeDepends)
#require(codetools)
algebr = new.env()
####
# Provenance functions
####
#
# Initiates a new derivation graph object for provenance recording
#
# @return derivation graph object in internal data.frame format
#
algebr$newDerivationGraph <-function(){
g = list(V = c(), E = list(), eAttrs=list(), nAttrs=list(), attrs=list(), fCalls=list(), exps=list())
g$attrs <- list(node=list(shape="ellipse", fixedsize=FALSE, fillcolor="white", style="filled"),
edge=list(style="solid", arrowhead="normal"))
return(g)
}
#
#' Retrieve derivation graph for export / vizalization
#'
#' This function returns a derivation graph from the previously recorded provenance, if available.
#' The graph can be serialized as dot/gv and other formats using rGraphviz
#'
#' @param g Derivation graph in internal data.frame format as created by the internal function 'algebr$newDerivationGraph'
#'
#' @return Returns derivation graph of the script in Ragraph format (Rgraphviz) or NULL, if there is no provanance available
#'
#' @seealso \code{\link{enableProvenance}}, \code{\link{disableProvenance}}, \code{\link{reset_provenance}}
#' @export
#'
#'
getScriptGraph <- function(g=algebr$scriptGraph){
if(is.null(g$V) || length(g$V) == 0){
warning("There is no derivation graph available because no provenance was recorded. Returning NULL")
return(NULL)
}
gR <- graph::graphNEL(nodes = g$V,
edgeL = g$E,
edgemode = "directed")
graph::graph.par(list(fontsize=11))
gR <- Rgraphviz::layoutGraph(gR) #graphNEL format (graph package, just edges and nodes)
#Ragraph format (GraphViz package, includes layout information)
gRlayout <- Rgraphviz::agopen(gR, name="f", attrs=g$attrs, nodeAttrs=g$nAttrs, edgeAttrs=g$eAttrs)
}
algebr$provenanceCallback <- function(algebr_env = algebr) {
#TODO: review the counting of rec_num, for capturing semantics and parsing in-outputs
isFirstCall = TRUE
function(expr, value, ok, visible, data=algebr_env) {
if(isFirstCall){
#don't track first task executed (= call to enableProvenance() )
isFirstCall <<- FALSE
return(TRUE)
}
algebr=data
#------------------------------------------------------------------------------------
# Collect provenance information from workspace changes that may be used for parsing
#------------------------------------------------------------------------------------
algebr$history_list = append(algebr$history_list, expr)
#cat(as.character(as.expression(expr)))
algebr$new_ls = ls(envir = globalenv())
#notify and track new variables
# new_vars = algebr$new_ls[!algebr$new_ls %in% algebr$last_ls]
# # if(length(new_vars)>0)
# # cat(paste("The following variables have been initialized: ", paste(new_vars, collapse = " "),"\n"))
#
# ls(envir = globalenv())[ls() %in% algebr$ls_last]
#
# info = CodeDepends::scriptInfo(CodeDepends::readScript(txt=as.character(as.expression(expr))))
# side_effects = new_vars[!new_vars %in% info[[1]]@outputs]
##save last captured call sementics to temporary call stack
algebr$tempCallStack = NULL
if(dim(algebr$callStack)[1]>0){
algebr$tempCallStack <- subset(algebr$callStack, rec_num == algebr$rec_num) ##TODO: Review counting re_num (as mentioned above)
}
expr=rewriteReplacementFunction(expr) #brings commands thast use replacement functions into a parser-friendly form
info = CodeDepends::scriptInfo(CodeDepends::readScript(txt=as.character(as.expression(expr))))
side_effects = c()
lapply(algebr$new_ls, function(var){
vare = algebr$enquote(var)
obj=eval(parse(text=vare), envir = globalenv())
if(!isTRUE(attr(obj,"isTracked")) && !(var %in% info[[1]]@outputs)){
side_effects <<- append(side_effects, var)
}else
return(invisible())
});side_effects
#------------------------------------------------------------------------------------
# Parse available provenance information to enhance the derivation graph
#------------------------------------------------------------------------------------
# actually parsing the last executed expression to a graph:
algebr$scriptGraph=algebr$parseCommand(expr,algebr$scriptGraph, first_call = TRUE)
cmd_id = algebr$scriptGraph$first_call
sapply(side_effects, function(variable){
algebr$scriptGraph <<- algebr$addNodeObject(var = variable, g = algebr$scriptGraph, isOutput = TRUE)
algebr$scriptGraph <<- algebr$addEdgeOutput(output = variable,cmd = cmd_id,g = algebr$scriptGraph, hidden = TRUE)
obj=eval(parse(text=algebr$enquote(variable)), envir = globalenv())
if(!isTRUE(attr(obj, "isTracked"))){
attr(obj, "isTracked") <-TRUE
assign(variable, obj,envir = globalenv())
}
})
#if(length(side_effects)>0)
# warning(paste("These variables have been initialized from side-efects of the previous task: ", paste(side_effects, collapse = " ")))
#estimates semantics of functions and expressions that are not explicitely stated
algebr$scriptGraph=algebr$estimateMissingSemantics(algebr$scriptGraph)
#look
#-----------------------------------------------------------------------------------
# Be aware that the last_ls variable is overwritten IN THE END of the callback, but may be used during parsing from various methods
# So please don't move it!
algebr$last_ls = algebr$new_ls
algebr$rec_num = algebr$rec_num+1 #record number
## for debugging
# tst = function(){
# sapply(ls(envir = globalenv()),function(var){
# obj=eval(parse(text=algebr$enquote(var)), envir = globalenv())
# return(isTRUE(attr(obj, "isTracked")))
# })}
#
# print(tst())
return(TRUE)
}
}
algebr$enabled = function(){
return(isTRUE(algebr$isEnabled))
}
#' Enable / disable provenance tracking
#'
#' @return (invisible)
#' @export
#'
#' @seealso \code{\link{disableProvenance}}, \code{\link{reset_provenance}}
enableProvenance <- function(){
if(is.null(algebr$rec_num))
algebr$rec_num = 1
if(is.null(algebr$version_history))
algebr$version_history = list()
if(is.null(algebr$history_list))
algebr$history_list = list()
if(is.null(algebr$scriptGraph))
algebr$scriptGraph = algebr$newDerivationGraph()
if(is.null(algebr$callStack))
algebr$callStack = data.frame()
if(!algebr$enabled()){
algebr$callback <- addTaskCallback(algebr$provenanceCallback())
algebr$isEnabled = TRUE
}else{
warning("Provenance tracking is already enabled!")
return(invisible())
}
algebr$last_ls = ls(envir = globalenv())
for(var in algebr$last_ls){
obj=eval(parse(text=algebr$enquote(var)), envir = globalenv())
if(!isTRUE(attr(obj, "isTracked"))){
attr(obj, "isTracked") <-TRUE
assign(var, obj,envir = globalenv())
}
}
invisible()
}
#' Enable / disable provenance tracking
#'
#' @return (invisible)
#' @export
#'
#' @seealso \code{\link{enableProvenance}}, \code{\link{reset_provenance}}
disableProvenance <-function(){
if(algebr$enabled()){
algebr$isEnabled= FALSE
removeTaskCallback(algebr$callback)
}else{
warning("Provenance tracking is already disabled!")
}
invisible()
}
#' Reset / delete internal provenance record
#'
#' Deletes all internally recorded provenance and sets the tracker back to default state
#'
#' @return (invisible)
#' @export
#'
#' @seealso \code{\link{enableProvenance}}, \code{\link{disableProvenance}}
reset_provenance <-function(){
if(algebr$enabled()){
disableProvenance()
}
algebr$rec_num = 1
algebr$version_history = list()
algebr$history_list = list()
algebr$scriptGraph = algebr$newDerivationGraph()
algebr$callStack = data.frame()
}
#' Retrieve the list of all recorded commands
#'
#' @return list of expressions
#' @export
#'
provenance_history <- function(){
return(algebr$history_list)
}
###
# Functions for creating a derivation graphs
####
algebr$addNodeLabel <- function(node_id, g, label){
g$nAttrs$label[[node_id]]=label
return(g)
}
algebr$getNodeLabel <- function(node_id, g){
g$nAttrs$label[[node_id]]
}
algebr$addNode = function(node_id, g, label=NULL, color=NULL, shape=NULL){
node_id = algebr$unquote(as.character(as.expression(node_id)))
if (all(g$V != node_id)){
g$V = append(g$V, node_id)
g$E[[node_id]]=list(edges=c(), weights=c())
}
if(!is.null(label))
g=algebr$addNodeLabel(node_id, g, label)
if(!is.null(color))
g$nAttrs$fillcolor[[node_id]]="orange"
g$last_vt=node_id
return(g)
}
algebr$instance <- function(var, pos=0, forInput=FALSE){
if(!is.character(var))
var=as.character(substitute(var))
if(forInput){
versions = getVersions(var)
inst=versions[(dim(versions))[1]+pos,]
if(algebr$rec_num==inst$rec_num && dim(versions)[1]>1)
return(versions[(dim(versions))[1]+pos-1,])
else
return(inst)
}
versions = getVersions(var)
return(versions[(dim(versions)[1]+pos),])
}
algebr$addNewVersionRecord <- function(var){
# print(paste("new version record",var))
var=as.character(as.expression(var))
#print(var)
var_e = parse(text=algebr$enquote(var))[[1]]
if(is.name(var_e)){
obj=eval(var_e, envir = globalenv())
if(!isTRUE(attr(obj, "isTracked"))){
attr(obj, "isTracked") <-TRUE
assign(var, obj,envir = globalenv())
# print(paste("Start tracking variable",var))
}
}
# print(paste("Version update of", var))
if(is.null(algebr$version_history[[var]]))
algebr$version_history[[var]]=data.frame()
#make sure that versions updated only ONCE per execution (even if this method might be called multiple times)
else if(algebr$instance(var)$rec_num>=algebr$rec_num){
return()
}
IID=algebr$unquote(var)
num=dim(algebr$version_history[[var]])[1]
if(num>0){
IID=paste0(IID,"~",num+1)
}
var0=var
if(stringr::str_detect(var,pattern = "<-")){
var0=paste0("`",var,"`")
}
command = paste(deparse(provenance_history()[[algebr$rec_num]]), collapse = "\n")
obj=eval(parse(text=var0),envir = globalenv())
functionalType = functionalType(obj)
if(is.null(functionalType))
functionalType = NA
instance=data.frame(rec_num = algebr$rec_num, IID=IID, class=paste(class(obj), collapse = " "), semantics = getObjectSemantics(var0), functionalType = functionalType, command = command, timestamp=timestamp(quiet = TRUE),stringsAsFactors = FALSE)
#instance=data.frame(rec_num = algebr$rec_num, IID=IID, class=paste(class(eval(parse(text=var0),envir = globalenv())), collapse = " "), semantics = getObjectSemantics(var0), command = command, timestamp=timestamp(quiet = TRUE),stringsAsFactors = FALSE)
# instance=data.frame(rec_num = algebr$rec_num, IID=IID, class=class(eval(parse(text=paste0("`",var,"`")),envir = globalenv())), semantics = getObjectSemantics(var), timestamp=timestamp(quiet = TRUE),stringsAsFactors = FALSE)
algebr$version_history[[var]]=rbind(algebr$version_history[[var]], instance)
}
#' Get history of a variable's name bindings
#'
#' This function returns a versioning history showing all modifications (initialization, updates and replacements) of a variable's name binding in the global environment
#' The version based on recording provenance, i.e. it only captures modifcations that where don after calling enableProvenance() and before calling disableProvenance()
#'
#' @param var Name of the variable, either string or object
#'
#' @return A data.frame describing changes of a variable's name binding
#' @export
#'
getVersions <- function(var){
if(!is.character(var))
var=as.character(substitute(var))
#if(stringr::str_detect(var,pattern = "<-")){
# var=paste0("`",var,"`")
#}
#print(var)
return(algebr$version_history[[var]])
}
algebr$addNodeObject <- function(var, g, isInput=FALSE, isOutput=FALSE, isSubset=FALSE) {
var = algebr$unquote(as.character(as.expression(var)))#ensure that variable name is a string
#print(paste("addN:",var))
isVersionUpdated=FALSE
label=var
node_name=var
if((isSubset || exists(var,envir = globalenv())) && is.null(algebr$version_history[[var]])){
algebr$addNewVersionRecord(var)
isVersionUpdated=TRUE
}
if(isInput && isOutput)
#normally never happens...
warning(paste0("Algebr: The variable \"",var,"\" addEclassified as Input AND output. The resulting derivation graph may be flawed."))
if(isInput){
#verify that object existed before the last task addEdgeexecuted
#if not, it shall be treaded as a litteral, e.g. value of class 'symbol' or 'expression'
#IMPORTANT: this test is heuristic, because the workspace cannot (yet?) really be examined before execution
# -- so there might be cases of missclassification as literal
#if-clause assumes that the previously executed task did not add variables to any parent environment
if(!var %in% algebr$last_ls && !isSubset && !exists(var, envir = parent.env(globalenv()))){
return(algebr$addNodeLiteral(label = var, g))
}
ver_num = dim(subset(getVersions(var), rec_num<algebr$rec_num))[1]
#versioning support of variables
# print(paste(ver_num, "version_num1",var, algebr$rec_num))
if(ver_num>1){
node_name = paste0(var,"~",ver_num)
label=node_name
}
class=getVersions(var)[ver_num, "semantics"]
functionalType = getVersions(var)[ver_num, "functionalType"]
#print(paste("CLASS,",class))
if(length(class)==0){
class=getVersions(var)[ver_num+1, "semantics"]
functionalType = getVersions(var)[ver_num+1, "functionalType"]
}
semantics = algebr$toSemanticLabel(class)
if(!is.na(functionalType)){
semantics = paste0(functionalType,"Data: ",semantics)
}
label=paste0(label, " \\n[",semantics,"]")
}else if(isOutput){
#for outputs, its sufficient to check if objects exists in current workspace
if(!var %in% algebr$new_ls && !isSubset && !exists(var, envir = parent.env(globalenv()))){
return(algebr$addNodeLiteral(label = var, g))
}
#create entry in version history
if((isSubset || exists(var,envir = globalenv())) && !isVersionUpdated){
algebr$addNewVersionRecord(var)
}
#versioning support of variables
ver_num = dim(getVersions(var))[1]
#print(paste(ver_num, "version_num2",var, algebr$rec_num))
if(ver_num>1){
node_name = paste0(var,"~",ver_num)
label=node_name
}
semantics = getVersions(var)[ver_num, "semantics"]
semantics = algebr$toSemanticLabel(semantics)
functionalType = getVersions(var)[ver_num, "functionalType"]
if(!is.na(functionalType)){
semantics = paste0(functionalType,"Data: ",semantics)
}
label=paste0(label, " \\n[",semantics,"]")
}
g=algebr$addNode(node_name,g, label = label)
return(g)
}
algebr$addEdgeOutput <- function(output, cmd, g, hidden=FALSE) {
if(all(g$E[[cmd]]$edges != output)){
g$E[[cmd]]$edges = append(g$E[[cmd]]$edges, output)
g$E[[cmd]]$weights = append(g$E[[cmd]]$weights, 1)
g$eAttrs$color[[paste0(cmd, "~", output)]] = "red"
if(hidden)
g$eAttrs$style[[paste0(cmd, "~", output)]] = "dashed"
}
if(cmd %in% names(g$fCalls)){
if(is.null(g$fCalls[[cmd]]$outputs)){
g$fCalls[[cmd]]$outputs = list(output)
}else{
g$fCalls[[cmd]]$outputs = append(g$fCalls[[cmd]]$outputs, output)
}
}
#sligthly redundand code...
if(cmd %in% names(g$exps)){
if(is.null(g$fexps[[cmd]]$outputs)){
g$exps[[cmd]]$outputs = list(output)
}else{
g$exps[[cmd]]$outputs = append(g$exps[[cmd]]$outputs, output)
}
}
return(g)
}
algebr$addEdgeInput <- function(input, cmd, g, label=NULL, hidden=FALSE){
if(all(g$E[[input]]$edges != cmd)){
g$E[[input]]$edges = append(g$E[[input]]$edges, cmd)
g$E[[input]]$weights = append(g$E[[input]]$weights, 1)
if(!is.null(label)){
g$eAttrs$label[[paste0(input,"~",cmd)]] =label
}
g$eAttrs$arrowhead[[paste0(input, "~", cmd)]] = "onormal"
if(hidden)
g$eAttrs$style[[paste0(input, "~", cmd)]] = "dashed"
}else
warning(paste("AlgebR: Dublicate edge from", input,"to",cmd,"! Second edge (and possibly the label) will not display in derivation graph."))
if(cmd %in% names(g$fCalls)){
if(is.null(g$fCalls[[cmd]]$inputs)){
g$fCalls[[cmd]]$inputs = list(input)
}else{
g$fCalls[[cmd]]$inputs = append(g$fCalls[[cmd]]$inputs, input)
}
}
#sligthly redundand code...
if(cmd %in% names(g$exps)){
if(is.null(g$fexps[[cmd]]$inputs)){
g$exps[[cmd]]$inputs = list(input)
}else{
g$exps[[cmd]]$inputs = append(g$exps[[cmd]]$inputs, input)
}
}
g$last_vt = input
return(g)
}
algebr$addEdgeDerivation <- function(parent, child, g, label=NULL, hidden=FALSE){
if(all(g$E[[parent]]$edges != child)){
g$E[[parent]]$edges = append(g$E[[parent]]$edges, child)
g$E[[parent]]$weights = append(g$E[[parent]]$weights, 1)
if(!is.null(label)){
g$eAttrs$label[[paste0(parent,"~",child)]] =label
}
g$eAttrs$arrowhead[[paste0(parent, "~", child)]] = "normal"
if(hidden)
g$eAttrs$style[[paste0(parent, "~", child)]] = "dashed"
}else
warning(paste("AlgebR: Dublicate edge from", parent,"to",child,"! Second edge (and possibly the label) will not display in derivation graph."))
g$last_vt = parent
return(g)
}
algebr$addEdgeFunctionCall <- function(fun_id, call_id, g, hidden=FALSE){
if(all(g$E[[fun_id]]$edges != call_id)){
g$E[[fun_id]]$edges = append(g$E[[fun_id]]$edges, call_id)
g$E[[fun_id]]$weights = append(g$E[[fun_id]]$weights, 1)
g$eAttrs$color[[paste0(fun_id, "~", call_id)]] = "blue"
if(hidden){
g$eAttrs$style[[paste0(fun_id, "~", call_id)]] = "dashed"
}
}
return(g)
}
algebr$addNodeLiteral = function(label, g){
vt_id=paste0("lt_",algebr$makeid()) #generates a random id for the node to be unique
g=algebr$addNode(node = vt_id, g = g,label = label)
return(g)
#g$V = append(g$V, cmd_id)
}
algebr$addNodeExpression = function(label, g){
cmd_id=paste0("expr_",algebr$makeid())
g=algebr$addNode(node_id = cmd_id, label = label,g = g, color = "orange")
return(g)
}
algebr$addNodeOperation = function(label, g){
cmd_id=paste0("fcall_",algebr$makeid())
g=algebr$addNode(node_id = cmd_id, label = label,g = g, color = "orange")
return(g)
}
algebr$parseCommand = function(cmd, g=list(V = c(), E = list(), attrs=list(), eAttrs=list(), nAttrs=list(), chunks=list(), last_vt=NULL), first_call=FALSE, isInput=FALSE, isOutput=FALSE){
# print(paste0("cmd:", deparse(cmd)))
if(first_call)
g$first_call = NULL
#print(paste(as.character(as.expression(cmd)), class(cmd)))
cmd_id = NULL
#CASE 1: cmd is some kind of variable
cmdInfo = CodeDepends::getInputs(algebr$removeTheAt(cmd))
if(is.name(cmd)){
g=algebr$addNodeObject(cmd, g, isInput= isInput, isOutput=isOutput)
cmd_id=g$last_vt
#CASE 2: cmd is some kind of literal
}else if(any(sapply(list(is.numeric, is.symbol, is.character, is.logical), function(x){x(cmd)}))){
#print("---------------------")
#print(paste0("found literal: ",cmd))
# print("---------------------")
g=algebr$addNodeLiteral(as.character(as.expression(cmd)), g)
cmd_id=g$last_vt
#CASE 2: cmd is an assignment (TODO: handle operators like ->, <<- ...)
}else if(any(class(cmd) ==c("=", "<-", "<<-"))){
cmd = rewriteReplacementFunction(cmd) #turn calls to replacement functions into a parser-friendly style
# handle left-hand side of assignmet
#print(paste("something wrong? - sould be an assignment",as.character(as.expression(cmd))))
g = algebr$parseCommand(cmd[[2]], g, isOutput=TRUE)
output_id=g$last_vt
#---- Parses right-hand side of the assignment:
g = algebr$parseCommand(cmd[[3]], g, isInput=TRUE)
#--------------------------------------------------------------
cmd_id = g$last_vt
g=algebr$addEdgeOutput(output = output_id, cmd = cmd_id, g)
#case 3: cmd is some kind of call, (possibly right hand of an assignment)
}else if(class(cmd)=="call"){
#Case 3.1: cmd is a function definition (whole definition can hardly be displayed or has to be parsed with special care)
if(cmd[[1]]=='function'){
cmd_id=paste0("def_",algebr$makeid())
# cmd_o=paste(as.character(as.expression(cmd), collapse="\n", sep=""))
g$chunks[[cmd_id]]=list(code=cmd, id=cmd_id)
g$nAttrs$fillcolor[[cmd_id]]="orange"
g$V = append(g$V, cmd_id)
g$E[[cmd_id]]=list(edges=c(), weights=c())
#handle function definitions
}else if(algebr$containsOnlyPrimitives(cmd = cmdInfo)){
#print(paste0("only primitives:", deparse(cmd)))
exp=as.character(as.expression(cmd))
g=algebr$addNodeExpression(label = exp,g = g)
cmd_id = g$last_vt
g$exps[[cmd_id]] = list(exp=exp, semantics=NA) #semantics must be estimated after evaluation of the whole task
outputs = append(cmdInfo@outputs, cmdInfo@updates)
inputs = cmdInfo@inputs
if(length(inputs)>0)
sapply(inputs, function(input){
#TODO: This solution needs to be reviewd (probably conflicting versioning)
g <<- algebr$addNodeObject(input, g, isInput = TRUE)
instance = algebr$instance(input, forInput = TRUE)
IID = instance$IID
g <<- algebr$addEdgeInput(input = IID, cmd = cmd_id, g = g)
})
if(length(outputs)>0)
sapply(outputs, function(output){
g <<- algebr$addNodeObject(output, g, isOutput = TRUE)
instance = algebr$instance(output)
IID = instance$IID
g <<- algebr$addEdgeOutput(output = IID, cmd = cmd_id, g = g)
})
}else if(cmd[[1]] =='[[' || cmd[[1]]=='['|| cmd[[1]]=='$'|| cmd[[1]]=='@'){
reference = cmd #TODO: add (this) object reference to profenance of this node
findParent <- function(cmd){
if(length(cmd)==1)
return(cmd)
else
return(findParent(cmd[[2]]))
}
parent_name = findParent(cmd)
#print(paste("Parent: ",parent))
g=algebr$addNodeObject(var = parent_name, g = g, isInput = isInput,isOutput = isOutput)
parent_id = g$last_vt
#g=algebr$parseCommand(as.name(selout$selection),g, isInput = FALSE, isOutput = FALSE) ##TODO not so clear how to handel isInput/isOutput flags here (may lead to misclassification in addNodeObject currently)
g=algebr$addNodeObject(var = cmd, g = g, isInput = isInput,isOutput = isOutput, isSubset=TRUE)
query = g$last_vt
cmd_id=g$last_vt
# print(paste(parent, selout$selection," <- create selection"))
if(isInput){
g=algebr$addEdgeInput(parent_id, query, g)
}else if(isOutput){
g=algebr$addEdgeOutput(parent_id, query, g)
parent_old=algebr$instance(as.character(parent_name), forInput = TRUE)$IID
parent_new=algebr$instance(as.character(parent_name))$IID
g=algebr$addEdgeDerivation(parent = parent_old,child = parent_new,g = g, hidden = TRUE)
}
#-------------------
}else if(any(cmd[[1]]==c("log","sin","cos"))&& (is.character(cmd[[2]]) || is.numeric(cmd[[2]]))){
##this actually seems never to be aplied
g = algebr$parseCommand(as.character(as.expression(cmd)));
query=g$last_vt
g$nAttrs$fillcolor[[query]]="orange"
cmd_id=g$last_vt
#------------------------------------
g = algebr$parseCommand(cmd[[2]], g, isInput=TRUE)
#-------------------------------------
g=algebr$addEdgeInput(g$last_vt, query, g)
#-------------------
#Case 3.2: cmd is a function call or some operation (e.g. mathematical, logical)
}else
# print(paste("found call: ",as.character(as.expression(cmd))))
##TODO: preserve mathematical expressions in one node,
#i.e. diferentiate between mathematical/logical operations and function calls
if(eval(call("is.function", cmd[[1]]))){
# TODO: review the way, semantics are evaluated from a call stack
# Normally the nodes and semantics should match well, but for the rare case that one function is executed multiple times in one task
# Semantics missmatched if the stack is not processed in the right order.
# Ideally, expressions should perhaps be parsed the same way as the parser works (leftmost-innermost ?) and the stack should be evaluated from the first to the last call
#function_obj = algebr$funFromString(as.character(as.expression(cmd[[1]])))
function_obj = algebr$funFromString(as.character(cmd[[1]]))
label=algebr$unquote(as.character(as.expression(cmd[[1]])))
call_function = label
semantics = NA
#add semantics to label, if available
if(captureSemantics(function_obj)){
fid=attr(function_obj, "fid")
sel=which(algebr$tempCallStack$fid == fid)
if(length(sel)>0){
sel=sel[1] #select first element
semantics=algebr$tempCallStack[sel,]$semantics
label=paste0(label,"\\n[",algebr$toSemanticLabel(semantics),"]")
sel=-1*sel
algebr$tempCallStack = algebr$tempCallStack[sel,] #remove evaluated call
}
}
g=algebr$addNodeOperation(label, g)
cmd_id = g$last_vt
if(is.null(g$fCalls_count))
g$fCalls_count=1
g$fCalls[[cmd_id]]=list(fname=call_function, command=cmd, count=g$fCalls_count, semantics = semantics) #called function (without any annotation)
g$fCalls_count=g$fCalls_count+1
if(captureSemantics(function_obj)){
g$fCalls[[cmd_id]]=append(g$fCalls[[cmd_id]], list(wrapper_fid = attr(function_obj, "fid")))
}
##TODO:review this; HEURISTIC: for graphics function create dependency on previously called plot-funcion
if(call_function %in% c("text", "points", "lines", "title", "par", "abline","arrow","axis","Axis","box", "grid","legend","lines", "pch","rug")){
for(i in ((length(g$fCalls)-1):1)){
if(stringr::str_detect(g$fCalls[[i]]$fname, "plot")){
cmd_id_plot = names(g$fCalls[i])
g=algebr$addEdgeInput(input = cmd_id_plot, cmd = cmd_id, g=g,hidden = TRUE,label = "[heuristic]")
}
}
}
# print(paste("label: ",label))
fdef=eval(cmd[[1]])
if(!is.primitive(fdef))
cmd=match.call(fdef, cmd)
#print(cmd[2:length(cmd)])
#print(paste0("label: ", label))
#g$nAttrs$label[[cmd_id]]=label
#g$nAttrs$fillcolor[[cmd_id]]="orange"
#g$nAttrs$style[[cmd_id]]="filled"
#g$V = append(g$V, cmd_id)
#g$E[[cmd_id]]=list(edges=c(), weights=c())
if(length(cmd)>1)
for(i in 2:length(cmd)){
# print(paste("cmd:",cmd, length(cmd)))
arg=cmd[[i]]
#------------------------------------
g = algebr$parseCommand(arg, g, isInput=TRUE, isOutput=FALSE)
# connect operand/arguments as input to the operator/function:
label=names(cmd[i])
g=algebr$addEdgeInput(g$last_vt, cmd_id, g, label)
}
if(!is.primitive(get(algebr$unquote(as.character(as.expression(cmd[[1]])))))){
function_obj = algebr$funFromString(cmd[[1]])
if(isTRUE(attr(function_obj,"SemanticWrapper"))){
function_obj = attr(function_obj,"wFun")
}
# globals = eval(call("findGlobals", function_obj, merge=FALSE))
globals = codetools::findGlobals(fun = function_obj,merge = FALSE)
##TODO: expore function references to other packages
ls_func = globals$functions[globals$functions %in% ls(envir = globalenv())]
hiddenCallBlackList <- c("captureSemantics<-","captureSemantics","functionalType<-","addSemanticPedigree")
if(length(ls_func)>0)
sapply(ls_func, function(x){
if(x %in% hiddenCallBlackList) #ignore functions from the blacklist,i,e, tracker functions
return()
x<-algebr$unquote(x)
g <<- algebr$addNodeObject(x,g = g,isInput = TRUE)
x_IID = algebr$instance(as.character(x))$IID
g <<- algebr$addEdgeFunctionCall(fun_id = x_IID,call_id = cmd_id,g = g, hidden = TRUE)
})
ls_vars = globals$variables[globals$variables %in% ls(envir = globalenv())]
if(length(ls_vars)>0)
sapply(ls_vars, function(x){
if(x %in% c("algebr")) ##try not to track the tracker...
return()
g <<- algebr$addNodeObject(x,g = g,isInput = TRUE) #add node as input if not yet registered
x_IID = algebr$instance(as.character(x))$IID
#TODO: detect (with scriptInfo of CodeDepends ?) if global variable is updated
g <<- algebr$addEdgeInput(input = x_IID, cmd = cmd_id, g=g, hidden=TRUE)
})}
}
#for all calls:
for(V in g$V){
cmd_ids = as.character(cmd_id)
call_label=g$fCalls[[cmd_ids]]$fname
if(!is.null(call_label) && V==call_label){
V_IID = algebr$instance(V)$IID #link latest instance of this function
g=algebr$addEdgeFunctionCall(V_IID, cmd_ids, g)
}
}
if(is.null(g$first_call))
g$first_call=cmd_id
}
g$last_vt=cmd_id
return(g)
}
algebr$containsOnlyPrimitives = function(cmd){
if(class(cmd)== "ScriptNodeInfo")
cmdInfo=cmd
else
cmdInfo = CodeDepends::getInputs(algebr$removeTheAt(cmd))
funs=names(cmdInfo@functions)
sel = funs %in% c("[","[[","$","@") #exclude expressions that only contain subset-operators
funs=funs[!sel]
if(length(funs)==0)
return(FALSE)
isPrimitive=sapply(funs, function(fun_string){
function_obj = algebr$funFromString(fun_string)
return(is.primitive(function_obj))
})
return(all(isPrimitive))
}
#' Rewrite replacement function
#'
#' Turns calls of replacement functions into a logically equivalent, parser-friendly form
#'
#' See http://adv-r.had.co.nz/Functions.html
#' @param expr
#'
#' @return A standardized call expression that calls a replacement function by the same syntax as a standard function is calles
#' @export
#'
#' @examples
#' > rewriteReplacementFunction(quote(attr(t, "semantics") <- "test"))
#' # t <- `attr<-`(t, "semantics", "test")
#'
#' > rewriteReplacementFunction(quote(functionalType(meuse) <- "SField"))
#' #meuse <- `functionalType<-`(meuse, "SField")
#'
#'
rewriteReplacementFunction = function(expr){
if(length(expr)<2)
return(expr)
if(is.call(expr[[2]])){
fname= expr[[2]][[1]]
#exception for these operators:
if(any(as.character(fname) %in% c('[[','[','$','@'))){
#print(expr)
return(expr)
}
fname=as.name(paste0(fname,"<-"))
# find out if a replacement function was used:
if(exists(as.character(fname)) && eval(call("is.function", fname))){
op = expr[[1]]
value = expr[[3]]
obj = expr[[2]][[2]]
args= as.list(expr[[2]][-1]);args
cl=append(fname, args);cl
cl=append(cl,value);cl
cl=as.call(cl);cl
cl=list(op, obj, cl);cl
cl=as.call(cl);cl
return(cl)
}
}
return(expr)
}
###
# Semantics related functions
####
#' Get/estimate object semantics of a given object
#'
#' @param var object or name of the object
#' @param env The environment in which the object shall be evaluated
#' @param isLiteral TRUE/FALSE allowed. Force var to be interpreted as a literal, even if it is for instance a name of an object in the workspace.
#'
#' @return Returns the semantic reference type of the object.
#' If the reference type is unknown because there is no annotation is available, a reference type is estimated based on a heuristic mapping.
#' If the mapping is 'unsave', the type will be prefixed with a questionmark '(?)' and a warning is given out.
#' For objects, where no semantic mapping is defined, simply the class will be returned
#'
#' @export
#'
getObjectSemantics <- function(var, env=globalenv(), isLiteral=FALSE){
if((!is.character(var) && !is.symbol(var) && !is.name(var)) || isLiteral) {
obj=var
var=as.character(as.expression(substitute(var)))
}else{
var=as.character(var)
obj = tryCatch(eval(parse(text=var),envir = env),error = function(e) var)
}
##for expressions such as meuse[1] (subset), it will be -ASSUMED- that the semantics are the same as with paren data set. i.e. meuse
# print(paste0("parse variable ", paste(var, collapse = ""), " of class ", class(var)))
#print(paste("ljfoaf ", substitute(var, env = globalenv()), var,class(obj), isLiteral))
try({
var_e = parse(text=algebr$enquote(var))
var_e = var_e[[1]]
if(!is.symbol(var_e) && var_e[[1]]=="["){
var_e=var_e[[2]]
var=deparse(var_e)
obj = tryCatch(eval(var_e,envir = env),error = function(e) var)
}},silent=TRUE
)
if(!is.null(attr(obj, "semantics")))
return(attr(obj, "semantics"))
# test if object is annotated with semantics, if not, predict semantics from class and structure (note that the latter is only a generic assumption, i.e. based on heuristics)
# be carful about the order of if-statements, because some classes extend others but imply different semantics
if (any(sapply(list(is.numeric, is.character, is.factor, is.symbol, is.name, is.expression), function(fun) {
return(fun(obj))
}))) {
if(length(obj)<=1)
return("Q")
else return("Q set")
}
if (is.logical(obj)) {
if(length(obj)<=1)
return("bool")
else return("bool set")
}
if (is(obj, "SField")) { ## for the actual mss package
SFieldData_observations = slot(obj, "observations")
sObs = getObjectSemantics(SFieldData_observations, env = environment())
sObs = paste0("(",sObs,")")
return(paste(sObs, "x SExtend"))
}
if (is(obj, "SpatialLinesDataFrame")) {
semantics= "(?)S x Q set"
warning(paste0("No semantic annotation available for object ",var,". Assumend semantics will be: ",semantics))
return(semantics)
}
if (is(obj, "SpatialLines")) {
return("S set")
}
if (is(obj, "SpatialPixelsDataFrame")) {
semantics= "(?)S x Q set"
warning(paste0("No semantic annotation available for object ",var,". Assumend semantics will be: ",semantics))
return(semantics)
}
if (is(obj, "SpatialPointsDataFrame")) {
semantics= "(?)S x Q set"
warning(paste0("No semantic annotation available for object ",var,". Assumend semantics will be: ",semantics))
return(semantics)
}
if (is(obj, "SpatialPixels") || is(obj, "SpatialPoints")) {
if(length(obj)==1)
return("S")
else
return("S set")
}
if (is(obj, "SpatialMultiPointsDataFrame")) {
#how many points
semantics= "(?)S x Q set"
warning(paste0("No semantic annotation available for object ",var,". Assumend semantics will be: ",semantics))
return(semantics)
}
if (is(obj, "SpatialMultiPoints")) {
if(length(obj)==1)
return("S")
else
return("S set")
}
if (is(obj, "SpatialGridDataFrame")) {
semantics = "(?)S x Q set"
warning(paste0("No semantic annotation available for object ",var,". Assumend semantics will be: ",semantics))
return(semantics)
}
if (is(obj, "SpatialGrid")) {
return("R set")
}
if (is(obj, "SpatialPolygonsDataFrame")) {
#TODO: how many Polygons?
semantics= "(?)R x Q set"
warning(paste0("No semantic annotation available for object ",var,". Assumend semantics will be: ",semantics))
return(semantics)
}
if (is(obj, "SpatialPolygons")) {
if(length(obj)==1)
return("R")
else
return("R set")
}
if (is(obj, "Spatial")) {
semantics= "(?) S set"
warning(paste0("No semantic annotation available for object ",var,". Assumend semantics will be: ",semantics))
return(semantics)
}
if (is(obj, "Date") || is(obj, "POSIXct") || is(obj, "POSIXctLt")) {
if(length(obj)<=1)
return("T")
else
return("T set")
}
if (is(obj, "xts")) {
semantics= "(?) T x Q set"
warning(paste0("No semantic annotation available for object ",var,". Assumend semantics will be: ",semantics))
return(semantics)
}
if (is(obj, "STFDF")) {
semantics= "(?) S x T x Q set"
warning(paste0("No semantic annotation available for object ",var,". Assumend semantics will be: ",semantics))
return(semantics)
}
if (is(obj, "STF")) {
isTimeAnnotated = !is.null(attr(obj@time,"semantics"))
isSpaceAnnotated = !is.null(attr(obj@time,"semantics"))
if(isTimeAnnotated){
time_s = getObjectSemantics(obj@time)
}else{
if(length(obj@time)<=1){
time_s = "T"
}else{
time_s = "(T set)"
}
}
space_s = getObjectSemantics(obj@sp)
if(stringr::str_detect(space_s, "set$")){
space_s=paste0("(",space_s,")")
}
if(stringr::str_detect(time_s, "set$")){
time_s=paste0("(",time_s,")")
}
semantics = paste0(space_s, " x ", time_s)
if(!isTimeAnnotated || !isSpaceAnnotated){
semantics = paste("(?)",semantics)
warning(paste0("No semantic annotation available for object ",var,". Assumend semantics will be: ",semantics))
}
return(semantics)
}
if(any(sapply(list(is.data.frame, is.list, is.array, is.matrix), function(fun) {
return(fun(obj))
}))) {
return("Q set")
}
if(is.function(obj) && any(sapply(list(sum, mean, median, sd, var), function(fun) {
return(identical(fun, obj))
}))) {
return("Qstat")
}
return(paste0("(?)Class:",class(obj)))
}
#' Get default call semantics that where defined for a wrapped function
#'
#' @param x An object of type function, should be a semantic wrapper function
#'
#' @return
#' - returns a list of pre-defined semantics which are alowed for this function
#' - returns "dynamic" if the function is semantics-enabled but does not have pre-defined semantics
#' - returns NULL if the function is not semantics-enabled and does not implement pre-defined semantics
#' @export
#' @seealso \code{\link{getObjectSemantics}}, \code{\link{captureSemantics}}
#'
getDefaultCallSemantics = function(x){
if(!is.function(x)){stop("The given object x is not a function. Call semantics are an attribute only for semantics-enabled funtions.")}
if(!captureSemantics(x)){
warning("This function is not semantics-enabled. Please use 'captureSemantics' in order to create a semantic wrapper.")
return(NULL)
}
return(attr(x,"callSemantics"))
}
algebr$estimateCallSemantics <- function(args, output) {
s_output = getObjectSemantics(output)
s_inputs = sapply(args, function(arg){getObjectSemantics(arg, env = environment())})
call_semantics = paste(paste(s_inputs, collapse = " -> "), s_output, sep=" -> ")
return(call_semantics)
}
# setting call semantics is not yet supported, because the captureSemantics object cannot be accessed from the function body (I don't know how)
# setting semantics permanently has to be done by creating a new semantic wrapper
#algebr$`callSemantics<-` = function(x, semantics){
# if(!captureSemantics(x)){
# stop("This function is not semantics-enabled. Please use 'captureSemantics' in order to create a semantic wrapper.")
# }
# return(attr(x,"callSemantics") <- semantics)
#}
#annotates a given variable with the callsemantics of procedure x, standard postprocessor for function wrappers
algebr$genericProcedureAnnotator <- function(procedureName){
function(args, output, callSemantics){
output=addSemanticPedigree(obj = output, name = procedureName, procedure = callSemantics)
return(output)
}
}
#' Show whether a function is wrapped or not
#'
#' @param fun The function that is / is not wrapped
#'
#' @return TRUE/FALSE, depending on whether the function is wrapped
#' @export
#'
#' @seealso \code{\link{captureSemantics<-}}
captureSemantics <- function(fun){
return(isTRUE(attr(fun,"SemanticWrapper")))
}
#' Capture Semantics - automatically create a semantic function wrapper
#'
#' @param fun The function to be wrapped
#' @param semantics (optional) The default call semantics, given as a collection of strings
#' @param procedureName (optional) The name of the semantic procedure that corresponds to this function
#' @param validator (optional) A boolean function that validates function calls and returns TRUE/FALSE if the call is valid or invalid
#' Validators should be written according to the following schema:
#'
#' function(args, output, default_semantics, call_semantics){
#' if(TRUE) #insert requirements
#' return(TRUE)
#' else
#' return(FALSE)
#' }
#'
#' args are a named list of all input arguments of the function call.
#' output is the object that was created and annotated during the call
#' default_semantics are the default call semantics of the function, if any. If not, NA
#' call_semantics are the estimated semantics of the call, given as a single string
#'
#' @param postprocessor (optional) A postprocessor function that can be used to annotate the output of a function
#' Postprocessors should be written according to the following schema:
#'
#' function(args, output, call_semantics){
#' #annotate output
#' return(output)
#' }
#'
#' The arguments arguments of the postprocessor are similar to the validator. The postprocessor is called before the validator
#'
#' @param value Boolean value that indicates whethere or not a function should be wrapped. Wrapped functions can be 'unwrapped' by setting this value to FALSE
#'
#' @return A wrapped/unwrapped function (depending on the value-parameter)
#' @seealso \code{\link{captureSemantics}}
#' @export
#'
`captureSemantics<-` <- function(fun, semantics = NA, procedureName = "unknown", validator=NULL, postprocessor=algebr$genericProcedureAnnotator(procedureName), value){
if(is.null(semantics)){
semantics=NA
}
bool=value #shall function be wrapped or not
#returieving the function names seems not to be possible from here
#fname=as.character(substitute(fun,env = globalenv()))
fid=paste0("w_",algebr$makeid())
wFun = fun #wrapped function
if(!bool){
if(isTRUE(attr(fun,"SemanticWrapper"))){
fun= attr(fun,"wFun")
}
return(fun)
}
if(is.primitive(fun)){
stop("Primitive functions are not supported!")
}
if(isTRUE(attr(fun,"SemanticWrapper"))){
fun = attr(fun,"wFun")
fun = `captureSemantics<-`(fun, value = value, semantics = semantics)
warning("Function was already wrapped. The old wrapper is replaced.")
return(fun)
}
wrapper=function(){
#TODO: Improve wrapping behaviour, e.g. by capturing and passing the original variable names with non-standard evaluation
ls_fun=ls(envir = environment())
args = sapply(ls_fun, function(var){
#print(var)
out=list(get(var))
names(out[1]) <- var
return(out)
})
#remove semantics-argument not to be passed on
if(!"semantics" %in% names(formals(wFun))){
args=args[names(args)!="semantics"]
}
isConsistent = TRUE
output = do.call(wFun, args, envir = environment())
call_semantics = algebr$estimateCallSemantics(args, output)
if(!is.null(postprocessor)){
output = postprocessor(args, output, call_semantics)
}
#re-estimate call semantics
call_semantics = algebr$estimateCallSemantics(args, output)
isValid = TRUE
if(!is.null(validator)){
isValid = validator(args, output, semantics, call_semantics)
if(!isValid){
warning("Post-validation of function call failed!")
}
}
if(length(semantics==1) && is.na(semantics)){ ## estimate semantics from in/output
semantics = call_semantics
}else{
s=stringr::str_to_upper(stringr::str_trim(semantics))
sc=stringr::str_to_upper(stringr::str_trim(call_semantics))
if(!sc %in% s){
warning(paste("Inconsistent function semantics, given is ",call_semantics,"but expected was one of the following: ",paste(semantics,collapse=", ")))
isConsistent = FALSE
}
if(!isValid)
call_semantics = paste0(call_semantics, ": INVALID!")
else if(!isConsistent)
call_semantics = paste0(call_semantics, ": INCONSISTENT!")
}
## The call is only recordet if it occurs from the global environment (to avoid confusion if they are called internaly or recursively(?))
if(algebr$enabled() && identical(parent.frame(),globalenv())){
#cat(paste0("Call: ",call_semantics,"\n"))
callSemantics=data.frame(rec_num=algebr$rec_num, semantics=call_semantics,fid=fid, time = timestamp(quiet = TRUE), stringsAsFactors = FALSE)
algebr$callStack=rbind(algebr$callStack,callSemantics)
}
## check if output was annotated if not, apply generic annotator
rec_nums = attr(output, "semanticPedigree")$rec_num
if(algebr$enabled() && (is.null(rec_nums) || !algebr$rec_num %in% rec_nums)){
genericProcessor = algebr$genericProcedureAnnotator(procedureName)
output=genericProcessor(args, output, call_semantics)
cat("Information: using 'genericProcedureAnnotator'-function to annotate output. Consider customizing the postprocessor-function in order to apply user-defined semantics.")
#warning("Function output was not annotated with pedigree. Please consider using 'genericProcedureAnnotator'-function or write a costum postprocessor function that annotates the output object!")
}
return(output)
}
formals_w = formals(wFun)
formals_w$semantics = semantics
#print(paste(deparse(formals_w), "formals"))
formals(wrapper) <- formals_w
attr(wrapper,"SemanticWrapper") <- TRUE
if(length(semantics) == 1 && is.na(semantics)){
attr(wrapper,"callSemantics") <- "dynamic"
}else{
attr(wrapper,"callSemantics") <- semantics
}
attr(wrapper,"wFun") <- wFun
attr(wrapper,"fid") <- fid
fun=wrapper
return(fun)
}
#' Adds an entry to the semantic pedigree reccord of an object.
#'
#' Adds an entry to the semantic pedigree reccord of an object.
#' The records corresponds to the "semanticPedigree"-attribute of annotated objects.
#'
#' @param obj The object that should be annoted
#' @param attr (optional string) The name of the attribute to be annotated
#' @param name (string) - The name of the semantic procedure that created/modified the object
#' @param procedure (string) - The signature of the semantic procedure that created/modified the object
#' @param result_semantics (string) The semantics of the result that is modified
#' @param parent_semantics (otpional string) If the result_semantics refer to an attribute or subset of a larger dataset, the semantics of the enclosing dataset
#'
#' @return The annotated object
#' @export
#'
addSemanticPedigree <- function(obj, attr="ALL", name = NA, procedure, result_semantics=NULL, parent_semantics=NULL){
#print(paste("adding semantics for", substitute(obj), attr, name, procedure, result_semantics, parent_semantics))
## attr might be of length > 1. in this case create one record for each attr
if(length(attr)>1){
for(attr_n in attr){
obj <- addSemanticPedigree(obj, attr_n, name, procedure, result_semantics, parent_semantics)
}
return(obj)
}
varname = as.character(substitute(obj))
if(is.null(attr(obj, "semanticPedigree"))){
attr(obj, "semanticPedigree") <- data.frame()
}
if(attr=="ALL" && is.null(parent_semantics)){
if(!is.null(result_semantics)){
attr(obj, "semantics") <-result_semantics
}
}else{
if(!is.null(result_semantics) && attr!="ALL"){
attr(obj[[attr]], "semantics") <-result_semantics
}
if(!is.null(parent_semantics)){
attr(obj, "semantics") <-parent_semantics
}
}
if(attr=="ALL" && is.null(parent_semantics)){
result_semantics = getObjectSemantics(obj)
parent_semantics = NA
}else if(attr=="ALL" && !is.null(result_semantics)){
parent_semantics = getObjectSemantics(obj)
}else{
#print(paste("estimating semantics of ---",varname, attr))
result_semantics = getObjectSemantics(obj[[attr]])
parent_semantics = getObjectSemantics(obj)
}
command=NA
rec_num=NA
if(algebr$enabled()){
rec_num=algebr$rec_num
if(algebr$rec_num <= length(provenance_history()))
command=paste(deparse(expr=provenance_history()[[algebr$rec_num]]),collapse="\n")
#if the call is currently executed, the command is still unknown but can be interfered later from the record number
}
record = data.frame(procedureName=name, procedure=procedure, result_attribute=attr, result_semantics=result_semantics, parent_semantics = parent_semantics, rec_num=rec_num, command=command, stringsAsFactors = FALSE)
attr(obj,"semanticPedigree") <- rbind(attr(obj,"semanticPedigree"), record)
if(attr=="ALL" && !is.null(names(obj))){
for(name in names(obj)){
try({
# in some cases, the "names" don't refer to attributes but to ids or something else
# so they are not always reliable for accessing atrributes
if(is.null(attr(obj[[name]],"semanticPedigree"))){
attr(obj[[name]],"semanticPedigree") <- data.frame()
}
attr(obj[[name]],"semanticPedigree") <- rbind(attr(obj[[name]],"semanticPedigree"), record)
}, silent= TRUE)
}
}else if(attr!="ALL" && attr %in% names(obj)){
if(is.null(attr(obj[[attr]],"semanticPedigree"))){
attr(obj[[attr]],"semanticPedigree") <- data.frame()
}
attr(obj[[attr]],"semanticPedigree") <- rbind(attr(obj[[attr]],"semanticPedigree"), record)
}
return(obj)
}
algebr$findMissingPedigreeCommands <- function(ped){
for(i in 1:dim(ped)[1]){
record = ped[i,]
if(is.na(record$command) && !is.na(record$rec_num)){
if(record$rec_num <= length(provenance_history())){
record$command=paste(deparse(expr=provenance_history()[[record$rec_num]]),collapse="\n")
}
}
ped[i,] = record
}
return(ped)
}
#' Get semantic pedigree of an object
#'
#' Retrieves and prints the semantic pedigree of a record of annotated data.
#' The records corresponds to the "semanticPedigree"-attribute of annotated objects.
#'
#' @param obj The object from which pedigree should queried
#' @param attr (optional) the attribute/subset from which pedigree should be queried
#'
#' @return The semantic-pedigree record, as a data.frame, if available.
#' @export
getSemanticPedigree <- function(obj, attr="ALL"){
varname = as.character(substitute(obj))
sn = slotNames(obj)
if(!is.null(sn)){
hasPedigree = sapply(sn, function(slotName){
pedigree=attr(slot(obj, slotName),"semanticPedigree")
return(!is.null(pedigree))
} ,USE.NAMES = FALSE)
if(any(hasPedigree)){
message=paste("Information: Semantic pedigree is available for the following slot(s):", paste(sn[hasPedigree], collapse = ", "),"\n")
cat(message)
}
}
if(is.null(attr(obj,"semanticPedigree")))
return(NULL)
out=NULL
if(attr=="ALL"){
out = attr(obj,"semanticPedigree")
}else if(attr %in% names(obj)){
out = attr(obj[[attr]],"semanticPedigree")
}else{
out = attr(obj,"semanticPedigree")
sel1 = out$attr == "ALL"
sel2= out$att == attr
sel = sel1 | sel2 #select all records refering to either the specified attribute or "ALL" attributes
out = out[sel,]
}
if(!is.null(out))
out = algebr$findMissingPedigreeCommands(out)
return(out)
}
###
# Utility functions
####
algebr$unquote = function(str){
if(stringr::str_detect(str,"^`.*`$"))
return(stringr::str_sub(str, 2,-2))
else
return(str)
}
algebr$enquote = function(str){
if(!stringr::str_detect(str,"^`.*`$") && stringr::str_detect(str,".*<-")){
str=paste0("`",str,"`")
return(str)
}else{
return(str)
}
}
algebr$getChunks = function(g, cmd){
return(g$chunks[[as.character(cmd)]]$code)
}
# visualy compare if each vertex of a graph has a matching list of edges (required for GraphViz)
algebr$compareVE = function(g){
out=list(nodes=sort(g$V), edges_names= sort(names(g$E)))
count=out$nodes[summary(as.factor(out$nodes))>1]
if(length(count)>0)
cat("ERROR: Certain nodes occur more than once", count,"\n")
sel=which(!out$nodes %in% out$edges_names)
# print(out$nodes %in% out$edges_names)
if(length(sel)>0)
cat(paste("Error: These nodes are not matched by the lists of edges: ",paste(out$nodes[sel], collapse = ", ")),"\n")
sel=which(!out$edges_names %in% out$nodes)
#print(out$edges_names %in% out$nodes)
if(length(sel)>0)
cat(paste("Error: These nodes do not occur in the lists of nodes: ",paste(out$edge_names[sel], collapse=", ")),"\n")
cat(paste())
cat(paste("Lists of edges: ",length(out$edges_names),"\n"))
cat(paste("Number of vertexes: ",length(out$nodes),"\n"))
return(out)
}
#creates a random id of 6 digits using letters and numbers
algebr$makeid=function(){
range = c(LETTERS, 0:9,letters)
rnds=runif(n = 6,min = 1,max = length(range))
paste(sapply(rnds, function(rnd){
range[rnd]
}),collapse = "")
}
algebr$funFromString = function(string_var, env = globalenv()){
# print(paste("funFromString:",string_var))
fun_obj = try(get(string_var,envir = env), silent = TRUE)
if(is.function(fun_obj))
return(fun_obj)
#in some cases, the following method works better when the other fails (for instance, when the function contained in a subset, i.e. parent$function())
if(stringr::str_detect(string_var,pattern = "<-")){
string_var=paste0("`",string_var,"`")
}
fun_obj=eval(parse(text=as.character(as.expression(string_var))), envir = env)
return(fun_obj)
}
##this function is created as a workaround for Issue https://github.com/duncantl/CodeDepends/issues/4
## in CodeDepends. Expressions manipulated with this function are not correct, but it will prevents getInputs from returning wrong inputs
algebr$removeTheAt = function(expr){
if(length(expr)==1){
return(expr)
}
eList = as.list(expr)
if(eList[[1]]==as.symbol("@")){
return(eList[[2]])
}
outList=sapply(eList, algebr$removeTheAt)
outExp = as.call(outList)
return(outExp)
}
#' Get functional type of a spatio-temporal dataset
#'
#' @param obj The object from which the functional type shall be retrieved
#' @param attr The attribute from which the functional type shall be retrieved
#'
#' @return The object that is annotated
#' @seealso \code{\link{functionalType<-}}
#' @export
functionalType <- function(obj,attr="ALL"){
out = NULL
if(attr!="ALL" && !is.null(attr) && !is.na(attr) && attr %in% names(obj)){
out=attr(obj[[attr]], "functionalType")
}
if(is.null(out))
out=attr(obj, "functionalType")
return(out)
}
#' Assign a functional type to a spatio-temporal dataset
#'
#' @param obj The object that is annotated
#' @param attr (optional string) the attribute that is annotated
#' @param value The annotated object
#' @param parent indicates whether the generationType is mapped to a parent dataset (using gendata or not)
#'
#' @return Returns the object annotated with semantic pedigree
#' @export
#'
#' @seealso \code{\link{functionalType}}
`functionalType<-` <- function(obj,attr="ALL",value, parent = TRUE){
parent_semantics = NULL
if(value == "SField"){
if(parent)
parent_semantics = "S x Q set"
obj=addSemanticPedigree(obj,attr = attr, name = value, procedure = "S -> Q",result_semantics = "Q set", parent_semantics = parent_semantics)
}else if(value == "Field"){
if(parent)
parent_semantics = "S x T x Q set"
obj=addSemanticPedigree(obj,attr = attr, name = value, procedure = "S x T -> Q",result_semantics = "Q set", parent_semantics = parent_semantics)
} else if(value == "TField"){
if(parent)
parent_semantics = "T x Q set"
obj=addSemanticPedigree(obj,attr = attr, name = value, procedure = "T -> Q",result_semantics = "Q set", parent_semantics = parent_semantics)
}else if(value == "InvField"){
if(parent)
parent_semantics = "Q x Occurs set"
obj=addSemanticPedigree(obj,attr = attr, name = value, procedure = "Q -> Occurs",result_semantics = "Q set", parent_semantics = parent_semantics)
}else if(value == "SInvField"){
if(parent)
parent_semantics = "Q x R set"
obj=addSemanticPedigree(obj,attr = attr, name = value, procedure = "Q -> R",result_semantics = "R set", parent_semantics = parent_semantics)
} else if(value == "TInvField"){
if(parent)
parent_semantics = "Q x T set"
obj=addSemanticPedigree(obj,attr = attr, name = value, procedure = "Q -> T",result_semantics = "T set", parent_semantics = parent_semantics)
} else if(value == "Lattice"){
if(parent)
parent_semantics = "R x I x Q set"
obj=addSemanticPedigree(obj,attr = attr, name = value, procedure = "R -> I -> Q",result_semantics = "Q set", parent_semantics = parent_semantics)
} else if(value == "SLattice"){
if(parent)
parent_semantics = "R x Q set"
obj=addSemanticPedigree(obj,attr = attr, name = value, procedure = "R -> Q",result_semantics = "Q set", parent_semantics = parent_semantics)
} else if(value == "TLattice"){
if(parent)
parent_semantics = "I x Q set"
obj=addSemanticPedigree(obj,attr = attr, name = value, procedure = "I -> Q",result_semantics = "Q set", parent_semantics = parent_semantics)
} else if(value == "Event"){
if(parent)
parent_semantics = "D x S x T set"
obj=addSemanticPedigree(obj,attr = attr, name = value, procedure = "D -> S x T",result_semantics = "S x T set", parent_semantics = parent_semantics)
} else if(value == "SEvent"){
if(parent)
parent_semantics = "D x S set"
obj=addSemanticPedigree(obj,attr = attr, name = value, procedure = "D -> S",result_semantics = "S set", parent_semantics = parent_semantics)
} else if(value == "TEvent"){
if(parent)
parent_semantics = "D x T set"
obj=addSemanticPedigree(obj,attr = attr, name = value, procedure = "D -> T",result_semantics = "T set", parent_semantics = parent_semantics)
} else if(value == "MarkedEvent"){
if(parent)
parent_semantics = "D x S x T x Q set"
obj=addSemanticPedigree(obj,attr = attr, name = value, procedure = "D -> S x T x Q",result_semantics = "S x T x Q set", parent_semantics = parent_semantics)
} else if(value == "SMarkedEvent"){
if(parent)
parent_semantics = "D x S x Q set"
obj=addSemanticPedigree(obj,attr = attr, name = value, procedure = "D -> S x Q",result_semantics = "S x Q set", parent_semantics = parent_semantics)
} else
stop("Functional type name is unknown. Please consider adding the semantics manually using addSemanticPedigree
and setting the attribute functionalType, if you refer to a valid spatio-temporal generation Type")
#### TODO: Komplete list of Events, add objects and Trajectories
if(attr!="ALL" && !is.null(attr) && !is.na(attr) && attr %in% names(obj)){
attr(obj[[attr]], "functionalType") <- value
#print(paste0("Setting functional type to object of class", class(obj), " type: ",value," attribute: ",attr))
}else{
attr(obj, "functionalType") <- value
#print(paste0("Setting functional type to object of class", class(obj), " type: ",value))
}
return(obj)
}
#captureSemantics(`functionalType<-`, postprocessor=NULL) <- TRUE
algebr$isAlreadyAnnotated = function(obj){
#return(FALSE)
#look if object is already annoted
rec_nums = attr(obj, "semanticPedigree")$rec_num
isAnnotated= algebr$enabled() && !is.null(rec_nums) && algebr$rec_num %in% rec_nums
return(isAnnotated)
}
algebr$findExpressionSemantics = function(exp, exp_id, cmd, g){
in_vec = algebr$findDependencySemantics(exp, exp$inputs, cmd, g)
in_vec = paste(in_vec, collapse = " -> ")
out_sem = algebr$findDependencySemantics(exp, exp$outputs, cmd, g)
out_sem_str = out_sem
if(length(out_sem)>1){
out_sem_str = paste(paste0("(",out_sem,")"), collapse = " x ")
}
exp$semantics=paste0(in_vec, " -> ", out_sem_str)
label = g$nAttrs$label[[exp_id]]
label = paste0(label,"\\n[",algebr$toSemanticLabel(exp$semantics),"]")
g$nAttrs$label[[exp_id]] <- label
out_iids = NULL
if(stringr::str_detect(exp_id,"^expr_")){
g$exps[[exp_id]] <- exp
g$exps[[exp_id]]$out_sem = out_sem_str
out_iids = unlist(g$exps[[exp_id]]$outputs)
}else if(stringr::str_detect(exp_id,"^fcall_")){
g$fCalls[[exp_id]] <- exp
g$fCalls[[exp_id]]$out_sem = out_sem_str
out_iids = unlist(g$fCalls[[exp_id]]$outputs)
}else{
stop(paste0("Could not determine whether input is a simple expression or a function call: ", exp_id))
}
# print(out_sem)
# print(out_iids)
if(is.null(out_iids))
return(g)
# print(paste(out_iids,out_sem))
mapply(function(out_iid, out_sem){
# print("TEST1")
var = algebr$varFromIID(out_iid)
attr=algebr$findAttr(var)
# print("TEST2")
tryCatch({
#look if call is function call
fname = cmd[[1]]
if (is.function(eval(fname, envir = globalenv()))) {
name = deparse(fname)
} else
name = "expression"
}, error = function(e) {
name = "expression"
warning(e)
})
#print("TEST3")
if (is.na(attr)) {
obj = eval(parse(text = algebr$enquote(var)), envir = globalenv())
if(algebr$isAlreadyAnnotated(obj))
return(g)
#print(paste("!!! ",class(obj), substitute(obj,env = globalenv()),name,exp$semantics))
obj = addSemanticPedigree(obj = obj, name = name, procedure = exp$semantics, result_semantics = out_sem)
assign(var, obj, envir = globalenv())
#print(paste0("Assigninged semantic pedigree to ",var))
} else{
#find parent var
parent_var = parse(text = algebr$enquote(var))[[1]][[2]]
obj = eval(parent_var, envir = globalenv())
if(algebr$isAlreadyAnnotated(obj))
return(g)
obj = addSemanticPedigree(obj = obj, attr = attr, name = name, procedure = exp$semantics, result_semantics = out_sem)
assign(deparse(parent_var), obj, envir = globalenv())
#print(paste0("Assigninged semantic pedigree to ",parent_var))
}
return(g)
}, out_iid=out_iids, out_sem=out_sem)
return(g)
}
algebr$findDependencySemantics <- function(exp, dep_nodes, cmd, g) {
if(!is.null(dep_nodes)&& length(dep_nodes)>0){
sem_vec = sapply(dep_nodes, function(dep_id){
record=algebr$findInstanceRecord(dep_id)
if(!is.null(record)){
return(record$semantics) #should always return a value normally
} else if(dep_id %in% names(g$fCalls)){
out_sem=g$fCalls[[dep_id]]$out_sem
if(!is.null(out_sem))
return(out_sem)
else
return("?")
}else if(dep_id %in% names(g$exps)){
out_sem=g$exps[[dep_id]]$out_sem
if(!is.null(out_sem))
return(out_sem)
else
return("?")
}else if(stringr::str_detect(dep_id,"^lt_")){
value=g$nAttrs$label[[dep_id]] ##may not be the most stable solution... TODO: save literal values somewhere else
return(getObjectSemantics(value,isLiteral = TRUE))
}else{
return("?")
}
})
return(sem_vec)
}else{
#print(paste("No dep nodes for ", as.character(as.expression(substitute(dep_nodes)))))
in_sem="?"
tryCatch({value_xyz = eval(cmd)
in_sem=getObjectSemantics(value_xyz)},
error= function(e){
warning(paste("semantics of expression ", exp, "could not be evaluated"))
in_sem="?"})
return(in_sem)
}
}
algebr$estimateMissingSemantics = function(g){
#estimate missing semantics of expressions
#finds semantics of inputs/outputs (dependend nodes, "dep_nodes") for an expression
mapply(function(exp, exp_id){
if(is.na(exp$semantics)){
cmd=parse(text=exp$exp)[[1]]
g <<- algebr$findExpressionSemantics(exp, exp_id, cmd,g = g)
return()
}
return()
}, exp=g$exps, exp_id=names(g$exps))
#estimate missing semantics of function calls
mapply(function(fCall, call_id){
if(is.na(fCall$semantics)){
g <<- algebr$findExpressionSemantics(fCall, call_id, fCall$command,g= g)
#if(stringr::str_detect(node_id, "^lt_")){
# assume it is a literal, try to parse label to value #TODO make this more "formal"
return()
}
return()
}, fCall=g$fCalls, call_id=names(g$fCalls))
#print(g$exps)
#print(g$fCalls)
return(g)
}
#test=algebr$estimateMissingSemantics(algebr$scriptGraph)
#test$exps$expr_lkjnmQ
#test$fCalls$fcall_crrNKR
algebr$varFromIID <-function(iid){
var=stringr::str_replace(iid, "~\\d*$","")
if(!is.null(getVersions(var)))
return(var)
else
return(NULL)
}
algebr$findInstanceRecord = function(node_id){
var=algebr$varFromIID(node_id)
if(is.null(var))
return(NULL)
hist = getVersions(var)
if(!is.null(hist)){
sel=which(hist$IID == node_id)
if(length(sel)==1)
return(hist[sel,])
else {
return(NULL)}
}else{
return(NULL)
}
}
#for testing
# exp=quote(meuse[[c(1,1)]]);exp
# algebr$findAttr(exp)
# exp=quote(meuse[[1]]);exp
# algebr$findAttr(exp)
# exp=quote(meuse["zinc"]);exp
# algebr$findAttr(exp)
# exp=quote(meuse[1,1]);exp
# algebr$findAttr(exp)
# exp=quote(meuse$zinc);exp
# algebr$findAttr(exp)
# exp=quote(meuse[c("zinc", "copper")]);exp
# algebr$findAttr(exp)
# exp=quote(meuse[c(1, 2)]);exp
# algebr$findAttr(exp)
# exp=quote(intZincPointData@observations[1]);exp
# algebr$findAttr(exp)
algebr$findAttr = function(exp){
if(is.character(exp))
exp=parse(text=exp)[[1]]
expl=as.list(exp);exp
if (length(expl) >= 3) {
#inputs may be e.g. meuse$zinc, meuse[["zinc"]], meuse[[1]], meuse[[c(1,1)]]
if (expl[[1]] == "[[") {
par = expl[[3]]
if (is.character(par)) {
attrN = par
} else if (is.numeric(par)) {
str = paste0("names(", deparse(expl[[2]]), "[", par, "])")
attrN = eval(parse(text = str), envir = globalenv())
} else if (algebr$containsOnlyPrimitives(par)) {
pval = eval(par, envir = globalenv())
pval
if (is.numeric(pval) && length(pval) > 0) {
str = paste0("names(", deparse(expl[[2]]), "[", pval[1], "])")
attrN = eval(parse(text = str), envir = globalenv())
} else{
attrN = NA
}
} else{
attrN = NA
}
} else if (expl[[1]] == "$") {
if (is.name(expl[[3]])){
attrN = as.character(expl[[3]])
} else{
attrN = NA
}
} else if (expl[[1]] == "[") {
if(length(expl)==4){
par= expl[[4]]
}else
par= expl[[3]]
if(is.numeric(par)){
str = paste0("names(", deparse(expl[[2]]), "[", par, "])")
attrN = eval(parse(text = str), envir = globalenv())
}else if(is.character(par)){
attrN = par
}else
if(algebr$containsOnlyPrimitives(par)){
pval = eval(par, envir = globalenv())
if(is.character(pval)){
#character selector of multiple columns
attrN = pval
}else if(is.numeric(pval) || is.logical(pval)){
str = paste0("names(", deparse(expl[[2]]), "[", deparse(par), "])")
attrN = eval(parse(text = str), envir = globalenv())
}
}
} else{
attrN = NA
}
return(attrN)
}
return(NA)
}
algebr$toSemanticLabel = function(str_sem){
out = str_sem
if(stringr::str_detect(str_sem, " -> "))
out = stringr::str_replace_all(string = out, pattern = " -> ",replacement = " ⇒ ")
if(stringr::str_detect(str_sem, " x "))
out = stringr::str_replace_all(string = out, pattern = " x ",replacement = " ⨉ ")
#print(out)
#out=paste0(out,"TEST")
return(out)
}
#' Retrieves packages internals
#'
#' @return An environment in which all internal variables and functions of the SpatialSemantics-package are saved
#' @export
spsem_internals <- function(){
return(algebr)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.