own_error <- function (message, custom_code, call = sys.call(-1), ...) {
structure(
class = c("own_error", "error", "condition"),
list(message = message, call = call, custom_code = custom_code, ...)
)
}
parse <- function (text, line_no1 = 1, line_non = NULL,
variables = NULL, functions = NULL, hypotheses = NULL,
interactive = FALSE) {
# Prepare
if (is.null(variables)) {
variables <- data.frame(matrix(ncol = 8, nrow = 0));
colnames(variables) <- c("id", "name", "precursors", "columns", "origin", "type", "value", "generation");
}
if (is.null(functions)) {
functions <- data.frame(matrix(ncol = 8, nrow = 0));
colnames(functions) <- c("id", "name", "lines", "signature", "packages", "arguments", "depth", "breakpoint");
}
if (is.null(hypotheses)) {
hypotheses <- data.frame(matrix(ncol = 6, nrow = 0));
colnames(hypotheses) <- c("id", "name", "columns", "functions", "models", "formulas");
}
env <<- new.env()
# Run loop
if (is.null(line_non))
line_non <- length(text)
while (line_no1 < line_non + 1) {
line_no2 <- line_no1;
break_ <- FALSE;
while (!break_ && line_no2 < line_non + 1) {
tryCatch(
expr = {
line <- paste0(text[line_no1:line_no2], collapse="\n");
row <- base::parse(text=line);
if (!is.language(row) || length(row) == 0) {
break;
}
for (exp in row) {
before_funcs <- nrow(functions);
c(variables, functions, hypotheses) %<-% recursion(exp, variables, functions, hypotheses)
if (nrow(functions) > before_funcs) {
functions[(before_funcs + 1):nrow(functions), ]$lines <- list(list(line_no1, line_no2));
functions[nrow(functions), ]$signature <- line;
}
}
break;
},
warning = function (w) {
if (strict) {
stop(w$message);
} else {
warning(w);
break_ <<- TRUE;
}
},
error = function (e) {
if (grepl("unexpected end of input", e$message) ||
grepl("INCOMPLETE_STRING", e$message)) {
line_no2 <<- line_no2 + 1;
} else {
e$lines <- c(line_no1, line_no2)
stop(e); # error rethrow
}
},
finally = function (...) {}
)
}
if (interactive) {
incProgress(line_no2 - line_no1 + 1)
}
line_no1 <- line_no2 + 1;
}
list(variables, functions, hypotheses);
}
parse_diff <- function (text, old_text, vv, ff, hh) {
text_diff <- ses(old_text, text)
for (diff in text_diff) {
# Introspect the diff data
diff_type <- regmatches(diff, regexpr("[acd]", diff))
diff_pair <- lapply(
strsplit(diff, "[acd]"),
function (line) lapply(
line,
function (part) as.integer(if (grepl(",", part)) strsplit(part, ",")[[1]] else c(part, part))
)
)[[1]]
c(first_line, last_line) %<-% diff_pair[[1]]
c(new_first_line, new_last_line) %<-% diff_pair[[2]]
c(offset_before, offset_after) %<-% c(0, 0)
diff_size <- (new_last_line - new_first_line + (diff_type == "a")) - (last_line - first_line + (diff_type == "d"))
# Split the functions to before/within/after the change
prior_functions <- subset(ff, sapply(lines, function (lines) lines[[2]]) < first_line)
changed_functions <- subset(ff, sapply(lines, function (lines) first_line <= lines[[2]] && lines[[1]] <= last_line))
later_functions <- subset(ff, sapply(lines, function (lines) lines[[1]]) > last_line)
# Process the affected content
later_functions$lines <- lapply(later_functions$lines, function (f) as.list(unlist(f) + diff_size))
if (nrow(changed_functions)) {
offset_before <- changed_functions[1, ]$lines[[1]][[1]] - first_line
offset_after <- changed_functions[nrow(changed_functions), ]$lines[[1]][[2]] - last_line
# For possible premature return
first_line <- changed_functions[1, ]$lines[[1]][[1]]
}
new_first_line <- new_first_line - offset_before
new_last_line <- new_last_line - offset_after
clean_operation <- offset_before == 0 && offset_after == 0 # Full-function addition or deletion
if (diff_type == "a" && clean_operation) {
prior_functions <- rbind(prior_functions, changed_functions)
changed_functions <- subset(changed_functions, FALSE)
}
prior_variables <- subset(vv, type == "column" | origin %in% prior_functions$id)
prior_variables <- subset(prior_variables, type != "column" | id %in% unlist(prior_variables$columns))
withProgress(
expr = c(new_variables, new_functions, new_hypotheses) %<-% parse(text, new_first_line, new_last_line,
interactive = TRUE,
variables=prior_variables, hypotheses=hh),
min = new_first_line,
max = new_last_line,
message = "Refreshing"
)
if (diff_type == "d" && clean_operation) {
new_functions <- subset(new_functions, FALSE)
}
new_hypotheses <- subset(new_hypotheses, sapply(new_hypotheses$functions, function (f) length(intersect(f, new_functions$id))) > 0)
ff <- rbind(prior_functions, new_functions, later_functions)
# If no acutal functions have been affected, just continue onward
if (!nrow(changed_functions) && !nrow(new_functions))
next
# Check if the code continuity has been preserved: i.e. if variable creation or modification ocasions can be translated
new_created_variables <- subset(new_variables, origin %in% new_functions$id)
new_modified_variables <- subset(new_variables, id %in% new_functions$breakpoint)
old_created_variables <- subset(vv, origin %in% changed_functions$id)
old_modified_variables <- subset(vv, id %in% changed_functions$breakpoint)
if (nrow(old_created_variables) != nrow(new_created_variables) ||
!setequal(old_created_variables$name, new_created_variables$name) ||
nrow(old_modified_variables) != nrow(new_modified_variables) ||
!setequal(old_modified_variables$id, new_modified_variables$id)) {
# I.e. continuity has been disrupted
hh$functions <- lapply(hh$functions, function (f) if (length(f)) as.list(intersect(f, prior_functions$id)) else f)
hh$models <- lapply(hh$models, function (m) if (length(m)) as.list(intersect(m, prior_variables$id)) else m)
hh$formulas <- lapply(hh$formulas, function (f) if (length(f)) as.list(intersect(f, prior_variables$id)) else f)
hh <- subset(hh, lapply(hh$functions, length) > 0)
return(list(prior_variables, prior_functions, hh, TRUE, first_line))
}
# Replace the variable names across the collections
if (nrow(new_created_variables)) {
new_created_variables[match(old_created_variables$name, new_created_variables$name), ]$columns <- Map(
function (cc1, cc2) as.list(union(cc1, cc2)),
new_created_variables[match(old_created_variables$name, new_created_variables$name), ]$columns,
old_created_variables$columns
)
vv[vv$id %in% old_created_variables$id, ] <- new_created_variables[match(old_created_variables$name, new_created_variables$name), ]
variable_id_mapping <- new_created_variables[match(old_created_variables$name, new_created_variables$name), ]$id
names(variable_id_mapping) <- old_created_variables$id
ff$arguments <- lapply(ff$arguments, function (args) if (length(args)) lapply(args, function (arg) if (arg %in% names(variable_id_mapping)) variable_id_mapping[[arg]] else arg) else args)
ff$breakpoint <- lapply(ff$breakpoint, function (b) if (b %in% names(variable_id_mapping)) variable_id_mapping[[b]] else b)
hh$models <- lapply(hh$models, function (m) if (length(m)) lapply(m, function (m) if (m %in% names(variable_id_mapping)) variable_id_mapping[[m]] else m) else m)
hh$formulas <- lapply(hh$formulas, function (f) if (length(f)) lapply(f, function (f) if (f %in% names(variable_id_mapping)) variable_id_mapping[[f]] else f) else f)
}
hh$functions <- lapply(hh$functions, function (f) if (length(f)) as.list(intersect(f, ff$id)) else f)
hh <- subset(hh, lapply(hh$functions, length) > 0)
updated_hypotheses <- subset(new_hypotheses, id %in% hh$id)
if (nrow(updated_hypotheses)) {
updated_hypotheses$functions <- Map(function (f1, f2) as.list(union(f1, f2)),
updated_hypotheses$functions,
hh[match(updated_hypotheses$id, hh$id), ]$functions)
updated_hypotheses$models <- Map(function (m1, m2) as.list(union(m1, m2)),
updated_hypotheses$models,
hh[match(updated_hypotheses$id, hh$id), ]$models)
updated_hypotheses$formulas <- Map(function (f1, f2) as.list(union(f1, f2)),
updated_hypotheses$formulas,
hh[match(updated_hypotheses$id, hh$id), ]$formulas)
hh[match(updated_hypotheses$id, hh$id), ] <- updated_hypotheses
}
hh <- rbind(hh, subset(new_hypotheses, !id %in% hh$id))
}
list(vv, ff, hh, FALSE, first_line)
}
find_hypothesis <- function (exp, hypotheses, variables, add = FALSE) {
functions <- data.frame(matrix(ncol = 8, nrow = 0));
colnames(functions) <- c("id", "name", "lines", "signature", "packages", "arguments", "depth", "breakpoint");
c(variables, functions, h) %<-% recursion(exp[[length(exp)]], variables, functions, data.frame(),
addition_mode = TRUE)
if (nrow(functions)) {
con_column_ids <- lapply(collect_columns(variables, functions),
function (column) column$id)
} else if (is.name(exp[[length(exp)]])) {
c(con_column_index, variables) %<-% find_variable(as.character(exp[[length(exp)]]), variables,
add = TRUE,
type_constraint = c("column", "constant"),
check_shadowing = FALSE)
con_column_ids <- list(variables[con_column_index, ]$id)
}
if (length(exp) == 3) {
# TODO: detect lsmeans/emmeans contrast functions?
name <- paste0(as.character(exp)[c(2, 1, 3)], collapse = ' ');
if (is.name(exp[[2]])) {
c(dep_column_index, variables) %<-% find_variable(as.character(exp[[2]]), variables,
add = TRUE,
type_constraint = c("column", "constant"),
check_shadowing = FALSE);
dep_column_id <- variables[dep_column_index, ]$id;
} else {
before_funcs <- nrow(functions);
c(variables, functions, h) %<-% recursion(exp[[2]], variables, functions, data.frame(),
addition_mode = TRUE)
dep_columns <- collect_columns(variables, functions[before_funcs:nrow(functions), ]);
dep_column_id <- dep_columns[[length(dep_columns)]]$id;
}
columns <- list(
dependant = dep_column_id,
control = con_column_ids
)
} else if (length(exp) == 2) {
name <- paste0(exp, collapse = '');
columns <- list(
dependant = NULL,
control = con_column_ids
)
} else {
name <- '';
columns <- NULL;
}
index <- match(name, hypotheses$name);
if (is.na(index)) {
hypothesis <- NULL;
index <- nrow(hypotheses) + 1
} else {
hypothesis <- hypotheses[index, ]
}
if (index > nrow(hypotheses) && add) {
hypothesis <- list(
id = paste0(c("h", UUIDgenerate()), collapse = "-"),
name = name,
columns = list(columns),
functions = list(list()),
models = list(list()),
formulas = list(list())
);
hypotheses[index,] <- hypothesis;
}
list(index, hypotheses, variables);
}
collect_columns <- function (variables, functions) {
array <- list();
for (var_index in 1:nrow(variables)) {
variable <- as.list(variables[var_index, ]);
for (func_args in functions$arguments) {
if (variable$id %in% func_args) {
if (variable$type == "column") {
array[[length(array) + 1]] <- variable;
} else if (variable$type == "constant") {
# TODO: somehow set the "column" type to the variable?
# TODO: if yes: which data variable it should be assigned to?
# or can it just wait until later?
# or it won't be any different from the old workflow with subsequent update_hypothesis call?
array[[length(array) + 1]] <- variable;
} else if (variable$type == "formula") {
# TODO: `col1 ~ col2 ~ cols` case?
}
}
}
}
array;
}
find_variable <- function (name, variables, add = FALSE, force = FALSE, type_constraint = NULL,
check_shadowing = TRUE) {
var <- NULL;
index <- nrow(variables) + 1;
if (!force) {
packages <- find(name);
# If the name can be found in the packages
is_function_name <- !!length(packages);
if (is_function_name && eval_) {
# If the name can _only_ be found in the packages
is_function_name <- !length(setdiff(grep("package:", packages), 1:length(packages)));
}
if (nrow(variables) > 0)
for (i in 1:nrow(variables)) {
old_var <- as.list(variables[i, ]);
if (old_var$name == name && (is.null(type_constraint) || any(old_var$type == type_constraint))) {
is_function_name <- FALSE; # having the variable by the name overrules the function name search (probably)
var <- old_var;
index <- i;
}
}
if (is_function_name && check_shadowing) {
return(list(index, variables))
}
}
if (index > nrow(variables)) {
if (!add) {
e <- own_error(paste0(c("The variable `", name, "` has never occured before, but addition has not been granted"), collapse=""),
custom_code = 1)
stop(e)
}
var <- list(
id = paste0(c("v", UUIDgenerate()), collapse = "-"),
name = name,
precursors = list(list()),
columns = list(list()),
origin = NA,
type = "constant",
value = NA,
generation = 0
);
variables[index, ] <- var;
}
list(index, variables);
}
argument_recursion <- function (args, func,
variables, functions, hypotheses,
depth, addition_mode = FALSE) {
for (arg in args) {
if (missing(arg)) {
} else if (is.atomic(arg)) {
func$arguments <- append(func$arguments, arg)
} else if (is.name(arg)) {
# NB: Technically, thyCatch wrapping here is a crutch, however it does not seem possbile to ensure that
# the existence of the variables that are being called within in some of the cases,
# even though the code remains valid overall
tryCatch(
expr = {
c(var_index, variables) %<-% find_variable(as.character(arg), variables,
add = addition_mode);
if (variables[var_index, ]$type == "constant" && !is.na(variables[var_index, ]$value)) {
func$arguments <- append(func$arguments, variables[var_index, ]$value); # value is (supposed to be) atomic
} else if (variables[var_index, ]$type == "formula") {
func$arguments <- append(func$arguments, variables[var_index, ]$value); # value is a hypothesis id
} else {
func$arguments <- append(func$arguments, variables[var_index, ]$id)
}
},
warning = function (...) {},
error = function (...) {},
finally = function (...) {}
)
} else if (is.call(arg) && identical(arg[[1]], quote(`~`))) {
c(hyp_index, hypotheses, variables) %<-% find_hypothesis(arg, hypotheses, variables, add = TRUE);
func$arguments <- append(func$arguments, hypotheses[hyp_index,]$id);
hypotheses[hyp_index,]$functions[[1]] = append(hypotheses[hyp_index,]$functions[[1]], func$id);
} else if (is.call(arg) && (identical(arg[[1]], quote(`c`)) || identical(arg[[1]], quote(`list`)))) {
c(func, variables, functions, hypotheses) %<-% argument_recursion(as.list(arg)[2:length(arg)], func,
variables, functions, hypotheses,
depth, addition_mode = addition_mode)
} else {
before_funcs <- nrow(functions);
c(variables, functions, hypotheses) %<-% recursion(arg, variables, functions, hypotheses,
addition_mode = addition_mode, depth = depth);
if (nrow(functions) != before_funcs) {
func$arguments <- append(func$arguments, functions[nrow(functions), ]$id);
}
}
}
list(func, variables, functions, hypotheses);
}
hypothesis_subroutine <- function (exp, variables, functions, hypotheses,
addition_mode = FALSE, depth = 0) {
# data[data$col1 == value, ]$col2
if (identical(exp[[1]], quote(`$`))) {
data_name <- as.character(exp[[2]][[2]]);
col2_name <- as.character(exp[[3]]);
lookup <- exp[[2]][[3]];
# data[data$col1 == value, "col2"]
} else if (identical(exp[[1]], quote(`[`)) && is.character(exp[[4]])) {
data_name <- as.character(exp[[2]]);
col2_name <- exp[[4]];
lookup <- exp[[3]];
} else {
return(list(variables, functions, hypotheses));
}
# data
c(var_index, variables) %<-% find_variable(data_name, variables,
add = TRUE,
type_constraint = "data");
do_force_add <- !length(variables[var_index, ]$columns[[1]]);
# col2
c(col_index, variables) %<-% find_variable(col2_name, variables,
add = TRUE,
force = do_force_add,
type_constraint = "column",
check_shadowing = FALSE);
variables[col_index, ]$type <- "column";
if (variables[col_index, ]$generation == 0) {
variables[col_index, ]$generation <- variables[var_index, ]$generation + 1;
}
if (!(variables[col_index, ]$id %in% variables[var_index, ]$columns[[1]])) {
variables[var_index, ]$columns[[1]] <- append(variables[var_index, ]$columns[[1]], variables[col_index, ]$id);
}
# data$col1 == value
before_funcs <- nrow(functions);
c(variables, f, hypotheses) %<-% recursion(lookup, variables, functions, hypotheses,
addition_mode = addition_mode, depth = depth);
columns <- list()
for (func_args in f[(before_funcs + 1):nrow(f), ]$arguments) {
for (arg in func_args) {
for (var_index_ in 1:nrow(variables)) {
var <- as.list(variables[var_index_, ]);
if (var$id == arg && var$type == "column") {
columns[[length(columns) + 1]] <- var;
break;
}
}
}
}
formula <- paste0(
c(
variables[col_index, ]$name,
'~',
paste0(
lapply(columns, function (var) var$name),
collapse = ' + '
)
),
collapse = ' '
);
formula <- base::parse(text=formula)[[1]];
c(hyp_index, hypotheses, variables) %<-% find_hypothesis(formula, hypotheses, variables,
add = TRUE);
func <- list(
id = paste0(c("f", UUIDgenerate()), collapse = "-"),
name = paste0(as.character(exp)[c(2, 1, 3)], collapse = ""),
lines = NA,
signature = NA,
packages = NA,
arguments = list(append(list(variables[var_index, ]$id, variables[col_index, ]$id), lapply(columns, function (var) var$id))),
depth = depth - addition_mode,
breakpoint = NA
);
hypotheses[hyp_index, ]$functions[[1]] <- append(hypotheses[hyp_index, ]$functions[[1]], func$id);
functions[nrow(functions) + 1, ] <- func;
list(variables, functions, hypotheses);
}
recursion <- function (exp, variables, functions = NULL, hypotheses,
addition_mode = FALSE, force_as_function = FALSE,
depth = 0) {
if (is.null(functions)) {
functions <- data.frame(matrix(ncol = 8, nrow = 0))
colnames(functions) <- c("id", "name", "lines", "signature", "packages", "arguments", "depth", "breakpoint")
}
depth <- depth + 1;
# Is assignment line
if (is.call(exp) && (identical(exp[[1]], quote(`<-`)) || identical(exp[[1]], quote(`=`)))) {
is_mutation <- FALSE;
if (is.call(exp[[2]])) {
is_mutation <- TRUE;
c(variables, tmp, hypotheses) %<-% recursion(exp[[2]], variables, NULL, hypotheses,
addition_mode = TRUE, depth = depth)
var_name <- exp[[2]];
while (!is.name(var_name)) {
var_name <- var_name[[2]];
}
var_name <- as.character(var_name);
} else {
var_name <- as.character(exp[[2]]);
}
# Find the precursors and invoked functions
c(variables, new_functions, hypotheses) %<-% recursion(exp[[3]], variables, NULL, hypotheses,
addition_mode = TRUE, depth = depth)
var_geneneration <- 0;
precursor_variable_ids <- list();
if (nrow(variables)) {
for (var_index in 1:nrow(variables)) {
variable <- as.list(variables[var_index, ]);
for (func_args in new_functions$arguments) {
if (variable$id %in% func_args) {
if (variable$type == "column") {
# TODO: do smth?
} else if (variable$name == var_name) {
is_mutation <- TRUE;
} else if (!(variable$id %in% precursor_variable_ids)) {
if (var_geneneration < variable$generation + 1) {
var_geneneration <- variable$generation + 1;
}
precursor_variable_ids <- append(precursor_variable_ids, variable$id);
}
}
}
}
}
# Get or create the variable
c(var_index, variables) %<-% find_variable(var_name, variables,
add = TRUE,
force = !is_mutation,
type_constraint = c("data", "model", "constant"));
last_function <- new_functions[nrow(new_functions), ]
if (nrow(last_function) && is.na(variables[var_index, ]$origin)) {
variables[var_index, ]$origin <- last_function$id;
}
# TODO: reassess the case when there are no new functions
if (is_mutation && nrow(new_functions)) {
new_functions[nrow(new_functions), ]$breakpoint <- variables[var_index, ]$id;
} else {
variables[var_index, ]$generation <- var_geneneration;
variables[var_index, ]$precursors <- list(precursor_variable_ids);
}
# If evaluation is disabled and we can't confirm that the assigned value is a dataframe,
# we can still meaningfully assume that the variable is a dataframe if it is actually declared as one
# or infer it from the fact it has been read from the file source.
# If evaluation is enabled, the data type will be set upon the evaluation later on
if (!eval_ &&
nrow(last_function) &&
(last_function$name %in% list("subset", "[", "data.frame", "as.data.frame", "table", "rbind", "cbind") ||
startsWith(last_function$name, "read"))) {
variables[var_index, ]$type <- "data";
# Explicit formula variable declarations
} else if (nrow(new_functions) == 1 && last_function$name == "~") {
selector <- sapply(hypotheses$functions, function (h_funcs) last_function$id %in% h_funcs);
variables[var_index, ]$value <- hypotheses[selector, ]$id;
variables[var_index, ]$type <- "formula";
hypotheses[selector, ]$formulas[[1]] = append(hypotheses[selector, ]$formulas[[1]], variables[var_index, ]$id);
} else if (nrow(hypotheses)) {
for (func_id in new_functions$id) {
func_args <- new_functions[new_functions$id == func_id, ]$arguments[[1]];
selector <- sapply(hypotheses$functions, function (h_funcs) func_id %in% h_funcs) | hypotheses$id %in% func_args
if (any(selector)) {
variables[var_index, ]$type <- "model";
hypotheses[selector, ]$models <- lapply(hypotheses[selector, ]$models,
function (models) append(models, variables[var_index, ]$id))
}
}
}
# 2nd (and further) gen models
model_precursors <- subset(variables, id %in% precursor_variable_ids & type == "model")
if (nrow(model_precursors)) {
selector <- sapply(hypotheses$models, function (models) any(model_precursors$id %in% models))
hypotheses[selector, ]$models <- lapply(hypotheses[selector, ]$models,
function (models) append(models, variables[var_index, ]$id))
variables[var_index, ]$type <- "model"
}
if (eval_) {
if (!is_mutation) {
evaluation <- eval(exp[[3]], envir = env)
if (is.data.frame(evaluation))
variables[var_index, ]$type <- "data"
# S4 objects cause errors when being asigned to the cell of the dataframe
if (isS4(evaluation))
evaluation <- NA
else if ('package:ggplot2' %in% search() && is.ggplot(evaluation)) {
variables[var_index, ]$type <- "model" # TODO: replace with an exclusive data type
evaluation <- NA
# Wrap the collections so that they don't cripple the main dataframe
} else if (is.vector(evaluation) || is.matrix(evaluation) || is.list(evaluation) || is.data.frame(evaluation))
evaluation <- list(evaluation)
else if (is.null(evaluation))
evaluation <- NA
variables[var_index, ]$value <- evaluation
}
eval(exp, envir = env)
}
# Create columns from file-loaded data
if (variables[var_index, ]$type == "data" && !is.na(variables[var_index, ]$value) && !length(variables[var_index, ]$columns[[1]])) {
for (col_name in colnames(variables[var_index, ]$value[[1]])) {
c(col_index, variables) %<-% find_variable(col_name, variables,
add = TRUE,
force = TRUE,
check_shadowing = FALSE)
variables[col_index, ]$type <- "column"
variables[col_index, ]$generation <- variables[var_index, ]$generation + 1
variables[var_index, ]$columns[[1]] <- append(variables[var_index, ]$columns[[1]], variables[col_index, ]$id)
}
}
functions <- rbind(functions, new_functions)
# Is "[" call
} else if (is.call(exp) && identical(exp[[1]], quote(`[`)) && !force_as_function) {
# data[data$col1 == value, "col2"]
if (length(exp) == 4) {
col_name <- exp[[4]];
lookup <- exp[[3]];
# data[data$col1 == value, ] <- but w/o the $ in the end
# data[, smth] <- if smth is not a name of the column
if (missing(col_name) || (missing(lookup) && !is.atomic(col_name))) {
c(variables, functions, hypotheses) %<-% recursion(exp, variables, functions, hypotheses,
addition_mode = addition_mode, depth = depth,
force_as_function = TRUE);
# data[, "col"]
} else if (missing(lookup) && is.atomic(col_name)) {
exp[[1]] <- quote(`$`)
exp[[3]] <- NULL
c(variables, functions, hypotheses) %<-% recursion(exp, variables, functions, hypotheses,
addition_mode = addition_mode, depth = depth - 1);
} else {
c(variables, functions, hypotheses) %<-% hypothesis_subroutine(exp, variables, functions, hypotheses,
addition_mode = addition_mode, depth = depth);
}
} else {
c(variables, functions, hypotheses) %<-% recursion(exp, variables, functions, hypotheses,
addition_mode = addition_mode, depth = depth,
force_as_function = TRUE);
}
# Is "$" call
} else if (is.call(exp) && identical(exp[[1]], quote(`$`))) {
if (is.call(exp[[2]])) {
# data[data$col1 == value, ]$col2
if (identical(exp[[2]][[1]], quote(`[`))) {
c(variables, functions, hypotheses) %<-% hypothesis_subroutine(exp, variables, functions, hypotheses,
addition_mode = addition_mode, depth = depth);
# func()$column
} else {
c(variables, functions, hypotheses) %<-% recursion(exp[[2]], variables, functions, hypotheses,
addition_mode = addition_mode, depth = depth - 1);
}
# data$column
} else {
c(var_index, variables) %<-% find_variable(as.character(exp[[2]]), variables,
add = addition_mode,
type_constraint = c("data", "model", "constant"));
c(col_index, variables) %<-% find_variable(as.character(exp[[3]]), variables,
add = TRUE,
force = !length(variables[var_index, ]$columns[[1]]),
type_constraint = "column",
check_shadowing = FALSE);
variables[col_index, ]$generation <- variables[var_index, ]$generation + 1;
variables[col_index, ]$type <- "column";
if (!(variables[col_index, ]$id %in% variables[var_index, ]$columns[[1]])) {
variables[var_index, ]$columns[[1]] <- append(variables[var_index, ]$columns[[1]], variables[col_index, ]$id)
}
func <- list(
id = paste0(c("f", UUIDgenerate()), collapse = "-"),
name = paste0(as.character(exp)[c(2, 1, 3)], collapse = ""),
lines = NA,
signature = NA,
packages = NA,
arguments = list(list(variables[var_index, ]$id, variables[col_index, ]$id)),
depth = depth - addition_mode,
breakpoint = NA
);
functions[nrow(functions) + 1, ] <- func;
}
# Is "~" call -- formula (aka hypothesis) initialization
} else if (is.call(exp) && identical(exp[[1]], quote(`~`)) && !force_as_function) {
c(index, hypotheses, variables) %<-% find_hypothesis(exp, hypotheses, variables, add = TRUE);
c(variables, functions, hypotheses) %<-% recursion(exp, variables, functions, hypotheses,
addition_mode = addition_mode, depth = depth,
force_as_function = TRUE);
hypotheses[index, ]$functions[[1]] <- append(hypotheses[index, ]$functions[[1]], functions[nrow(functions), ]$id);
# Is function declaration
} else if (is.call(exp) && identical(exp[[1]], quote(`function`))) {
# TODO: introduce scopes
# functions[[length(functions) + 1]] <- exp;
# Is library import call
} else if (is.call(exp) && (identical(exp[[1]], quote(`library`)) || identical(exp[[1]], quote(`require`))) && !force_as_function) {
eval(exp, envir = env);
# Is library name call
} else if (is.call(exp) && identical(exp[[1]], quote(`::`))) {
# Shouldn't really ever pop up
# TODO: use to hint the exact package the function is being imported from?
# Is paranthesis (?!)
} else if (is.call(exp) && identical(exp[[1]], quote(`(`))) {
c(variables, functions, hypotheses) %<-% recursion(exp[[2]], variables, functions, hypotheses,
addition_mode = addition_mode, depth = depth);
# Is a control structure
} else if (is.call(exp) && (identical(exp[[1]], quote(`if`)) || identical(exp[[1]], quote(`for`)) || identical(exp[[1]], quote(`while`)) || identical(exp[[1]], quote(`repeat`)))) {
# TODO: add conditional as the precursor?
c(variables, functions, hypotheses) %<-% recursion(exp[[3]], variables, functions, hypotheses,
addition_mode = addition_mode, depth = depth);
# Is block statement
} else if (is.call(exp) && identical(exp[[1]], quote(`{`))) {
for (line in as.list(exp)[2:length(exp)])
c(variables, functions, hypotheses) %<-% recursion(line, variables, functions, hypotheses,
addition_mode = addition_mode, depth = depth)
# Is a generic function call
} else if (is.call(exp)) {
if (is.call(exp[[1]])) {
func_name <- paste0(as.character(exp[[1]])[c(2, 1, 3)], collapse = "");
} else {
func_name <- as.character(exp[[1]]);
}
packages <- find(func_name);
packages <- as.list(packages[grep("package:", packages)])
if (length(packages)) {
packages <- list(packages);
} else {
packages <- NA;
}
func <- list(
id = paste0(c("f", UUIDgenerate()), collapse = "-"),
name = func_name,
lines = NA,
signature = paste0(c(as.character(exp[[1]]), "(" , paste0(as.character(exp[2:length(exp)]), collapse = ", "), ")"), collapse = ""),
packages = packages,
arguments = list(),
depth = depth - addition_mode,
breakpoint = NA
);
if (length(exp) > 1) {
c(func, variables, functions, hypotheses) %<-% argument_recursion(as.list(exp)[2:length(exp)], func,
variables, functions, hypotheses,
depth = depth, addition_mode = addition_mode);
# If in the "shallow" mode and any of the variables referenced in the hypothesis are not "columns"
# -- brand them as such and add them to whichever "dataset" variable found within arguments of the same function
if (!eval_ && nrow(hypotheses)) {
hypothesis_selector <- sapply(hypotheses$functions, function (functions) func$id %in% functions) | hypotheses$id %in% func$arguments
if (nrow(hypotheses[hypothesis_selector, ])) {
columns <- hypotheses[hypothesis_selector, ]$columns[[1]]
column_list <- columns$control
if (!is.null(columns$dependant))
column_list <- append(column_list, columns$dependant)
constant_selector <- variables$type != "column" & variables$id %in% column_list
dataset_selector <- variables$type == "data" & variables$id %in% func$arguments
if (any(constant_selector) && any(dataset_selector)) {
data_var <- as.list(variables[dataset_selector, ])
for (col_id in column_list)
if (!(col_id %in% data_var$columns)) {
variables[variables$id == col_id, ]$type <- "column"
data_var$columns[[1]] <- append(data_var$columns[[1]], col_id)
}
}
}
}
# TODO: improve the hypothesis selection
if (nrow(hypotheses)) {
col_ids <- list();
for (var_index in 1:nrow(variables)) {
var <- as.list(variables[var_index, ]);
if (var$id %in% func$arguments && var$type == "column") {
col_ids <- append(col_ids, var$id);
}
}
if (length(col_ids)) {
for (hyp_index in 1:nrow(hypotheses)) {
hypothesis <- as.list(hypotheses[hyp_index, ]);
columns <- append(hypothesis$columns[[1]]$control, hypothesis$columns[[1]]$dependant);
if (length(setdiff(columns, col_ids)) == 0) {
hypothesis$functions[[1]] <- append(hypothesis$functions[[1]], func$id);
hypotheses[hyp_index, ] <- hypothesis;
break;
}
}
}
}
}
func$arguments <- list(func$arguments);
functions[nrow(functions) + 1, ] <- func;
# Is variable name call
} else if (is.name(exp)) {
# print(as.character(exp));
# NB: does not add the variable to the list by deault
c(tmp, variables) %<-% find_variable(as.character(exp), variables,
add = addition_mode);
# Is atomic
} else if (is.atomic(exp)) {
# Dunno
} else {
}
list(variables, functions, hypotheses)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.