##
## The core interpreter class
##
#' @importFrom methods new
ParseDriver <- setRcppClass("ParseDriver")
#Flags you can bitwise OR to enable debugging features.
#It's necessary that these have the same numeric values as
#the macros in the C++ header file.
DEBUG_PARSE_TRACE <- 4
DEBUG_MATCH_CALL <- 8
DEBUG_VERBOSE_ERROR <- 16
DEBUG_NO_PARSE_ERROR <- 32
DEBUG_NO_CALLBACKS <- 64
AdoInterpreter <-
R6::R6Class("AdoInterpreter",
public = list(
dta = NULL,
##
## ctor and dtor
##
initialize = function(df = NULL, debug_level = 0, print_results = 1,
echo = 0)
{
## Create objects
self$dta <- Dataset$new()
private$logger <- Logger$new()
private$rclass <- SymbolTable$new()
private$eclass <- SymbolTable$new()
private$cclass <- SymbolTable$new()
private$settings <- SymbolTable$new()
private$macro_syms <- SymbolTable$new()
private$usercmd <- SymbolTable$new()
ns <- getNamespace(utils::packageName())
private$defaultcmd <- SymbolTable$new(env=ns)
## Populate the objects
defaults <- private$cclass_value_defaults()
for(nm in names(defaults))
self$cclass_set(nm, defaults[[nm]])
defaults <- private$setting_value_defaults()
for(nm in names(defaults))
self$setting_set(nm, defaults[[nm]])
self$setting_set("echo", echo)
self$setting_set("print_results", print_results)
self$setting_set("debug_level", debug_level)
if(!is.null(df))
self$dta$use_dataframe(df)
},
finalize = function()
{
self$dta$clear()
self$log_deregister_all_sinks()
},
##
## Logging methods
##
log_has_sink = function(filename, type = NULL)
{
return(private$logger$has_sink(filename = filename, type = type))
},
log_sink_type = function(filename)
{
return(private$logger$sink_type(filename = filename))
},
log_register_sink = function(filename, type="log", header = TRUE)
{
return(private$logger$register_sink(filename = filename, type = type,
header = header))
},
log_deregister_sink = function(filename)
{
return(private$logger$deregister_sink(filename))
},
log_deregister_all_sinks = function(type = NULL)
{
return(private$logger$deregister_all_sinks(type = type))
},
log_command = function(msg, echo = NULL)
{
if(is.null(echo))
{
if(self$setting_defined("echo"))
echo <- self$setting_value("echo")
else
echo <- FALSE
}
return(private$logger$log_command(msg = msg, echo = echo))
},
log_result = function(msg, print_results = NULL)
{
if(is.null(print_results))
{
if(self$setting_defined("print_results"))
print_results <- self$setting_value("print_results")
else
print_results <- FALSE
}
return(private$logger$log_result(msg = msg, print_results = print_results))
},
log_sinks = function() private$logger$log_sinks,
log_cmdlog_sinks = function() private$logger$cmdlog_sinks,
log_is_enabled = function() private$logger$log_enabled,
log_set_enabled = function(value) private$logger$log_enabled(value),
log_cmdlog_is_enabled = function() private$logger$cmdlog_enabled,
log_cmdlog_set_enabled = function(value) private$logger$cmdlog_enabled(value),
##
## e-class accessors
##
eclass_all = function()
{
return(private$eclass$all_values())
},
eclass_names = function()
{
return(private$eclass$all_symbols())
},
eclass_set = function(sym, val)
{
return(private$eclass$set_symbol(sym, val))
},
eclass_unset = function(sym)
{
return(private$eclass$unset_symbol(sym))
},
eclass_value = function(sym)
{
return(private$eclass$symbol_value(sym))
},
eclass_defined = function(sym)
{
return(private$eclass$symbol_defined(sym))
},
eclass_query = function(val=NULL, enum=NULL)
{
raiseif(is.null(val) && is.null(enum),
msg="Must provide argument for e-class query")
if(enum)
return(self$eclass_names())
else
return(self$eclass_value(val))
},
##
## r-class accessors
##
rclass_all = function()
{
return(private$rclass$all_values())
},
rclass_names = function()
{
return(private$rclass$all_symbols())
},
rclass_set = function(sym, val)
{
return(private$rclass$set_symbol(sym, val))
},
rclass_unset = function(sym)
{
return(private$rclass$unset_symbol(sym))
},
rclass_value = function(sym)
{
return(private$rclass$symbol_value(sym))
},
rclass_defined = function(sym)
{
return(private$rclass$symbol_defined(sym))
},
rclass_query = function(val=NULL, enum=NULL)
{
raiseif(is.null(val) && is.null(enum),
msg="Must provide argument for r-class query")
if(enum)
return(self$rclass_names())
else
return(self$rclass_value(val))
},
##
## c-class accessors
##
# NOTE: this function may have side effects. Some of the c-class
# values do things on access, like create tempfiles.
cclass_all = function()
{
nms <- names(private$cclass_value_varying_quoted())
lst <- list()
for(nm in nms)
lst[[length(lst)+1]] <- private$cclass_value_varying(nm)
return(c(lst, private$cclass$all_values()))
},
cclass_names = function()
{
st <- private$cclass$all_symbols()
se <- names(private$cclass_value_varying_quoted())
return(c(st, se))
},
cclass_set = function(sym, val)
{
if(sym %in% names(private$cclass_value_varying_quoted()))
raiseCondition("Cannot set varying c-class value")
else
return(private$cclass$set_symbol(sym, val))
},
cclass_unset = function(sym)
{
if(sym %in% names(private$cclass_value_varying_quoted()))
raiseCondition("Cannot unset varying c-class value")
else
return(private$cclass$unset_symbol(sym))
},
cclass_value = function(sym)
{
if(sym %in% names(private$cclass_value_varying_quoted()))
return(private$cclass_value_varying(sym))
else
return(private$cclass$symbol_value(sym))
},
cclass_defined = function(sym)
{
if(sym %in% names(private$cclass_value_varying_quoted()))
return(TRUE)
else
return(private$cclass$symbol_defined(sym))
},
cclass_query = function(val=NULL, enum=NULL)
{
raiseif(is.null(val) && is.null(enum),
msg="Must provide argument for c-class query")
if(enum)
return(self$cclass_names())
else
return(self$cclass_value(val))
},
##
## Macro accessors
##
macro_all = function()
{
return(private$macro_syms$all_values())
},
macro_names = function()
{
return(private$macro$all_symbols())
},
macro_set = function(sym, val)
{
return(private$macro_syms$set_symbol(sym, val))
},
macro_unset = function(sym)
{
return(private$macro_syms$unset_symbol(sym))
},
macro_value = function(sym)
{
return(private$macro_syms$symbol_value(sym))
},
macro_defined = function(sym)
{
return(private$macro_syms$symbol_defined(sym))
},
##
## Settings accessors
##
setting_all = function()
{
return(private$settings$all_values())
},
setting_names = function()
{
return(private$settings$all_symbols())
},
setting_set = function(sym, val)
{
return(private$settings$set_symbol(sym, val))
},
setting_unset = function(sym)
{
return(private$settings$unset_symbol(sym))
},
setting_value = function(sym)
{
return(private$settings$symbol_value(sym))
},
setting_defined = function(sym)
{
return(private$settings$symbol_defined(sym))
},
##
## Methods for adding and removing user-defined commands
## FIXME how to do this? the names suggest they refer to all cmds,
## which currently they don't; should symboltable methods have a
## "parent" arg?
##
usercmd_all = function()
{
return(private$usercmd$all_values())
},
usercmd_names = function()
{
return(private$usercmd$all_symbols())
},
usercmd_set = function(sym, val)
{
raiseifnot(substr(sym, 1, 8) == 'ado_cmd_',
msg="User-defined cmd name must have ado_cmd_ prefix")
raiseif(sym %in% private$defaultcmd$all_symbols(),
msg="User-defined cmd must not shadow built-in cmd name")
# FIXME better validation of val
raiseifnot(is.function(val),
msg="User-defined cmd must be function")
return(private$usercmd$set_symbol(sym, val))
},
usercmd_unset = function(sym)
{
return(private$usercmd$unset_symbol(sym))
},
usercmd_value = function(sym)
{
return(private$usercmd$symbol_value(sym))
},
usercmd_defined = function(sym)
{
return(private$usercmd$symbol_defined(sym))
},
##
## Other utilities
##
cmd_all = function()
{
# unique already, see usercmd_set
return(c(private$defaultcmd$all_values(),
private$usercmd$all_values()))
},
cmd_names_all = function()
{
funcs <- private$defaultcmd$all_symbols()
funcs <- funcs[grep("^ado_cmd_", funcs)]
funcs <- c(funcs, self$usercmd_names()) # unique, see usercmd_set
return(funcs)
},
cmd_unabbreviate = function(name, cls="error", msg=NULL)
{
return(unabbreviateName(name, choices=self$cmd_names_all(),
cls=cls, msg=msg))
},
##
## The main entry point
##
interpret = function(con = NULL, echo = NULL)
{
debug_level <- self$setting_value("debug_level")
if(is.null(echo))
# Allow the echo setting to be overridden
echo <- self$setting_value("echo")
while(TRUE)
{
val <-
tryCatch(
{
inpt <- read_input(con)
if(length(inpt) == 0) # we've hit EOF
raiseCondition(msg="Exit requested",
cls="ExitRequestedException")
cls <- methods::getRefClass("ParseDriver")
obj <- cls$new(inpt, self, debug_level, echo)
obj$parse()
},
error=identity,
AdoException=identity
)
if(inherits(val, "ExitRequestedException"))
break
else if(inherits(val, c("error", "AdoException")))
self$log_result(val$message %p% "\n\n")
}
return(invisible(NULL))
},
##
## Callbacks for the frontend
##
cmd_action = function(ast, txt, echo)
{
self$log_command(". " %p% trimws(txt) %p% "\n", echo=echo)
check(ast, context=self, self$debug_parse_trace)
self$deep_eval(codegen(ast, context=self))
},
#Recursive evaluation of the sort of expression object that the parser builds.
#This function both evaluates the expressions and sends the results through
#the logger.
deep_eval = function(expr)
{
envir <- as.list(private$usercmd$env)
enclos <- private$defaultcmd$env
ret <- list()
for(chld in expr)
{
if(is.expression(chld))
ret[[length(ret)+1]] <- self$deep_eval(chld)
else
{
tmp <- suppressWarnings(withVisible(eval(chld, envir=envir, enclos=enclos)))
ret[[length(ret)+1]] <- tmp$value
if(tmp$visible)
self$log_result(fmt(tmp$value))
}
}
# Return this so that higher layers can check whether it's a condition,
# but those layers don't print it. Result printing happens above.
return(ret)
},
# A callback that allows the lexer to retrieve macro and
# (e,r,c)-class values
macro_accessor = function(name)
{
#Implement the e() and r() stored results objects, and the c() system
#values object. All of the regexes here are a little screwy: when the e(),
#r(), or c() appears at the beginning of the macro text, everything after
#the close paren is ignored. But this is actually Stata's behavior,
#so we'll run with it.
#The (e,r,c)-classes are ONLY recognized when at the start of a macro text.
#The "_?" in the regexes matches them when used in either a local macro
#(which the parser expands into a global with a prefixed "_") or a global.
#One peculiarity of note: for the c-class values, we don't just
#check the c-class environment. We also have to looks up certain
#c-class values from other places, mainly Sys.* R functions and
#other wrappers around system APIs. C-class values not resolved from
#such lookups are looked for in the usual symbol table. E-class and
#r-class values don't behave this way, and all values are stored in
#the corresponding symbol tables.
#the e() class
m <- regexpr("^e_?\\((?<match>.*)\\)", name, perl=TRUE)
start <- attr(m, "capture.start")
len <- attr(m, "capture.length")
if(start != -1)
{
txt <- substr(name, start, start + len - 1)
return(self$eclass_value(txt))
}
#the r() class
m <- regexpr("^_?r\\((?<match>.*)\\)", name, perl=TRUE)
start <- attr(m, "capture.start")
len <- attr(m, "capture.length")
if(start != -1)
{
txt <- substr(name, start, start + len - 1)
return(self$rclass_value(txt))
}
#the c() class
m <- regexpr("^_?c\\((?<match>.*)\\)", name, perl=TRUE)
start <- attr(m, "capture.start")
len <- attr(m, "capture.length")
if(start != -1)
{
txt <- substr(name, start, start + len - 1)
return(self$cclass_value(txt))
}
#a normal macro
if(!(self$macro_defined(name)))
return("")
else
return(self$macro_value(name))
}
),
active = list(
debug_parse_trace = function()
{
debug_level <- self$setting_value("debug_level") # set in ctor
return((debug_level %&% DEBUG_PARSE_TRACE) != 0)
},
debug_match_call = function()
{
debug_level <- self$setting_value("debug_level")
return((debug_level %&% DEBUG_MATCH_CALL) != 0)
},
debug_verbose_error = function()
{
debug_level <- self$setting_value("debug_level")
return((debug_level %&% DEBUG_VERBOSE_ERROR) != 0)
},
debug_no_parse_error = function()
{
debug_level <- self$setting_value("debug_level")
return((debug_level %&% DEBUG_NO_PARSE_ERROR) != 0)
}
),
private = list(
logger = NULL,
rclass = NULL,
cclass = NULL,
eclass = NULL,
settings = NULL,
macro_syms = NULL,
usercmd = NULL,
defaultcmd = NULL,
default_webuse_url = function()
{
return('https://www.stata-press.com/data/r15/')
},
# Default settings (for, e.g., restoring to defaults)
setting_value_defaults = function()
{
return(list(
webuse_url = private$default_webuse_url()
))
},
# Return the default c-class env values
cclass_value_defaults = function()
{
return(list(
##
## Mathematical constants
##
pi = pi,
e = exp(1),
##
## Letters
##
alpha = paste0(letters, collapse=" "),
ALPHA = paste0(LETTERS, collapse=" "),
##
## Weeks and months
## The Stata docs imply these aren't localized, but they should be.
##
Wdays = paste0(weekdays(seq(as.Date("2013-06-03"), by=1, len=7),
abbreviate=TRUE),
collapse=" "),
Weekdays = paste0(weekdays(seq(as.Date("2013-06-03"), by=1, len=7)),
collapse=" "),
Mons = paste0(format(ISOdate(2000, 1:12, 1), "%b"), collapse=" "),
Months = paste0(format(ISOdate(2000, 1:12, 1), "%B"), collapse=" "),
##
## URLs for webuse, also available in settings
##
default_webuse_url = private$default_webuse_url(),
##
## OS or machine info that can't change during execution
##
os = if(.Platform$OS.type == "windows") "Windows"
else if(Sys.info()["sysname"] == "Darwin") "MacOSX"
else "Unix",
osdtl = Sys.info()["release"] %p% " " %p% Sys.info()["version"],
bit = 8 * .Machine$sizeof.pointer, # e.g., 8 * 8 = 64-bit
machine_type = utils::sessionInfo()$platform,
byteorder = if(.Platform$endian == 'big') "hilo" else "lohi",
processors = parallel::detectCores(),
processors_mach = parallel::detectCores(),
processors_max = parallel::detectCores(),
dirsep = .Platform$file.sep,
#Almost if not quite exactly right
mindouble = -.Machine$double.xmax,
maxdouble = .Machine$double.xmax,
epsdouble = .Machine$double.eps,
smallestdouble = .Machine$double.xmin,
#R does have a 4-byte integer type, even though integers are
#generally represented as doubles
minlong = -2^31 + 1,
maxlong = 2^31 - 1,
#Almost if not quite exactly right
minfloat = -.Machine$double.xmax,
maxfloat = .Machine$double.xmax,
epsfloat = .Machine$double.eps,
##
## Versions of R or ado, also can't change during execution
##
rversion = R.version$version.string,
ado_version = utils::packageVersion(utils::packageName()),
##
## Resource limits
##
max_N_theory = 2^31 - 1,
max_k_theory = 2^31 - 1,
#This corresponds to a data.frame of 2^31 - 1 columns and 2^31 - 1 rows,
#where each cell is a string of 2^31 - 1 bytes' length. There's a reason
#this variable's name ends in "theory".
max_width_theory = (2^31 - 1)^3,
#As hardcoded into our lexer: see ado.fl's redefinition
#of the C macro YYLMAX
max_macrolen = 2^16,
macrolen = 2^16,
max_macro_namelen = 32, # Stata disallows long identifiers
#The real limit is on the length of a single lexer token, which can
#be no longer than 2^16 bytes. There's no limit on the length of
#commands, provided they can be represented as R strings, and an R
#string can be no longer than 2^31 - 1 bytes. Note that encountering
#a single token longer than YYLMAX = 2^16 bytes will cause yylex() to
#raise an R error condition rather than calling the C exit() function
#on the R process.
max_cmdlen = 2^31 - 1,
cmdlen = 2^31 - 1,
#The maximum length of the symbol type as of R 2.13.0
namelen = 10000,
#This is the limit on vector size hardcoded into R in various places;
#the newer long vectors can be, of course, longer, but using them is
#still difficult and we've made no effort to do so.
maxvar = 2^31 - 1,
maxstrvarlen = 2^31 - 1,
#The str# and strL types as we implement them are the same
maxstrlvarlen = 2^31 - 1
))
},
# These are the c-class values that may change during execution,
# which means we can't set any default values for them; they have
# to be looked up at query time
cclass_value_varying_quoted = function()
{
return(list(
current_date = quote(Sys.Date()),
current_time = quote(Sys.time()),
mode = quote(if(interactive()) "" else "batch"),
console = quote(if(.Platform$GUI == "unknown") "console" else ""),
hostname = quote(Sys.info()["nodename"]),
username = quote(Sys.info()["user"]),
tempdir = quote(tempdir()),
tmpdir = quote(tempdir()),
pwd = quote(getwd()),
N = quote(self$dta$dim[1]),
k = quote(self$dta$dim[2]),
width = quote(utils::object.size(self$dta)),
changed = quote(self$dta$changed),
filename = quote(self$dta$filename),
filedate = quote(self$dta$filedate),
niceness = quote(tools::psnice()),
rng = quote(paste0(RNGkind(), collapse=" ")),
rngstate = quote(paste0(.Random.seed, collapse=",")),
# it's appalling that triggering a garbage collection is the
# recommended way to check mem usage
memory = quote({mem <- gc(); 1024 * (mem[1, "(Mb)"] + mem[2, "(Mb)"])}),
# FIXME - need to implement the machinery for commands to have
# return codes; it's not clear what this should look like in an
# implementation where control flow at a low level is based on
# calls to signalCondition().
rc = quote(0)
))
},
cclass_value_varying = function(val)
{
qtd <- private$cclass_value_varying_quoted()
if(val %in% names(qtd))
return(eval(qtd[[val]]))
else
raiseCondition("Bad c-class value")
}
)
)
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.