SciDBR: An R Interface to SciDB

## Functions for debugging and logging

####################
#
# Public functions
#
####################

# Use options(scidb.debug=TRUE) to enable debug printing.
is.debug <- function() { as.logical(getOption("scidb.debug", FALSE)) }

# Use either options(scidb.trace=TRUE) or options(scidb.debug=2)
# to enable trace printing.
is.trace <- function() {
  getOption("scidb.trace", getOption("scidb.debug", 0) >= 2)
}

# Use options(scidb.trace.http=TRUE) or options(scidb.trace=TRUE)
# to print HTTP communication between SciDBR and the scidb or Shim server.
is.trace.http <- function() {
  is.trace() || getOption("scidb.trace.http", FALSE)
}

# Use options(scidb.trace.api='exported') to trace exported API calls.
#   (TRUE will also work.)
# Use options(scidb.trace.api='internal') to trace internal calls too.
#   (options(scidb.trace=TRUE) will also enable API call tracing.)
is.trace.api <- function() {
  opt <- getOption("scidb.trace.api", "")
  is.trace() || opt == TRUE || has.chars(opt)
}

# Use options(scidb.log.mask=TRUE) to mask UIDs and hashes in logging output
# to make it easier to compare logs between two different runs.
is.mask.enabled <- function() {
  getOption("scidb.log.mask", FALSE)
}

##################
#
# Basic logging
#
##################

## Functions for writing diagnostic messages
## For now these are to stderr; they could easily be written to a log file instead
msg <- function(..., tag="[SciDBR]") {
  args <- if (is.mask.enabled()) .mask.msg(...) else list(...)
  do.call("message", c(list(tag, " "), args))
}
msg.debug <- function(..., tag="[SciDBR]") {
  if (is.debug()) msg(tag=tag, ...)
}
msg.trace <- function(..., tag="[SciDBR-trace]") {
  if (is.trace()) msg(tag=tag, ...)
}
msg.trace.http <- function(..., tag="[SciDBR-HTTP]") {
  if (is.trace.http()) msg(tag=tag, ...)
}
msg.trace.api <- function(..., tag="[SciDBR-API]") {
  if (is.trace.api()) msg(tag=tag, ...)
}

################
#
# API tracing
#
################

#' Trace the entry to a function: print the function's name and argument list
#' to the log. The tracing only happens if options(scidb.trace.api) is either
#' TRUE or "internal".
#'
#' To trace a function, put a call to .TraceEnter and .TraceExit
#' in any function you want to trace, like this:
#' Foo <- function(arg1, arg2, ...) {
#'   trace <- .TraceEnter("Foo", arg1=arg1, arg2=arg2, ...)
#'   on.exit(.TraceExit(trace, returnValue()), add=TRUE)
#'   # ... the rest of Foo
#' }
#' The magic R function returnValue() in an on.exit() will contain
#' the value that the function returns, no matter how it returns it.
#' Unfortunately there's no easy way to do that for args and get the
#' evaluated value of the args.
#'
#' R has an alternative trace() facility, but it only displays the arguments 
#' to a function in unevaluated form, so it's very difficult to see what 
#' parameters the caller actually passed to a given function call.
#' 
#' @param fnname  the name of the function being called
#' @param ...  the arguments of the function (named and unnamed)
#' @return an opaque object you can pass to .TraceExit when the
#'         function returns
#' @noRd
.TraceEnter <- function(fnname, ...)
{
  if (!is.trace.api()) {
    return()
  }

  args <- sapply(list(...), function(x) .ToString(x))
  invocation <- paste0(fnname, "(", .FormatNameValuePairs(args), ")")
  
  depth <- as.numeric(getOption(".trace_depth", 0)) + 1
  options(.trace_depth = depth)
  
  msg.trace.api(.Indent(depth - 1),
                "->(", depth, ") ",
                .Trunc(invocation))

  return(invocation)
}

#' Trace a call to an internal function.
#' This is the same as .TraceEnter, but the tracing only happens
#' if options(scidb.trace.api) is set to "internal".
#' @seealso .TraceEnter
#' @noRd
.TraceEnterInternalFn <- function(...)
{
  if (getOption("scidb.trace.api", "") == "internal") {
    return(.TraceEnter(...))
  }
}

#' Trace the end of a function call.
#' This should normally be in an on.exit() block to make sure it executes
#' at the end of the function no matter how it exits.
#' @param invocation  the opaque object returned by .TraceEnter()
#' @param retval  the value returned from the function. If the call to
#'    .TraceExit is in an on.exit() block, you can use returnValue() for this.
#' @seealso .TraceEnter
#' @noRd
.TraceExit <- function(invocation, retval=NULL)
{
  if (!is.trace.api() || !has.chars(invocation)) {
    return()
  }

  depth <- as.numeric(getOption(".trace_depth"))
  msg.trace.api(.Indent(depth - 1),
                "<-(", depth, ") ",
                .Trunc(invocation, 30),
                " returns ",
                tryCatch(.Trunc(.ToString(retval)),
                         error=function(err) "<unknown>"))
  options(.trace_depth = depth - 1)
}

################
#
# HTTP Tracing
#
################


#' Log an HTTP request sent to the server.
#' @param h  the curl handle
#' @param method  the HTTP method: 'GET', 'POST', 'DELETE', etc.
#' @param uri  the request URI/URL
#' @param headers  a named list of request headers
#' @param data  the data being sent, either a string or a raw vector
#' @param attachments  a list of attachments
#' @noRd
.TraceHttpRequestSent <- function(h, method, uri, headers=NULL, data=NULL,
                                  attachments=NULL)
{
  if (!is.trace.http()) {
    return()
  }

  msg.trace.http(">>> Sending HTTP Request: ", method, " ", uri)

  for (header in names(headers)) {
    msg.trace.http("     Header: ", header, ": ", headers[[header]])
  }

  cookies <- curl::handle_cookies(h)
  if (!is.null(cookies) && nrow(cookies) > 0) {
    msg.trace.http("     Cookies: ", .CookiesToTsv(cookies))
  }
  
  if (has.chars(data)) {
    msg.trace.http("     Data: ",
                   .TruncateHttpData(data, headers[["Content-Type"]]))
  }

  for (iatt in seq_along(attachments)) {
    name <- names(attachments)[[iatt]]
    att <- attachments[[iatt]]
    msg.trace.http("     Attachment ", iatt,
                   " (name=", name, ", content-type=", att$content_type, "): ",
                   .TruncateHttpData(att$data, att$content_type))
  }
}

#' Log an HTTP response received from the server.
#' @param h  the curl handle
#' @param method  the HTTP method of the request: 'GET', 'POST', 'DELETE', etc.
#' @param uri  the request URI/URL
#' @param resp  the result of a curl_fetch_memory() call, with $parsed_headers
#'    set to the result of curl::parse_headers_list()
#' @noRd
.TraceHttpResponseReceived <- function(h, method, uri, resp)
{
  if (!is.trace.http()) {
    return()
  }
  
  msg.trace.http("<<< Received HTTP status code ", resp$status_code, " from ", method, " ", uri)

  headers <- resp$parsed_headers
  for (ii in seq_along(headers)) {
    msg.trace.http("     Header: ", names(headers)[[ii]], ": ", headers[[ii]])
  }

  cookies <- curl::handle_cookies(h)
  if (!is.null(cookies) && nrow(cookies) > 0) {
    msg.trace.http("     Cookies: ", .CookiesToTsv(cookies))
  }

  if (length(resp$content) > 0) {
    content_type <- headers[["content-type"]]
    msg.trace.http("     Data: ", .TruncateHttpData(resp$content, content_type))
  }
}

#####################
#
# Private functions
#
#####################

## Mask out frequently-changing parts of names
## so that two logs can be diffed more easily
.mask.msg <- function(...) {
  sapply(list(...), function(s) {
    s <- gsub("conn@\\w+", "conn@<masked>", s)
    s <- gsub("R_array\\w+", "R_array<masked>", s)
    s <- gsub("shim_input_buf_\\w+", "shim_input_buf_<masked>", s)
    s <- gsub('session="\\w+"', 'session="<masked>"', s)
    s
  })
}

#' Truncate a string; if it's too long, chop it off and replace the end
#' with "..."
#' @param str  the string to truncate
#' @param newlen  the number of characters to truncate to; if absent, use
#'                option "scidb.trace.api.maxlength"
#' @return the truncated string
#' @noRd
.Trunc <- function(str, newlen=NULL)
{
  newlen <- newlen %||% as.numeric(getOption("scidb.trace.api.maxlength", 999))
  if (nchar(str) <= newlen) {
    return(str)
  }
  return(paste0(strtrim(str, newlen-3), "..."))
}

#' Remove all items after the first n items.
#' It's good to do this before printing a long list, especially when the string
#' will itself be truncated, to avoid spending significant amounts of time
#' concatenating strings for list items that won't be printed.
#' @param val  the vector to truncate
#' @param n  the number of items to keep in the vector; if absent, use
#'           option "scidb.trace.api.maxitems"
#' @return the truncated vector
#' @noRd
.TruncItems <- function(val, n=NULL)
{
  n <- n %||% as.numeric(getOption("scidb.trace.api.maxitems", 99))
  return(head(val, n))
}

#' @return a string consisting of n*indent whitespace characters.
#' @noRd
.Indent <- function(n, indent=NULL)
{
  indent <- indent %||% as.numeric(getOption("scidb.trace.indent", 2))
  return(strrep(" ", 2 * indent))
}

#' Given a vector of named elements, write the vector into a string like
#'  "name1=val1, name2=val2, name3=val3", etc.
#' @param vec  the vector
#' @param name_value_delimiter  goes between a name and its value
#' @param item_delimiter  goes between one name-value pair and the next
#' @return a string representing the vector's elements
#' @noRd
.FormatNameValuePairs <- function(vec,
                                  name_value_delimiter="=",
                                  item_delimiter=", ")
{
  if (is.null(vec)) {
    return("(R NULL value)")
  }
  if (length(vec) == 0) {
    return(paste0("(0-length vector of type ", typeof(vec), ")"))
  }
  paste0(sapply(seq_along(.TruncItems(vec)),
                function(ii) {
                  name <- names(vec)[[ii]]
                  val <- if (is.null(vec[[ii]])) {
                    "(R NULL value)"
                  } else if (length(vec[[ii]]) == 0) {
                    paste0("(0-length vector of type ", typeof(vec[[ii]]), ")")
                  } else {
                    vec[[ii]]
                  }
                  if (has.chars(name)) {
                    paste0(name, name_value_delimiter, val)
                  } else {
                    val
                  }
                }),
         collapse=item_delimiter)
}

#' @return a string that encodes the given value, whether it's a
#'   vector, an env, or an afl/scidb object.
#' @importFrom jsonlite toJSON
#' @noRd
.ToString <- function(val)
{
  if (is.null(val)) {
    return("(R NULL value)")
  }
  if (length(val) == 0) {
    return(paste0("(0-length vector of type ", typeof(val), ")"))
  }
  if (is.numeric(val) || is.logical(val)) {
    return(as.character(val))
  }
  if (is.character(val)) {
    if (length(val) == 1) {
      return(if (startsWith(val, '"')) val else paste0('"', val, '"'))
    }
    return(paste0("(", .FormatNameValuePairs(val), ")"))
  }
  if (inherits(val, "afl")) {
    return(paste0("conn@", attr(val, "connection")$id))
  }
  if (inherits(val, "scidb")) {
    return(paste0("scidb(", val@name, ")"))
  }
  
  ## Use toJSON() just because it's able to print many kinds of objects
  ##   including envs - better than format(), as.character(),
  ##   or other alternatives.
  ## Use force=TRUE to treat an unknown class as a plain list/env.
  result <- tryCatch(jsonlite::toJSON(.TruncItems(val),
                                      force=TRUE, auto_unbox=TRUE),
                     error=function(err) NULL)
  if (has.chars(result)) {
    return(result)
  }
  return("<unknown>")
}


#' Truncate the given HTTP data to a suitable length for logging.
#' @param data  the HTTP data being sent or received, either a string
#'   or a raw vector.
#' @param content_type  the Content-Type of the HTTP data.
#' @return a string for logging the data
#' @noRd
.TruncateHttpData <- function(data, content_type)
{
  content_type <- content_type %||% ""
  if (startsWith(content_type, "application/json")) {
    max_chars <- getOption("scidb.trace.http.maxlength.json", 2999)
    is_text <- TRUE
  } else if (startsWith(content_type, "text/") || is.character(data)) {
    max_chars <- getOption("scidb.trace.http.maxlength.text", 999)
    is_text <- TRUE
  } else {
    max_chars <- getOption("scidb.trace.http.maxlength.binary", 19)
    is_text <- FALSE
  }

  if (is_text) {
    text <- if (is.raw(data)) rawToChar(data) else data
    return(paste0(strtrim(text, max_chars),
                  if (nchar(text) > max_chars) "..." else ""))
  }
  return(paste0("(binary) ", paste0(data[1:max_chars], collapse=""),
                if (length(data) > max_chars) "..." else ""))
}