R/core.R

Defines functions getStataFuture doInStata

Documented in doInStata getStataFuture

#' Execute some Stata code in a running Stata instance
#' @param id An object of S3 class 'StataID' generated by \code{\link[RStataLink]{startStata}}
#' @param code A Stata code in a string to be executed
#' @param df A data frame to be loaded into Stata before the Stata code is executed (optional)
#' @param import_df Logical: should the resulting Stata dataset be returned?
#'                      Defaults: \code{TRUE} if \code{df} is provided, else \code{FALSE}.
#' @param results NULL or a character vector with either \code{"e"} or \code{"r"} or both of these
#' characters. Default: \code{c("e", "r")} which means that both e-class and r-class Stata results
#' will be imported (stored in e() and r() Stata macros, scalars, matrices respectively). If
#' \code{NULL}, none of these results will be imported.
#' @param timeout Seconds for R to wait for Stata results. Default: infinity.
#' @param preserve_restore Logical: should the Stata code start with \code{preserve} and end with \code{restore}?
#'                         Default: \code{FALSE}.
#' @param cleanup Logical: should the time-stamped temporary files (.do file, .log file, and possible input
#' and output .tsv files) be deleted at the end? Default: \code{TRUE}.
#' @param nolog Logical: should the Stata log be discarded in the returned value? Default: \code{FALSE}.
#' @param future Logical: should this function block R until Stata finishes the job and return what Stata
#' produces (\code{future = FALSE}, default) or should this function send the job to Stata and
#' not make R wait for Stata output (\code{future = TRUE}). In latter case, the Stata output can
#' be obtained with the function \code{\link[RStataLink]{getStataFuture}}.
#' @return
#' If \code{future = FALSE}, a list with:
#' \itemize{
#'          \item \code{log} -- A character vector with Stata display log
#'          if \code{nolog = FALSE}. May be an empty string if Stata does not finish executing
#'          the code before \code{timeout}.
#'          \item \code{error} -- If Stata displays an error -- an integer number with the Stata
#'          error code number (see \url{http://www.stata.com/manuals14/perror.pdf}).
#'          The error message should be visible in the \code{log} (see the point above).
#'          \item \code{df} -- Optional: a data frame saved by Stata if \code{import_df = TRUE} and
#'          if it can be read by \code{\link[utils]{read.delim}}, else \code{NULL}
#'          if \code{\link[utils]{read.delim}} returns an error.
#'			\item \code{results} -- if the argument \code{results} is not \code{NULL}, a list with
#'			one or both of the elements named \code{e_class} and/or \code{r_class}
#'			(depending on what was specified in the argument \code{results}), each including (if available):
#'			\itemize{
#'				   \item \code{scalars} -- a named list of numeric scalar values,
#'				   \item \code{macros} -- a named list of character (string/text) values,
#'				   \item \code{matrices} -- a named list of numeric matrices,
#'				   \item \code{modeldf} -- only for \code{e_class}: a data.frame with the estimated coefficients
#'				   (column \code{coef}), and standard errors (column \code{stderr}), with
#'				   the Stata variable names recorded in row.names.
#'			}
#' }
#' If \code{future = TRUE}, an object of S3 class 'StataFuture' to be used by
#' \code{\link[RStataLink]{getStataFuture}}.
#' @export
doInStata <- function(id,
                      code="",
                      df=NULL,
                      import_df=!is.null(df),
                      results=c('e','r'),
                      timeout=Inf,
                      preserve_restore=FALSE,
                      cleanup=TRUE,
                      nolog=FALSE,
                      future=FALSE) {
  stopifnot(class(id)=='StataID',
            is.character(code),
            is.data.frame(df) || is.null(df),
            is.logical(import_df), length(import_df)==1,
            if (results %>% is.null) TRUE else
              all(results %in% c('e','r')),
            is.numeric(timeout), timeout>=0,
            is.logical(preserve_restore), length(preserve_restore)==1,
            is.logical(cleanup), length(cleanup)==1,
            is.logical(nolog), length(nolog)==1)
  
  path <- id %++% '/'
  time_stamp <- timeStamp()
  logfile <- paste0(path, 'statalog_', names(id), '_', time_stamp, '.log')
  
  import_df_cmd <- ""
  if (!is.null(df)) {
    inputtsvf <- path %++% 'inputdf_' %++% time_stamp %++% '.tsv'
    write.table(df, file=inputtsvf, sep="\t", na="", row.names=FALSE)
    import_df_cmd <- 'qui insheet using "' %++% inputtsvf %++% '", tab names clear \n'
  }
  
  save_result_cmd <- ""
  outputtsvf <- NULL
  if (import_df) {
    outputtsvf <- path %++% 'outputdf_' %++% time_stamp %++% '.tsv'
    save_result_cmd <- 'qui outsheet using "' %++% outputtsvf %++% '", replace \n'
  }
  
  stata_commands <- paste(
    c('tempname log_' %++% names(id),
      'cap log close `log_' %++% names(id) %++% '\'',
      'log using "' %++% logfile %++% '", replace text name(`log_' %++% names(id) %++% '\')',
      ifelse(preserve_restore, 'preserve', ""),
      import_df_cmd,
      code,
      ifelse(!is.null(results), 'qui getStataResults ' %++% time_stamp, ""),
      save_result_cmd,
      ifelse(preserve_restore, 'restore', ""),
      'log close `log_' %++% names(id) %++% '\'',
      '\n'),
    collapse='\n')
  dofile <- path %++% time_stamp %++% '.do'
  cat(stata_commands, file=dofile)
  close(file(dofile))
  
  StataFuture <- environment() %>%
    as.list() %>%
    `class<-`('StataFuture')
  if (future) StataFuture else getStataFuture(StataFuture)
}

#' Obtain Stata output from a job previously sent to Stata by \code{\link[RStataLink]{doInStata}}
#' 
#' @param StataFuture An object of S3 class 'StataFuture' produced by
#' \code{\link[RStataLink]{doInStata}}, with argument \code{future = TRUE},
#' when a job was sent to Stata.
#' @return
#' See the description of the return value for \code{\link[RStataLink]{doInStata}} with
#' argument \code{future = FALSE}.
#' @export
getStataFuture <- function(StataFuture) {
  stopifnot(class(StataFuture)=='StataFuture')
  
  for (n in names(StataFuture))
    assign(n, StataFuture[[n]])  # destructure StataFuture
  
  t <- Sys.time() %>% as.numeric
  
  Output <- list()
  # Wait until the log file is closed by Stata
  repeat {
    Output$log <- tryCatch(readLines(logfile), error = function(e) NULL, warning = function(w) NULL)
    close(file(logfile))
    isError <- Output$log %>%
      contains('^Waiting for remote task requests')
    logClosed <- Output$log %>%
      contains('^.*log close `log_' %++% names(id))
    if (logClosed || isError ||
        Sys.time() %>% as.numeric %>% subtract(t) >= timeout) {
      if(length(Output$log)>=10)
        Output$log %<>% extract(9:(length(.) -
                                     ifelse(isError,
                                            5,10))) %>%
        Filter(function(x)
          x!='. ' & !grepl(time_stamp,x),
          .) %>%
        sub(rep.int('-',240) %>% paste(collapse=""),
            rep.int('-',68) %>% paste(collapse=""),
            ., fixed=TRUE) else
              Output$log <- ""
            break
    }
    Sys.sleep(.01)
  }
  if (isError)
    Output$error <- Output$log %>%
    tail(1) %>%
    sub('^r\\((\\d*)\\);','\\1',.) %>%
    as.integer %>%
    `class<-`('StataErrorNumber')
  if (nolog) Output$log <- NULL
  if (!is.null(Output$log)) class(Output$log) <- 'StataLog'
  if (import_df && !isError) {
    # Wait for the Stata output df file
    while (!file.exists(outputtsvf) &&
           Sys.time() %>% as.numeric %>% subtract(t) < timeout) {
      Sys.sleep(.01)
    }
    Output$df <- tryCatch(utils::read.delim(outputtsvf, stringsAsFactors=FALSE, check.names=FALSE),
                          error = function(e) NULL, warning = function(w) NULL)
    close(file(outputtsvf))
  }
  
  if (!is.null(results) && !isError) {
    resulttsvf <- path %++% 'resultdf_"CLASS"_' %++% time_stamp %++% '.tsv'
    Output$results <- lapply(results, function(x)
      tryCatch(resulttsvf %>%
                 sub('"CLASS"',x,.,fixed=TRUE) %>%
                 utils::read.delim(stringsAsFactors=FALSE, check.names=FALSE) %>%
                 dfResultsToList %>%
                 `class<-`('StataResults') %>%
                 {if (length(.)==0) NULL else .},
               error = function(e) NULL, warning = function(w) NULL)) %>%
      set_names(results %++% '_class')
  }
  
  if (cleanup) removeFiles(path, time_stamp)
  Output
}

#' Remove Stata task/job sent to Stata by \code{\link[RStataLink]{doInStata}}
#' 
#' This is done if possible (if not yet executed or started).
#' @param StataFuture An object of S3 class 'StataFuture' produced by
#' \code{\link[RStataLink]{doInStata}}, with argument \code{future = TRUE},
#' when a job was sent to Stata.
#' @export
deleteStataFuture <- function(StataFuture) {
  stopifnot(class(StataFuture)=='StataFuture')
  for (n in names(StataFuture))
    assign(n, StataFuture[[n]])  # destructure StataFuture
  removeFiles(path, time_stamp)
}

#' Check if Stata instance is ready to receive a job (running and not busy)
#' @param id An object of S3 class 'StataID' generated by \code{\link[RStataLink]{startStata}}
#' @param timeout Seconds to wait for Stata to respond. Default: 1.
#' Warning: too low value may result in ``false nagative'' i.e. non-busy Stata
#' instance not managing to reply on time; too high value may unnecessairly slow down
#' the execution of your R code -- setting up this parameter optimally may require
#' some trial and error based on your hardware
#' (setting \code{timeout} as low as possible without producing ``false positives'').
#' @return \code{TRUE} or \code{FALSE}
#' @export
isStataReady <- function(id, timeout=1) {
  stopifnot(class(id)=='StataID',
            is.numeric(timeout), timeout>=0)
  code <- c(LETTERS, letters, 0:9) %>%
    sample(10, TRUE) %>%
    paste(collapse="")
  log <- doInStata(id,
                   'di "' %++% code %++% '"',
                   timeout=timeout,
                   results=NULL)$log
  glob2rx('*' %++% code %++% '*') %>%
    grepl(log) %>% any
}

#' Open a Stata instance in a local server-like mode to be used by R through
#' \code{\link[RStataLink]{doInStata}}
#' 
#' A wrapper around Stata ``server'' code
#' @param timeout Seconds to wait for the response/confirmation from Stata. Default: 60.
#' @param start_cmd A string specifying a system command line to start Stata, e.g.: on Windows OS
#' \code{'"C:\\\\Program Files\\\\Stata14\\\\StataMP.exe"'}.
#' Default: the contents of option "statapath" obtined with \code{getOption('statpath')}.
#' @param compath A path to a directory where the R<->Stata information exchange
#' sub-directory should be created. Default: \code{\link{tempdir}()}.
#' @param exit_on_error601 Logical: should Stata be closed when the R--Stata data exchange
#' working directory disappears and Stata ``server'' stops
#' (\href{http://www.stata.com/search.cgi?query=601}{Stata error 601})? Default: \code{FALSE}.
#' @param verify Logical (default: \code{TRUE}). Should R wait for the confirmation that Stata
#' ``server'' has been started. Use it as an escape hack if Stata starts, but the verification
#' cannot be obtained.
#' @return An object of S3 class 'StataID' if Stata confirms that
#' the 'server' is ready, else an error.
#' @export
startStata <- function(timeout=60,
                       start_cmd=getOption('statapath'),
                       compath=tempdir(),
                       exit_on_error601=FALSE,
                       verify=TRUE) {
  stopifnot(is.numeric(timeout), timeout>=0,
            !is.null(start_cmd), is.character(start_cmd), length(start_cmd)==1,
            is.character(compath), length(compath)==1,
            is.logical(exit_on_error601), length(exit_on_error601)==1)
  repeat {
    id <- c(LETTERS, letters, 0:9) %>%
      sample(3, TRUE) %>%
      paste(collapse="")
    path <- compath %++% '/' %++% id
    result <- dir.create(path)
    if (result) break
  }
  stata_server_code <- c(adoFilesCode,
                         stataServerCode) %>%
    paste(collapse='\n') %>%
    multiGsub(list(
      # '<<<lstrfun.ado>>>', ado_path %++% 'lstrfun.ado',
      # 			'<<<mygen.ado>>>', ado_path %++% 'mygen.ado',
      '<<<ID>>>', id,
      '<<<CD>>>', path,
      '<<<exit_on_error601>>>', as.character(exit_on_error601)))
  stata_server_code_file <- tempfile(fileext='.do')
  cat(stata_server_code, file=stata_server_code_file)
  close(file(stata_server_code_file))
  suppressWarnings(system(start_cmd %++% ' do "' %++% stata_server_code_file %++% '"',
                          wait=FALSE,
                          ignore.stdout=TRUE))
  ID <- path
  class(ID) <- 'StataID'
  names(ID) <- id
  attr(ID, 'exit_on_error601') <- exit_on_error601
  if (verify && isStataReady(ID, timeout) %>% not) stop('Starting Stata  failed.') else
    message('Stata "server" started successfully.')
  ID
}

#' Stop (close) Stata instance
#' 
#' A convenience wrapper around Stata code \code{exit, STATA}.
#' @param id An object of S3 class 'StataID' generated by \code{\link[RStataLink]{startStata}}.
#' @param clear Logical. Should the data in Stata memory be discarded
#' (i.e. Stata code \code{exit, STATA clear}). Default: \code{FALSE}.
#' @return \code{NULL} or error if there is unsaved data in Stata memory and
#' \code{clear=FALSE}.
#' @export
stopStata <- function(id, clear=FALSE) {
  stopifnot(class(id)=='StataID',
            is.logical(clear), length(clear)==1)
  doInStata(id, 'exit, STATA ' %++% if (clear) 'clear', timeout=3) %>%
    extract2('log') %>%
    equals('no; data in memory would be lost') %>%
    any -> problem
  if (problem)
    stop('\nStata error:\n no; data in memory would be lost\n r(4);\n',
         'Use argument clear=TRUE in stopStata()\n',
         'if you want to discard the data in Stata memory.\n')
}
alekrutkowski/RStataLink documentation built on March 22, 2023, 2:18 a.m.