R/Misc.R

Defines functions readline_time waitForKey libinstandload simple_rapply `%!in%` LazyAnd LazyOr Lazyifelse checkMasking out .out parent.nenv duplicated

Documented in checkMasking duplicated LazyAnd Lazyifelse LazyOr libinstandload out parent.nenv readline_time simple_rapply waitForKey

# Script for helper-functions, as used on EmilMisc-package
# readline_time(prompt, timeout = 3600, precision=.1): Readline with waiting limit ----
#' Readline with waiting limit
#'
#' Normally, when using readline, execution is halted until input is supplied, and if it is not, the script hangs forever
#' This functions waits a set time for the user to provide input, then returns control
#'
#' @param prompt prompt for the user, similar to the one in readline(). Usually ends with a space (or newline).
#' @param timeout timer in seconds. If set to 0, base::readline is used
#' @param precision Polling interval in seconds. \cr
#' Internally, every \code{precision} seconds the script checks if valid input is received.
#' Smaller values increase responsiveness, but also increase CPU-usage.
#' @return Input provided by the user, if provided in time (and finished with RETURN)\cr
#' If the timer has expired, \code{character(0)}, even if the user has started typing but has not yet pressed RETURN.
#' If used in a non-interactive session, NULL, and if the timer has expired, character(0).\cr
#' If timeout==0, the same as provided by readline()
#' @note Unfortunately, does not work on all platforms/in all environments.\cr
#' It is dependent on being able to read from file('stdin'), which is different from stdin(), see \code{\link[base:connections]{file}} for details.
#' In particular, it does not work in RStudio for MacOS, or Rgui for Windows or MacOS.
#' It does confirmed work on R run from terminal on MacOS 10.13.6\cr
#' Problems manifest by file('stdin') not being connected to anything, i.e. no input is received, so this function always returns \code{character(0)},
#' and any input typed is interpreted as a new command. To test, try with a small value of timeout first.
#' @seealso \code{\link{waitForKey}} for a function that simply waits for a keypress for a set time.
#' @export
readline_time <- function(prompt, timeout = 3600, precision=.1) {
  stopifnot(length(prompt)<=1,
            is.numeric(timeout),
            length(timeout)==1,
            !is.na(timeout),
            timeout>=0,
            is.numeric(precision),
            length(precision)==1,
            !is.na(precision),
            precision>0)
  if(!interactive()) return(NULL)
  if(timeout==0) return(readline(prompt))
  timer <- timeout
  my_in <- file('stdin')
  open(my_in, blocking=FALSE)
  cat(prompt)
  ans <- readLines(my_in, n=1)
  while(timer>0 && !length(ans)) {
    Sys.sleep(precision)
    timer <- timer-precision
    ans <- readLines(my_in, n=1)
  }
  close(my_in)
  return(ans)
}


# waitForKey(message, time=10, counter=.5, precision=.01): Wait for set time, or continue on input. ----
#' Wait for set time, or continue on input.
#'
#' Waits until a timer has expired, or the user has provided some input (pressed RETURN)\cr
#' A message is shown, with a countdown-timer if desired.
#' @param message Message to be shown during countdown. Use {n} as a placeholder for the number of seconds left.
#' @param time Time to wait, in seconds (approximate)
#' @param counter Interval to update the message, in seconds. 0 to remain static. Must otherwise be a multiple of precision to work reliably.
#' @param precision Polling interval to use when checking for a keypress.
#' @return (invisibly) Either the string 'key' or 'timer', signifying what caused the return
#' @note Unfortunately, does not work on all platforms/in all environments.\cr
#' It is dependent on being able to read from file('stdin'), which is different from stdin(), see \code{\link[base:connections]{file}} for details.
#' In particular, it does not work in RStudio for MacOS, or Rgui for Windows or MacOS.
#' It does confirmed work on R run from terminal on MacOS 10.13.6\cr
#' Problems manifest by file('stdin') not being connected to anything, i.e. no input is received, so this function always returns \code{character(0)},
#' and any input typed is interpreted as a new command. To test, try with a small value of time first.
#' @seealso \code{\link{readline_time}} for a function that also collects your input.
#' @export
waitForKey <- function(message='Continuing in {n} seconds, or press any key.', time=10, counter=.5, precision=.01) {
  stopifnot(is.character(message), length(message)==1, !is.na(message),
            is.numeric(time), length(time)==1, !is.na(time), time>0,
            is.numeric(counter), length(counter)==1, !is.na(counter), counter>=0,
            is.numeric(precision), length(precision)==1, !is.na(precision), precision>0)
  my_in <- file('stdin')
  open(my_in, blocking=FALSE)
  ans <- readLines(my_in, n=1)
  if(counter==0) cat(message)
  while(time>0 && !length(ans)) {
    if(counter>0 && !is.null(message) && round(time/counter, digits = 3) %% 1L==0L) {
      cat(gsub('\\{n\\}', format(round(time, digits=4), width=5, scientific = F), message), '\r', sep = '')
    }
    Sys.sleep(precision)
    time <- time-precision
    ans <- readLines(my_in, n=1)
  }
  close(my_in)
  if(length(ans)) {
    return(invisible('key'))
  } else {
    return(invisible('timer'))
  }
}


# libinstandload(): Library Auto-installation and loading ----
#' Library Auto-installation and loading
#'
#' One combined function to load (multiple) packages, and automatically install them if needed
#' @param ... list of packages to install
#' @param order logical specifying whether packages should be loaded in order. If TRUE, first argument is loaded first (so comes last in search())
#'     if FALSE, R tries to load packages first, then installs any misses, so new packages are on top. FALSE is slightly faster
#' @param verbose logical, whether to print messages from require and install.package
#' @param extras logical. Some packages need extra initialization, e.g. extrafonts needs to install fonts. Should extra initializations be done on installation?\cr
#'     Note that initiliazation scripts are added on an ad-hoc basis, and is not complete. In this first version. it is only implemented for 'extrafont'
#' @export
libinstandload <- function(..., order=FALSE, verbose=FALSE, extras=TRUE) {
  packs <- list(...)
  if(!all(rapply(packs, class)=='character')) stop('Unexpected arguments, all args in ... should be character or list of characters')
  packs <- unlist(packs)
  if(extras) {
    if('extrafont' %in% packs) {
      if(length(tryCatch(find.package('extrafont'), error=function(e) {character()}))==0) {
        loadfonts <- importfonts <- TRUE
      } else if(!'package:extrafont' %in% search()) {
        importfonts <- FALSE
        loadfonts <- TRUE
      } else {
        loadfonts <- importfonts <- FALSE
      }
    }
  }
  if(order) {
    for(p in packs) {
      if(verbose && !require(p, character.only = TRUE)) {
        utils::install.packages(p)
        library(p, character.only=TRUE)
      } else if(!verbose && !suppressMessages(suppressWarnings(require(p, character.only = TRUE,quietly = TRUE)))) {
        suppressMessages(utils::install.packages(p))
        suppressMessages(library(p, character.only = T))
      }
    }
  } else {
    if(verbose) {
      install <- !sapply(packs, require, character.only=TRUE)
      if(any(install)) {
        utils::install.packages(unlist(packs[install]))
        sapply(packs[install], library, character.only=TRUE)
      }
    } else {
      install <- suppressMessages(suppressWarnings(!sapply(packs, require, character.only=TRUE, quietly=TRUE)))
      if(any(install)) {
        suppressMessages(utils::install.packages(unlist(packs[install])))
        suppressMessages(sapply(packs[install], library, character.only=TRUE))
      }
    }
  }
  if(extras) {
    if('extrafont' %in% packs) {
      if(importfonts) extrafont::font_import(prompt=verbose)
      if(loadfonts) {
        if(verbose) {
          loadfonts()
          loadfonts(device='postscript')
        } else {
          suppressMessages(loadfonts())
          suppressMessages(loadfonts(device='postscript'))
        }
      }
    }
  }
  return(invisible(0))
}

# simple_rapply(x, fn, ..., classes='ANY', inclLists='No'): Alternative for rapply ----
#' Alternative for rapply
#'
#' Function to be used as an alternative for rapply (recursive version of lapply)\cr
#' It differs in two important aspects:
#' \enumerate{
#' \item It handles NULL as a value, instead of seeing it as an empty list. See examples for comparison
#' \item If you set inclLists to TRUE, it also applies fn over the lists itself
#' }
#'
#' Further note that this implementation is comparable to running rapply with how='replace'
#' @param x Any object, but usually a list (if not a list, the result is identical to calling fn(x, ...))
#' @param fn A function to apply over the elements of x
#' @param ... Additional arguments passed on to fn
#' @param classes A character vector of class names, or "ANY" to match any class. Other classes are returned unmodified
#' @param inclLists Should fn be also applied to the lists themselves? see 'Examples'.
#' Can be FALSE, 'No', 'First', 'Last' or TRUE. First and Last apply fn to the list themselves either before or after applying to the elements themselves.
#' TRUE and FALSE are for backward compatibility, and are casted to 'No' and 'First', with a warning
#' @return a list with the same structure as x, with fn applied over the elements
#' @seealso rapply
#' @examples
#' L <- list(l=list(m=4, o=NULL, n=3), p=list(NULL), q=c(t=6, r=4,s=5), k=1)
#' rapply(L, is.null, how='replace')
#' any(unlist(rapply(L, is.null, how='replace')))
#' simple_rapply(L, is.null)
#' any(unlist(rapply(L, is.null)))
#'
#' rapply(L, function(x) {if(!is.null(names(x))) x[order(names(x))] else x}, how='replace')
#' simple_rapply(L, function(x) {if(!is.null(names(x))) x[order(names(x))] else x})
#' simple_rapply(L, function(x) {if(!is.null(names(x))) x[order(names(x))] else x}, inclLists=TRUE)
#' @export
simple_rapply <- function(x, fn, ..., classes='ANY', inclLists='No') {
  if(is.logical(inclLists)) {
    inclLists <- ifelse(inclLists, 'First', 'Last')
    warning('Argument inclLists is logical, which is deprecated. Use "No", "First" or "Last" instead.')
  }
  if(length(fn)>1 || length(inclLists)>1 || inclLists %!in% c('No','First','Last')) stop('Bad arguments')
  if(is.list(x))
  {
    if(inclLists=='First') x <- fn(x, ...)
    if(inclLists=='Last') {
      x <- lapply(x, simple_rapply, fn, ..., classes=classes, inclLists=inclLists) # Don't sapply this, return value must be consistent (thus list)
      return(fn(x,...))
    } else {
      return(lapply(x, simple_rapply, fn, ..., classes=classes, inclLists=inclLists)) # Don't sapply this, return value must be consistent (thus list)
    }
  } else {
    if(classes=='ANY' || class(x) %in% classes) {
      fn(x, ...)
    } else {
      x
    }
  }
}
# %!in%: Value not matching ----
#' Value not matching
#'
#' Simple function, element x is not in y.\cr
#' \code{x \%!in\% y} is the same as \code{!x \link[base:match]{\%in\%} y}\cr
#' Just implemented because I've found myself having to go back too often.
#'
#' @param x,y Used as in !(x \%in\% y)
#' @seealso \code{\link[base:match]{\%in\%}}
#'
#' @name not-in
#' @rdname not-in
#'
#' @export
`%!in%` <- function(x,y)!('%in%'(x,y))

# LazyAnd(a, b, fun, ...) and LazyOr: Lazy (short-circuited), vectorized &/&& and |/|| ----
#' Lazy (short-circuited), vectorized &/&& and |/||
#'
#' In base R, & is vectorized, and && is short-circuited (also called lazy evaluation).
#' LazyAnd combines these two: a is evaluated, and then fun(b) is called for the positions where a evaluates to TRUE (or NA), to decide a & b.
#' The advantage is that it's possible to check for valid inputs and use these inputs in one go, or that it's possible to only call an expensive function when needed.\cr\cr
#' LazyOr is analogous, fun(b) only called if a is FALSE or NA
#'
#' @param a A logical
#' @param b A vector equal to length of a (unless length(a)==1), which is used if a evaluates to TRUE or NA (for LazyAnd), or if a evaluates to FALSE or NA (for LazyOr)
#' @param fun Function to be called. Note that this function needs fun(b[x]) to be independent of fun(b[y])
#' @param ... Additional arguments to be passed on to fun
#'
#' @return
#' LazyAnd: \code{a & fun(b)}, but fun(b) is only called for those b[x] where \code{a[x]==TRUE || is.na(a[x])}\cr\cr
#' LazyOr: \code{a | fun(b)}, but fun(b) is only called for those b[x] where \code{a[x]==FALSE || is.na(a[x])}
#'
#' @section Warning:
#' Note that this may produce unexpected results if elements of fun(b) are not independent of each other, e.g. calling: \cr
#' \code{
#' nums <- 1:10
#' nums \%\% 2==0 & cumsum(nums) \%\% 2==0
#' }\cr\cr
#' and\cr\cr
#' \code{
#' LazyAnd(nums \%\% 2==0, nums, function(x) {cumsum(x) \%\% 2==0})
#' }\cr\cr
#' gives different results, as \code{cumsum(1:10[c(2,4,6,8,10)]}) is called, instead of \code{cumsum(1:10)[c(2,4,6,8,10)]}, which produces different results
#'
#' @examples
#' # A function to check evenness, but who prints an alert if the value is more then 10
#' input <- data.frame(valid=c(TRUE,TRUE,TRUE,TRUE,FALSE,FALSE),
#' value=c('1','12',2,'3','huh',14), stringsAsFactors = FALSE)
#' fun <- function(x) {
#'   if(any(as.numeric(x)>10))
#'     cat('Numbers over 10 (unexpected):',as.numeric(x)[as.numeric(x)>10], '')
#'   return(as.numeric(x) %% 2==0)
#' }
#' cat("\nAnd in total we have",sum(input$valid & fun(input$value)),"even numbers")
#' cat("\nWith LazyAnd we have in total:",sum(LazyAnd(input$valid, input$value, fun)),"even numbers")
#'
#' # Example where calling a function for all possible values would be possible,
#' # but (prohibitively) expensive
#' set.seed(4)
#' # This function may be very expensive, so we don't want to check all numbers
#' is.prime <- function(n) n == 2L || all(n %% 2L:max(2,floor(sqrt(n))) != 0)
#' n <- floor(runif(1e4, min=0, max=.5)^(-4))
#' surely_prime <- LazyAnd(n<1e10, n, sapply, FUN=is.prime)
#'
#' # The difference between this call and
#' \dontrun{sapply(n, is.prime) # Don't try this at home!}
#' # is getting results for 62 more occurences (with low probability of being prime,
#' # probably 2-3 of them are prime), at the cost of a LOT of resources.
#'
#' @export
LazyAnd <- function(a, b, fun, ...) {
  stopifnot(length(a)==1 || length(a)==length(b))
  a[is.na(a)|a] <- a[is.na(a)|a] & fun(b[is.na(a)|a], ...)
  return(a)
}
#' @rdname LazyAnd
#' @export
LazyOr <- function(a, b, fun, ...) {
  stopifnot(length(a)==1 || length(a)==length(b))
  a[is.na(a)|!a] <- a[is.na(a)|!a] | fun(b[is.na(a)|!a], ...)
  return(a)
}
# Lazyifelse(test, yFun, yIn, nFun, nIn): Lazy version of ifelse ----
#' Lazy version of ifelse
#'
#' When using ifelse, the return values are evaluated for all values, then discarded.
#' This causes problems when you want to check for valid inputs in test.
#' In this function, values are only evaluated when necessary:
#' yFun is called with yIn for test==TRUE, and
#' nFun is called with nIn for test==FALSE.
#' Missing (NA) values in test return NA, without calling either yFun or nFun
#' @param test an object which can be coerced to logical
#' @param yFun,nFun Functions to be called when test evaluates to TRUE or FALSE respectively
#' @param yIn,nIn Arguments for yFun and nFun. Recycled if necessary
#' @details If you just want to return values, you can use the function \code{\link[base]{identity}}
#' @examples
#' stats <- list(a=NA,
#'               b=list(time="1:00", speed=100))
#' besttimes <- Lazyifelse(is.na(stats), identity, NA, function(x) {sapply(x, `[[`, i='time')}, stats)
#' @export

Lazyifelse <- function(test, yFun, yIn, nFun, nIn) {
  out <- rep(NA, times=length(test))
  if(length(test) %% length(yIn)!=0) warning('yIn recycled partially (test not a multiple of yIn')
  if(length(test) %% length(nIn)!=0) warning('nIn recycled partially (test not a multiple of nIn')
  yIn <- rep(yIn, length.out=length(test))
  nIn <- rep(nIn, length.out=length(test))
  out[!is.na(test) & test] <- yFun(yIn[!is.na(test) & test])
  out[!is.na(test) & !test] <- nFun(nIn[!is.na(test) & !test])
  return(out)
}

# checkMasking(...): Check for possible masking problems in saved .R-files ----
#' Check for possible masking problems in saved .R-files
#'
#' When loading libraries, the function library warns if functions are masked, but it's often not clear if these functions are used in a script.\cr
#' This function reads the files named in "scripts" as texfiles, and scans for function-names that have possibly been masked, and returns a list of potential problems, along with line numbers./cr
#' Call this script after all relevant libraries have been loaded.\cr
#' It's not perfect, as the pattern matching to decide what is potentially a function-call and what is not is problematic\cr
#' Excluded are:\cr
#' \itemize{
#' \item Content after a "#"
#' \item Content between single or double quotes
#' \item Argument names in function calls: \code{fun(n=3, o=4)} will not warn for 'n' or 'o'
#' \item Field names: Identifiers preceded by "$"
#' \item Functions that have been explicitly assigned from a package at some place in the script: count <- plyr::count causes all mentions of count to be ignored.\cr
#'     Note that it's not checked WHERE this assignment takes place, nor if it takes place at all (e.g. in a conditional statement)
#' \item Functions of class 'standardGeneric', as they are meant to be overwritten.\cr
#'     Note the script still has some difficulties with this, as multiple methods in multiple packages may be defined, only seeming to mask each other
#'}
#' Furthermore, you can specify names that can be ignored (e.g. the name 'n' from dplyr::n gives lots of false positives if you assign n in .GlobalEnv)\cr
#' See also 'allowed' in the arguments
#' Checking only takes place for \emph{functions}, not for data-objects
#'
#' @param scripts A character vector with filenames to check
#' @param allowed A data.frame with often used functions, along with the package you intend to use with that, or "any".\cr
#'     Names in this data.frame are ignored in any scripts. Instead, if an environment is specified, the functions checks whether these names are not masked from the given environment.\cr
#'     For examples, you can look at getOption('checkMasking_Allowed'), the default value. If this option is unset when loading this library, it is set to some standard value, you can use these values as example.
#' @param extrascripts Another character vector with filenames, appended to scripts. The reason there are 2 is in order to specify your own script, to go along with standardscripts.\cr
#'     The value 'self' is treated differently: it tells the script to extract its own source code. This may fail if it is not available, in this case try \code{install.packages(..., type='source')} or \code{options(keep.source=TRUE)}
#' @param functions list of functions or function names for which source code should be checked.
#' @param packages list of packages for which all functions should be checked
#' @return 0 (invisibly) if no potentional problems have been found, otherwise a nested list with information.\cr
#'     The exact structure of the return may be changed in future versions, so is not meant to be automatically processed,\cr
#'     except for the check for a return of 0 for "no problems"\cr
#'     Right now, the structure is a list with elements for each script that has been processed (empty if no problems were found in that particular script, but if there were problems in other scripts)\cr
#'     These elements are list, with elements for each line with a problem, being character vectors with a line number, the contents of that line, and the name that triggered it's potential problem.\cr
#'
#' @details
#' This function makes use of the global options "checkMasking_Allowed" and checkMasking_extraScripts:\cr
#' See for information regarding checkMasking_Allowed at the "allowed" parameter.\cr
#'     If this library is loaded, and this option is unset, it is set to a default value.\cr
#'     If you don't want any values to be allowed, use options(checkMasking_Allowed='')\cr
#' The option checkMasking_extraScripts is useful if you have some code that is used over and over.\cr
#' If you want to check function from all loaded libraries, you can use\cr
#' \code{checkMasking(packages=search())}#'
#'
#' This function may fail to notice problems if it doens't recognize function-names well. Some pitfalls are:
#' \itemize{
#' \item Identifiers with non-standard characters, as the function splits up lines in 'words' by word-boundaries as used by the regular expression ('\\b')
#' \item Escaped characters may confuse the regexes.\cr
#'         E.g: paste('I wouldn\'t have thought this would cause',duplicateFunction(),'problems, I really didn\'t!') will not trigger an alert, because eveything between paired single quotes is ignored, even though some are escaped.\cr
#'         In this case, the script drops 'I wouldn' and ',duplicateFunction(),' and 't!'.\cr
#'         Or #'s as part of a string, as everything after # is ignored: gsub('#','',duplicateFunction(text)) passes.\cr
#' \item Libraries that have not yet been loaded are not checked! Advise is to specify all needed libraries first, then run 'checkMasking'.
#' \item Functions having their own environments may end up calling other functions then those first in the search path. This may cause both false positives and false negatives
#' \item If a function is marked \code{\link[base:Deprecated]{.Deprecated}} or \code{\link[base:Defunct]{.Defunct}}, an attempt is made to let it pass. \cr
#'         The script looks for the "new" argument in .Deprecated or .Defunct. If there is none, no package is provided, or there is
#'         a replacement with a different name, the script lets it pass. If a package is provided, the script checks whether this package
#'         is higher on the search-list. \cr
#'         And the script of course only allows these functions to \emph{be} masked, not to mask others.
#'}
#' @export

checkMasking <- function(scripts=c(), allowed=getOption('checkMasking_Allowed'), extrascripts=c(getOption('checkMasking_extraScripts')), functions=c(), packages=c('own')) {
  if(is.null(allowed) || is.na(allowed) || allowed=='') allowed <- data.frame()
  allls <- sapply(search(), function(x) {as.character(utils::lsf.str(pos=x))})
  allls <- data.frame(name=unlist(allls, use.names = FALSE),
                      env=as.factor(unlist(sapply(names(allls), function(x) {rep(x, times=length(allls[[x]]))}))),
                      stringsAsFactors = FALSE, row.names = c())
  dupl <- allls[duplicated(allls$name) | duplicated(allls$name, fromLast = TRUE),]
  dupl <- dupl[!apply(dupl, 1, function(x) {class(get(x['name'], pos=x['env']))}) %in% c('standardGeneric'),]
  dupl <- dupl[!apply(dupl, 1, function(du) {
    ga <- utils::getAnywhere(du['name'])
    return(ga$dups[ga$where==du['env']])
  }),] # Identical according to getAnywhere
  dupl <- dupl[!apply(dupl, 1, function(x) {
    if(x[['name']]=='future_lapply') browser()
    if(x[['name']]=='checkMasking') return(TRUE)
    src <- utils::capture.output(get(x['name'], pos=x['env']))
    defp <- src[grepl('^ *(\\.Defunct)|(\\.Deprecated)', src)]
    if(!length(defp)) return(FALSE)
    defp <- defp[[1]]
    prs <- parse(text=defp)
    if(is.null(names(prs[[1]]))) {
      newfun <- prs[[1]][[2]]
    } else {
      newfun <- prs[[1]][-1][names(prs[[1]][-1])=='new'][[1]]
      if(is.null(newfun)) newfun <- prs[[1]][-1][names(prs[[1]][-1])==''][[1]]
    }
    if(!is.null(newfun) && !is.character(newfun) || length(newfun)>1) stop('A function calls .Defunct or .Deprecated, without clear arguments')
    if(!is.null(newfun)) newfun <- strsplit(newfun, split='::')[[1]]
    if(length(newfun)<2 || newfun[[2]]!=x[['name']]) {
      return(paste0(x, collapse='') %in% do.call(paste0, dupl[duplicated(dupl$name),]))
    } else {
      return(paste0('package:',newfun[[1]]) %in% search()[1:which(search()==x[['env']])])
    }
  }),] # Filter .Defunct and .Deprecated
  allowed <- allowed$name[allowed$env=='any' | apply(allowed, 1, function(x) {
    any(dupl$name[!duplicated(dupl$name)]==x['name'] & dupl$env[!duplicated(dupl$name)]==x['env'])
  })]
  dupl <- dupl[!dupl$name %in% allowed,]
  dupl <- dupl[duplicated(dupl$name) | duplicated(dupl$name, fromLast = TRUE),]
  if(nrow(dupl)==0) return(invisible(0))
  dupl$env <- sub('package:','',fixed=TRUE, dupl$env)
  packages <- lapply(packages, function(p) {
    if(p=='own') {
      environment()
    } else if(p %in% search()) {
      p
    } else if(paste0('package:',p) %in% search()) {
      paste0('package:',p)
    } else {
      warning('Package ',p,' not found, continuing with other code')
      return(NULL)
    }
  })
  tocheck <- list(stats::setNames(scripts,if(length(scripts)) paste0('scripts:',scripts)),
                  stats::setNames(extrascripts,if(length(extrascripts)) paste0('extrascripts:',extrascripts)),
                  stats::setNames(lapply(functions[sapply(functions, mode)!='function'], get, mode = 'function'),
                           if(any(sapply(functions, mode)!='function'))
                             paste0('functionnames:', functions[sapply(functions, mode)!='function'])),
                  stats::setNames(list(functions[sapply(functions, mode)=='function']),
                           if(any(sapply(functions, mode)=='function'))
                             paste0('directfunction nr ',1:sum(sapply(functions, mode)=='function')) else NULL),
                  rapply(packages, how='unlist', function(p) {lapply(as.character(utils::lsf.str(p)), function(f) {
                    stats::setNames(list(get(f, pos=p)),paste0(p,':',f))})}))
  tocheck <- unlist(tocheck)
  mentions <- lapply(tocheck, function(f) {
    if(mode(f)!='function' && (is.null(f) || is.na(f) || length(f)==0 || f=='')) return(list())
    if(mode(f)=='function') {
      lines <- utils::capture.output(f)
      if(!is.null(environment(f)))
        allowed <- c(allowed, as.character(utils::lsf.str(environment(f))))
    } else if(f=='self') {
      lines <- utils::capture.output(checkMasking)
      allowed <- c(allowed, as.character(utils::lsf.str(environment(checkMasking))), as.character(utils::lsf.str(environment())))
    } else {
      lines <- readLines(f)
    }
    names(lines) <- 1:length(lines)
    # Explanation of gsub:
    # Everything after # is ignored (comments): '(#.*$)'
    # Content between single or double quotes is ignored: '(\'.*?\')' and '(".*?")'
    # Names of arguments passed to functions are ignored, that is, anything of the form "(name=" for first arguments, or ", name=" for seconds:
    # "(((, ?)|\\()[a-z]+=[^=])"
    # Field names, everything directly after a $ sign: '(\\$[A-Za-z0-9_\\.]+)'
    lines <- gsub('(#.*$)|(\'.*?\')|(".*?")|(((, ?)|\\()[a-z]+=[^=])|(\\$[A-Za-z0-9_\\.]+)','', lines)
    lines <- lines[lines!='']
    # Add to the list of allowed names, anything that is declared somewhere.
    # So if somewhere in a script there is a line containing 'count <- plyr::count', then 'count' is allowed
    # Note that this assignment has local scope, so this is only valid for the script being evaluated
    allowed <- unique(c(allowed, dupl$name[sapply(dupl$name, function(x) {
      isTRUE(grep(x, lines)[1]==grep(paste0(x,' ?<- ?[a-zA-Z]+::',x), lines)[1])
    })]))
    if(all(dupl$name %in% allowed)) return()
    # And now we can remove any mentions of duplicate functions that are explicitly called
    lines <- gsub('[a-z]+::`?[a-z]+`?','', lines)
    # We use 2 ways of searching: as a regular expression with word-boundaries for 'normal' function names, and simpler approach for "complicated" functionnames
    # So looking for \\bduplicateFunction\\b, this is stored in regexes
    # Not in one step because paste ('',character(0))  is recyclecasted to paste0('','')
    regexes <- dupl$name[grepl('^[a-z]*$', dupl$name)&!dupl$name %in% allowed]
    if(length(regexes)) regexes <- unique(paste0('\\b',regexes,'\\b'))
    # And second, a list of not-so-regular-expressions, e.g. looking for masking of '+' or `<-`. Not clear where word-boundaries are, so just using fixed grep
    fixed <- unique(dupl$name[!grepl('^[a-z]*$', dupl$name)&!dupl$name %in% allowed])
    lines <- lines[sapply(lines, function(l) {any(sapply(regexes, grepl, x=l),
                                                  sapply(fixed, grepl, x=l, fixed=TRUE))})]
    if(length(lines)>0) {
      # Find out which regex triggered a TRUE return for each line.
      # Note that if multiple functions/regexes on one line triggered a return, only the first one is considered
      if(length(regexes)) {
        fault <- gsub('^\\\\b|\\\\b$','',
                      regexes[sapply(lines, function(l) {which(sapply(regexes, grepl, x=l))[1]})])
      } else {
        fault <- rep(NA,length(lines))
      }
      # If return is NA, this means the return was triggered by one of the fixed, not-so-regular-expressions
      if(any(is.na(fault))) {
        fault[is.na(fault)] <- gsub('^\\\\b|\\\\b$','',
                                    fixed[sapply(lines[is.na(fault)], function(l) {which(sapply(fixed, grepl, x=l, fixed=TRUE))[1]})])
      }
      return(data.frame(linenumber=names(lines), line=lines, maskedName=fault))
    } else {
      return()
    }
  })
  if(!all(sapply(mentions, length)==0)) {
    return(mentions[sapply(mentions, length)>0])
  } else {
    return(invisible(0))
  }
}

# out(...): printing and simultaneously writing to a logfile ----
#' Customary function for printing output and simultaneously writing to a logfile
#'
#' Character vectors are printed using cat instead of print.\cr
#' Multiple arguments are accepted, as seperate calls\cr
#' Any output is first evaluated (in its entirety), then printed to files, finally to console
#'
#' @param ... Objects to print/write to file
#' @param logpath Filename(s) of file(s) to print to, beside console. NULL if you just want to print to console.
#'
#' @export

out <- function(..., logpath=getOption('StandardPaths')[['TextOutput']]) {
  args <- list(...)
  while(length(args)>0) {
    chunk <- args[[1]]
    args <- args[-1]
    while(length(args)>0 && class(args[[1]])==class(chunk[[1]]) && class(chunk[[1]])=='character') {
      chunk <- c(chunk, args[[1]])
      args <- args[-1]
    }
    .out(chunk, logpath)
  }

}
.out <- function(text, filepath) {
  tempwidth <- getOption('width')
  for(filename in filepath) {
    sink(filename, append=TRUE)
    options(width=200)
    if(class(text)=='character') {
      cat(text,'\n')
    } else {
      print(text)
    }
    sink()
  }
  options(width=tempwidth)
  if(class(text)=='character') {
    cat(text,'\n')
  } else {
    print(text)
  }
}

# parent.nenv(env, n): Get parent, grandparent, grandgrandparent, etc. ----
#' Access environments further in the definition chain
#'
#' Get the parent, grandparent, grandgrandparent of an environment
#' @param env an environment
#' @param n how many steps to go up the chain, >= 0
#'
#' @return An enclosing environment
#'
#' @examples
#' myfun <- function(x) {
#'   myfun2 <- function(x2) {
#'     x+x2
#'   }
#'   x <- x+1
#'   return(myfun2)
#' }
#' parent.env(environment(myfun(1))) # That is myfuns environment
#' parent.env(parent.env(environment(myfun(1))))
#'   # This is the environment in which myfun was defined, normally .GlobalEnv
#' parent.nenv(environment(myfun(1)), 2) # The same call, but clearer
#'
#' @export
parent.nenv <- function(env, n) {
  stopifnot(is.environment(env), is.numeric(n), n>=0)
  if(n==0) {
    return(env)
  } else {
    return(parent.env(parent.nenv(env, n-1)))
  }
}
# Duplicated with fromLast=NA working for both directions ----
#' Determine duplicate elements, possibly in both directions
#' @param x a vector or a data frame or an array or NULL.
#' @param incomparables a vector of values that cannot be compared. FALSE is a special value, meaning that all values can be compared, and may be the only value accepted for methods other than the default. It will be coerced internally to the same type as x.
#' @param fromLast logical indicating if duplication should be considered from the reverse side (TRUE), the start (FALSE), or from both sides (NA)
#' @param ... arguments for particular methods
#'
#' @details
#' In the case of \code{fromLast} either \code{TRUE} or \code{FALSE}, \code{\link[base:duplicated]{base::duplicated}} is called directly.
#' Only when it's \code{NA}, comparison is done from both sides
#'
#' @export
duplicated <- function(x, incomparables=FALSE, fromLast=FALSE, ...) {
  if(is.na(fromLast)) {
    return(base::duplicated(x, incomparables, fromLast = FALSE, ...) |
           base::duplicated(x, incomparables, fromLast = TRUE, ...))
  } else {
    base::duplicated(x, incomparables, fromLast, ...)
  }
}
# More functions ----
EmilBode/EmilMisc documentation built on Feb. 24, 2020, 4:11 p.m.