R/tools.r

Defines functions examples.grow.list growlist.to.list growlist.add growlist grow.list move.library unique.obs valid.obs int.seq get.top.x.obs describe.var.internal describe.var.logical describe.var.date describe.var.numeric describe.var.integer describe.var.default describe.var describe.data my.help.online my.help examples.my.help word.at.pos parse.expr.and.comments examples.parse.expr.and.comments parse.text parse.text.with.source get.expr.src.lines get.txt.blocks extract.command signif.or.round signif.cols examples.signif.cols examples.print.example print.example examples.str.left.of qlist examples.qlist is.assignment display nlist deparse1 copy.into.envir sc with.random.seed data view.in.pane stop.without.error all.parent.env copy.named.env print.named.env as.named.env copy.task.env print.StudEnv new.task.env is.false is.true quick.df mark_utf8 colored.html copy.non.null.fields copy.into.missing.fields replace.fields substitute.call first.non.null compute.value.index named.list gglines perc has.col

has.col = function(x,col) {
  col %in% names(x)
}

perc = function(val, round.digits=2) {
  paste0(round(val*100,round.digits),"%")
}


gglines = function(data, xvar=colnames(data)[1], yvars=setdiff(colnames(data),xvar), key_col="variable",value_col="value", geom=ggplot2::geom_line(size=1.1)) {
  tidyr::gather_(data,key_col=key_col,value_col=value_col, gather_cols=yvars) %>%
  ggplot2::ggplot(ggplot2::aes_string(x=xvar,y=value_col, group=key_col, color=key_col)) + geom
}

named.list = function(x, names) {
  li = as.list(x)
  names(li) = names
  li
}

mutate_.NULL = select_.NULL = arrange_.NULL = filter_.NULL = function(...) NULL

compute.value.index = function(x, values=unique(x)) {
  ind = rep(NA_integer_, length(x))
  for (val in values) {
    rows = which(x == val)
    ind[rows] = seq_along(rows)
  }
  ind
}

first.non.null = function(...) {
  args = list(...)
  for (arg in args) {
    if (!is.null(arg)) return(arg)
  }
  return(NULL)
}

substitute.call = function (x, env=parent.frame())
{
  call <- substitute(substitute(x, env), list(x = x))
  eval(call)
}
# replaces values in dest and returns list of old values
replace.fields = function(dest, source, empty.obj = "__EmPtYLeEERE___") {
  restore.point("replace.fields")

  fields = names(source)

  exist = intersect(fields, names(dest))

  empty = setdiff(fields,exist)
  empty.list = replicate(n = length(empty),empty.obj,simplify = FALSE)
  names(empty.list) = empty

  if (length(exist)>0) {
    old = c(mget(exist,dest), empty.list)
  } else {
    old = empty.list
  }
  for (name in names(source)) {
    obj = source[[name]]
    if (identical(obj, empty.obj)) {
      if (name %in% exist) rm(list=name,envir=dest)
    } else {
      dest[[name]] = obj
    }
  }
  old
}

copy.into.missing.fields = function(dest, source) {
  restore.point("copy.into.empty.fields")

  new.fields = setdiff(names(source), names(dest))
  dest[new.fields] = source[new.fields]
  dest
}

copy.non.null.fields = function(dest, source, fields=names(source)) {
  restore.point("copy.into.empty.fields")
  copy.fields = fields[!sapply(source[fields], is.null)]

  if (is.environment(dest)) {
    for (field in copy.fields) dest[[field]] = source[[field]]
  } else {
    dest[copy.fields] = source[copy.fields]
  }

  invisible(dest)
}



colored.html = function(txt, color="blue") {
  if (is.null(color)) return(txt)
  paste0("<font color='",color,"'>",txt,"</font>")
}

# mark the encoding of character vectors as UTF-8
mark_utf8 <- function(x) {
  if (is.character(x)) {
    Encoding(x) <- 'UTF-8'
    return(x)
  }
  if (!is.list(x)) return(x)
  attrs <- attributes(x)
  res <- lapply(x, mark_utf8)
  attributes(res) <- attrs
  res
}


# # from and to must be sorted and non-overlapping
# match.intervals = function(x, from, to) {
#   from = c(2,5,10.1); to=c(3,8,12)
#   x = 0:15
#
#   vec = as.numeric(t(cbind(from, to)))
#   int=findInterval(x,vec)
#   int.rev=length(vec)-findInterval(-x,rev(-vec))
#
#   res = pmin(int, int.rev)
#   res[res %% 2 == 0] = NA
#
#   rbind(x,int, int.rev, res)
# }

quick.df = function (...)
{
  df = list(...)
  attr(df, "row.names") <- 1:length(df[[1]])
  attr(df, "class") <- "data.frame"
  df
}


is.true = function(val) {
  if (length(val)==0)
    return(FALSE)
  val[is.na(val)] = FALSE
  return(val)
}
is.false = function(val) {
  if (length(val)==0)
    return(FALSE)
  val[is.na(val)] = TRUE
  return(!val)
}
new.task.env = function(chunk.ind, ps = get.ps()) {
  restore.point("new.task.env")

  task.env = new.env(parent=ps$ps.baseenv)

  #task.env =
  task.env$..chunk.ind <- chunk.ind
  class(task.env) = c("StudEnv",class(task.env))
  #cat("\nnew.")
  #print(task.env)
  #all.parent.env(task.env)
  #all.parent.env(ps$ps.baseenv)

  task.env
}

print.StudEnv = function(task.env,...) {
  cat("task.env chunk", task.env$..chunk.ind,":")
  env = task.env
  class(env) = "environment"
  print(env)
  obj = ls(task.env)
  if (length(obj)>0) {
    cat("  objects: ", paste0(obj, collapse=", "))
  }
}

copy.task.env = function(env, new.chunk.ind=env$..chunk.ind, ps = get.ps()) {
  restore.point("copy.task.env")
  task.env = as.environment(as.list(env, all.names=TRUE))
  parent.env(task.env) <- ps$ps.baseenv

  #all.parent.env(task.env)
  #all.parent.env(ps$ps.baseenv)
  #all.parent.env(globalenv())
  #parent.env(task.env) <- parent.env(globalenv())
  task.env$..chunk.ind = new.chunk.ind
  class(task.env) = c("StudEnv",class(task.env))
  #cat(" copy.task.env: ")
  #print(task.env)
  task.env
}

as.named.env = function(env, name) {
  env$..name <- name
  class(env) = c("named.env", class(env))
  env
}

print.named.env = function(env,...) {
  cat("\n<named environment:", env$..name, ">")
  print(ls(env))
}

copy.named.env = function(env, name = env$..name) {
  as.named.env(as.environment(as.list(ps$task.env, all.names=TRUE)), name)
}


all.parent.env = function(env=globalenv()) {
  if (identical(env,emptyenv()))
    return(NULL)
  penv = parent.env(env)
  c(list(penv), all.parent.env(penv))
}

stop.without.error <- function(...){
  opt <- options(show.error.messages=FALSE)
  on.exit(options(opt))
  display(...)
  stop()
}

view.in.pane = function(html=NULL, markdown=NULL) {
  library(knitr)
  library(markdown)
  #f <- system.file("examples", "knitr-minimal.Rmd", package = "knitr")
  #knit(f)
  htmlFile <- tempfile(fileext=".html")
  if (!is.null(markdown)) {
    markdownToHTML(text=txt,output=htmlFile)
  } else if (!is.null(html)) {
    writeLines(html,htmlFile)
  }
  if (require(rstudio))
    rstudioapi::viewer(htmlFile)
}



#' Overwrite the base function data, copy data by default into the calling environment instead of the global environment
data = function(..., envir = parent.frame()) {
  utils:::data(..., envir=envir)
}

#' Calls a function with a specified random.seed
#' @export
with.random.seed <- function (expr, seed = 1234567890)
{
    old.seed = get(".Random.seed", .GlobalEnv)
    set.seed(seed)
    ret = eval(expr)
    assign(".Random.seed", old.seed, .GlobalEnv)
    runif(1)
    return(ret)
}


#' Like paste0 but returns an empty vector if some string is empty
sc = function(..., sep="", collapse=NULL) {
  str = list(...)
  restore.point("str.combine")
  len = sapply(str,length)
  if (any(len==0))
    return(vector("character",0))
  paste0(...,sep=sep,collapse=collapse)
}

copy.into.envir <- function (source = sys.frame(sys.parent(1)), dest = sys.frame(sys.parent(1)),
    names = NULL, exclude = NULL, overwrite = TRUE, all.names = TRUE, set.fun.env.to.dest = FALSE)
{
    if (is.null(names)) {
        if (is.environment(source)) {
            names = ls(envir = source, all.names = all.names)
        }
        else {
            names = names(source)
        }
    }
    if (!overwrite) {
        exclude = c(exclude, ls(envir = dest))
    }
    names = setdiff(names, exclude)
    if (is.environment(source)) {
      for (na in names) {
        if (set.fun.env.to.dest) {
          tryCatch({
              val <- get(na,envir=source)
              # Set enclosing environment to dest
              if (is.function(val))
                environment(val) <- dest
              assign(na,val,envir=dest)
            }, error = function(e) {
                message(paste("Variable ", na, " was missing."))
          })
        } else {
          tryCatch({
              val <- get(na,envir=source)
              assign(na,val,envir=dest)
            }, error = function(e) {
                message(paste("Variable ", na, " was missing."))
          })
        }
      }
    } else if (is.list(source)) {
        for (na in names) {
            assign(na, source[[na]], envir = dest)
        }
    }
}

deparse1 = function(call, collapse="") {
  paste0(deparse(call, width=500),collapse=collapse)
}

nlist = function (...)
{
    li = list(...)
    li.names = names(li)
    names = unlist(as.list(match.call())[-1])
    if (!is.null(li.names)) {
        no.names = li.names == ""
        names(li)[no.names] = names[no.names]
    }
    else {
        names(li) = names
    }
    li
}

#' Displays the given text
#' @export
display = function (..., collapse = "\n", sep = "", start.char="\n",end.char="\n")
{
    str = paste(start.char, paste(..., collapse = collapse, sep = sep), end.char, sep = "")
    invisible(cat(str))
}

is.assignment = function(call) {
  if (length(call)==1)
    return(FALSE)

  char.op = as.character(call[[1]])
  char.op == "=" | char.op == "<-"
}


# Some tool functions
examples.qlist = function() {
  qlist({x=5;3*x})
}

qlist = function (..., .env = parent.frame())
{
  as.list(match.call()[-1])
  #structure(as.list(match.call()[-1]), env = .env, class = "quoted")
}

examples.str.left.of = function() {
  str.left.of("Hi","i")
  str.left.of("Ha","i")

}

print.example = function(code) {
  cat(paste0("\n",code,"\n"))
  print(eval(parse(text=code,srcfile=NULL)))
}

examples.print.example = function() {
  print.example('rep(c("A","B","C"),length.out = 10)')
}

examples.signif.cols = function() {
  df = data.frame(A="Hi", b=runif(3),c=runif(3)*1000)
  signif.cols(df,3)
}

signif.cols = function(dat,digits=4) {
  li = lapply(dat, function(col) {
    if (is.numeric(col))
      return(signif(col, digits))
    return(col)
  })
  names(li) = colnames(dat)
  do.call("quick.df",li)
}

signif.or.round = function(val, digits=3) {
  if (any(val>10^digits))
    return(round(val))
  return(signif(val,digits))
}



extract.command = function(txt,command) {
  #restore.point("extract.command")
  lines = which(substring(txt,1,nchar(command))==command)
  if (length(lines)==0)
    return(NULL)
  val = str_trim(substring(txt[lines],nchar(command)+1))
  data.frame(line=lines, val=val, stringsAsFactors=FALSE)
}


get.txt.blocks = function(txt, start=NULL, end=NULL, start.with=NULL, end.with=NULL, complements=FALSE, inner = TRUE) {
  restore.point("get.txt.blocks")
  if (!is.null(start))
    start.rows = which(str.trim(txt) == start)
  if (!is.null(start.with))
    start.rows = which(str.starts.with(txt, start.with))
  if (!is.null(end))
    end.rows = which(str.trim(txt) == end)
  if (!is.null(end.with))
    end.rows = which(str.starts.with(txt, end.with))


  if (length(start.rows)==0) {
    if (complements)
      return(list(txt))
    return(list())
  }
  if (!complements) {
    str = lapply(1:length(start.rows), function(i) txt[(start.rows[i]+inner):(end.rows[i]-inner)])
  }
  if (complements) {
    n = length(start.rows)
    new.start.rows = c(1,end.rows+inner)
    end.rows = c(start.rows-inner, length(txt))
    start.rows = new.start.rows
    zero.len = start.rows > end.rows
    start.rows = start.rows[!zero.len]
    end.rows = end.rows[!zero.len]
    if (length(start.rows)==0)
      return(list())
    str = lapply(1:length(start.rows), function(i) txt[(start.rows[i]):(end.rows[i])])

  }
  return(str)


}

get.expr.src.lines = function(expr) {
  sapply(attr(expr,"srcref"), function(e) e[1])
}


example=parse.text.with.source = function() {
  parse.text.with.source("y = 1+2")
}


parse.text.with.source = function(text) {
  restore.point("parse.text.with.source")
  if (is.null(text))
    return(NULL)
  e = base::parse(text=text)
  if (length(e)==0)
    return(NULL)
  str = sapply(attr(e,"srcref"), function(e) paste0(as.character(e), collapse="\n"))

  if (length(str)<length(e)) {
    nstr = sapply(e, deparse1)
    cat("\nparse.text.with.source does not return correct source:\n")
    cat("is:\n")
    cat(paste0(str, collapse="\n"))
    cat("should be:\n")
    cat(paste0(nstr, collapse="\n"))
    str = nstr
  }
  list(expr = e, source = str)
}


parse.text = function(text) {
  restore.point("parse.text")

  if (is.null(text))
    return(NULL)
  parse(text=text,srcfile=NULL)
}


examples.parse.expr.and.comments = function() {
  code = '
  # compute y
  y = 2+1
  # comment for z
  # another comment
  z = "Hi"
  # last comment
  '
  e = parse(text=code)
  parse.expr.and.comments(code)
}



parse.expr.and.comments = function(code, comment.start = "#") {
 if (is.null(code))
   return(list(expr=NULL, comments=NULL))
  code = sep.lines(code,"\n")
  e = parse(text=code)
  er = get.expr.src.lines(e)
  cr = which(str.starts.with(str.trim(code),comment.start))
  c2e = findInterval(cr,er)+1
  i = 2
  comments = lapply(seq_along(er), function(i) {
    rows = cr[which(c2e==i)]
    if (length(rows)==0)
      return(NULL)
    paste0(str.right.of(code[rows], comment.start), collapse="\n")
  })
  list(expr=e, comments=comments)
}

# Find words in the sense of valid function names at the current cursor position
word.at.pos = function(txt, pos) {
  mat.li = str_locate_all(txt,"[A-Za-z0-9._]*")
  i = 1
  li = lapply(seq_along(mat.li), function(i) {
    mat = mat.li[[i]]
    row = which(mat[,1]<=pos[i] & mat[,2]>=pos[i])
    if (length(row)==0)
      return(c(nchar(txt)+1,0))
    mat[row,]
  })
  m = do.call(rbind,li)

  substring(txt, m[,1],m[,2])
}

examples.my.help = function() {
  my.help(topic="mean")
}

my.help = function (topic, package = NULL, lib.loc = NULL, verbose = getOption("verbose"), help_type = "html",...)
{
  restore.point("my.help")
  paths <- utils:::index.search(topic, find.package(loadedNamespaces()))
  paths <- unique(paths)
  rd = utils:::.getHelpFile(paths)
#   library(staticdocs)
#   srd = structure(staticdocs:::set_classes(rd), class = cd("Rd_doc", "Rd"))
#   html = to_html(srd)
  file = tempfile(pattern = "file", tmpdir = tempdir(), fileext = ".html")
  html = tools::Rd2HTML(rd, out=file)
  #writeLines(html, file)
  browseURL(file)
}

my.help.online = function (topic, package = NULL, lib.loc = NULL, verbose = getOption("verbose"), help_type = "html",...)
{
  url = paste0("http://rdocs-staging.herokuapp.com/#", topic)
  browseURL(url)
}

describe.data = function(dt) {
  li = lapply(dt,describe.var)
  li
  rbindlist(li)
  stat = as.data.frame(do.call("rbind",li))
}

describe.var = function(...) {
  UseMethod("describe.var")
}


describe.var.default= function(v, name=NULL, funs = c("valid.obs","unique.obs")) {
  describe.var.internal(v,name,funs)
}
describe.var.integer = function(v, name=NULL, funs = c("valid.obs","unique.obs", "mean","min","max","sd")) {
  describe.var.internal(v,name,funs)
}
describe.var.numeric = function(v, name=NULL, funs = c("valid.obs","unique.obs", "mean","min","max","sd")) {
  describe.var.internal(v,name,funs)
}
describe.var.date= function(v, name=NULL, funs = c("valid.obs","unique.obs", "min","max")) {
  describe.var.internal(v,name,funs)
}
describe.var.logical= function(v, name=NULL, funs = c("valid.obs","unique.obs","mean")) {
  describe.var.internal(v,name,funs)
}


describe.var.internal = function(v, name=NULL, funs = c("valid.obs","unique.obs", "mean","min","max","sd")) {
  #restore.point("describe.var")
  vec = lapply(seq_along(funs), function(i) {
    res = tryCatch(
      suppressWarnings(do.call(funs[[i]],list(v, na.rm=TRUE))),
      error = function(e) NA
    )
    res
  })
  names(vec) = funs
  if (!is.null(name)) {
    c(list(name=name,class=class(vec)[1]),vec)
  } else {
    c(list(class=class(v)[1]),vec)
  }
}




get.top.x.obs = function(v, top.x=5, digits=4) {
  restore.point("get.top.x.obs")
  uv = unique(v)

  qu.df = data_frame(v=v)
  counts.df = summarise(group_by(qu.df,v), counts = n())

  shares = counts.df[["counts"]] / length(v)
  shares = sort(shares, decreasing = TRUE)
  names = counts.df[["v"]]

  #shares = (table(v, useNA="ifany") / length(v))
  #names = as.character(names(shares))



  shares = sort(shares, decreasing = TRUE)
  if (is.numeric(v)) {
    names = as.character(signif(as.numeric(names),digits))
  } else {
    names = as.character(names)
  }
  names[is.na(names)] = "<NA>"
  top.x = min(top.x, length(shares))

  dt = data.frame(var=names[1:top.x], share=as.numeric(shares[1:top.x]))
  dt
}

int.seq = function(from, to) {
  if (from > to)
    return(NULL)
  from:to
}

valid.obs = function (x, na.rm = TRUE)
{
    return(ifelse(na.rm, sum(!is.na(x)), length(x)))
}

unique.obs = function (x, na.rm = TRUE)
{
  length(unique(x))
}

move.library = function(name, pos=2) {
  ns = paste0("package:",name)
  suppressWarnings(detach(ns,character.only=TRUE, force=TRUE))
  attachNamespace(name,pos=pos)
}


grow.list = function(li) {
  c(li, vector("list", length(li)))
}

growlist = function(len=100) {
  g = new.env()
  g$size = 0
  g$li =  vector("list",len)
  g$len = len
  g
}
growlist.add = function(g, el) {
  size = g$size+1
  g$size = size
  if (g$len< size) {
    g$li = c(g$li, vector("list", size))
    g$len = g$len + size
    #cat("increase.size to", g$len," \n")
  }
  g$li[[size]] = el
  return(NULL)
}

growlist.to.list = function(g) {
  g$li[1:g$size]
}


examples.grow.list = function() {


  growlist.madd = function(gli, ...) {
    li = list(...)
    len =  length(li)
    if (length(gli$li)< gli$size+len)
      c(gli$li, vector("list", gli$size+len))
    #gli[(gli$size+1)gli] =
    gli$size = gli$size+length(gli)
  }

  # I did not find an elegant solution for quickly growing a list
  # if its size is not known ex-ante and we do not want to fully unlist it
  # grow list seems so far the best approach
  library(microbenchmark)
  runBenchmark <- function(n) {
      microbenchmark(times = 3,
          growlist = {
            g = growlist(100)
            for(i in 1:n) {
              growlist.add(g, list(i=i))
            }
          },
          grow_list = {
            li = vector("list",10)
            for(i in 1:n) {
              if (length(li)<i) li = grow.list(li)
              li[[i]] = list(i=i)
            }
          },
          prelocate = {
            li = vector("list",n)
            for(i in 1:n) {
              li[[i]] = list(i=i)
            }
          },
          rstack = {
            s = rstack()
            for(i in 1:n) {s = insert_top(s,list(i))}
          },
           c_ = {
              a <- list(0)
              for(i in 1:n) {a = c(a, list(i))}
          },
          list_ = {
              a <- NULL
              for(i in 1:n) {a <- list(a, list(i=i))}
              unlist(a)
          },
          by_index = {
              a <- list(0)
              for(i in 1:n) {a[length(a) + 1] <- i}
              a
          }
       )
  }
  runBenchmark(n = 1000)

  fun = function(g,i) {
    g$li[[i]]<-list(i=i)
  }
  fun2 = function(g,i) {
    size = g$size+1
    g$size = size
    g$li[[g$size]]<-list(i=i)
    len = length(g$li)

  }


  Rprof(tmp <- tempfile())
  n = 100000
  g = growlist(1000)
  for(i in 1:n) {
    growlist.add(g, list(i=i))
  }

  li = vector("list",10)
  for(i in 1:n) {
    if (length(li)<i) li = grow.list(li)
    li[[i]] = list(i=i)
  }
  #growlist.to.list(g)

  Rprof()
  summaryRprof(tmp)
  unlink(tmp)


}
skranz/courserClicker documentation built on Oct. 1, 2018, 3:32 a.m.