R/tools_rtutor.r

call.fun = function(.fun,...) {
  if (is.list(.fun)) {
    .fun = do.call(first.none.null, .fun)
  }
  
  do.call(.fun, list(...))
}

has.col = function(x,col) {
  col %in% names(x)
}
 
perc = function(val, round.digits=2) {
  paste0(round(val*100,round.digits),"%")
}


gglines = function(data, xvar=colnames(data)[1], yvars=setdiff(colnames(data),xvar), key_col="variable",value_col="value", geom=ggplot2::geom_line(size=1.1)) {
  tidyr::gather_(data,key_col=key_col,value_col=value_col, gather_cols=yvars) %>%
  ggplot2::ggplot(ggplot2::aes_string(x=xvar,y=value_col, group=key_col, color=key_col)) + geom
}

named.list = function(x, names) {
  li = as.list(x)
  names(li) = names
  li
}

mutate_.NULL = select_.NULL = arrange_.NULL = filter_.NULL = function(...) NULL

compute.value.index = function(x, values=unique(x)) {
  ind = rep(NA_integer_, length(x))
  for (val in values) {
    rows = which(x == val)
    ind[rows] = seq_along(rows)
  }
  ind
}

first.non.null = function(...) {
  args = list(...)
  for (arg in args) {
    if (!is.null(arg)) return(arg)
  }
  return(NULL)
}

substitute.call = function (x, env=parent.frame()) 
{
  call <- substitute(substitute(x, env), list(x = x))
  eval(call)
}
# replaces values in dest and returns list of old values
replace.fields = function(dest, source, empty.obj = "__EmPtYLeEERE___") {
  restore.point("replace.fields")
  
  fields = names(source)

  exist = intersect(fields, names(dest))
  
  empty = setdiff(fields,exist)
  empty.list = replicate(n = length(empty),empty.obj,simplify = FALSE)
  names(empty.list) = empty
  
  if (length(exist)>0) {
    old = c(mget(exist,dest), empty.list)
  } else {
    old = empty.list
  }
  for (name in names(source)) {
    obj = source[[name]]
    if (identical(obj, empty.obj)) {
      if (name %in% exist) rm(list=name,envir=dest)  
    } else {
      dest[[name]] = obj
    }
  }
  old
}

copy.into.missing.fields = function(dest, source) {
  restore.point("copy.into.empty.fields")

  new.fields = setdiff(names(source), names(dest))
  dest[new.fields] = source[new.fields]
  dest
}

copy.non.null.fields = function(dest, source, fields=names(source)) {
  restore.point("copy.into.empty.fields")
  copy.fields = fields[!sapply(source[fields], is.null)]
  
  if (is.environment(dest)) {
    for (field in copy.fields) dest[[field]] = source[[field]]  
  } else {
    dest[copy.fields] = source[copy.fields]
  }
 
  invisible(dest)
}



colored.html = function(txt, color="blue") {
  if (is.null(color)) return(txt)
  paste0("<font color='",color,"'>",txt,"</font>")
}

# mark the encoding of character vectors as UTF-8
mark_utf8 <- function(x) {
  if (is.character(x)) {
    Encoding(x) <- 'UTF-8'
    return(x)
  }
  if (!is.list(x)) return(x)
  attrs <- attributes(x)
  res <- lapply(x, mark_utf8)
  attributes(res) <- attrs
  res
}


# # from and to must be sorted and non-overlapping
# match.intervals = function(x, from, to) {
#   from = c(2,5,10.1); to=c(3,8,12)
#   x = 0:15
#   
#   vec = as.numeric(t(cbind(from, to)))
#   int=findInterval(x,vec)
#   int.rev=length(vec)-findInterval(-x,rev(-vec))
#   
#   res = pmin(int, int.rev)
#   res[res %% 2 == 0] = NA
#   
#   rbind(x,int, int.rev, res)
# }

quick.df = function (...) 
{
  df = list(...)
  attr(df, "row.names") <- 1:length(df[[1]])
  attr(df, "class") <- "data.frame"
  df
}


is.true = function(val) {
  if (length(val)==0)
    return(FALSE)
  val[is.na(val)] = FALSE
  return(val)
}
is.false = function(val) {
  if (length(val)==0)
    return(FALSE)
  val[is.na(val)] = TRUE  
  return(!val)
}
new.task.env = function(chunk.ind, ps = get.ps()) {
  restore.point("new.task.env")
  
  task.env = new.env(parent=ps$ps.baseenv)
  
  #task.env = 
  task.env$..chunk.ind <- chunk.ind
  class(task.env) = c("StudEnv",class(task.env))
  #cat("\nnew.")
  #print(task.env)
  #all.parent.env(task.env)
  #all.parent.env(ps$ps.baseenv)
  
  task.env
}

print.StudEnv = function(task.env,...) {
  cat("task.env chunk", task.env$..chunk.ind,":")
  env = task.env
  class(env) = "environment"
  print(env)
  obj = ls(task.env)
  if (length(obj)>0) {
    cat("  objects: ", paste0(obj, collapse=", "))
  }
}

copy.task.env = function(env, new.chunk.ind=env$..chunk.ind, ps = get.ps()) {
  restore.point("copy.task.env")
  task.env = as.environment(as.list(env, all.names=TRUE))
  parent.env(task.env) <- ps$ps.baseenv

  #all.parent.env(task.env)
  #all.parent.env(ps$ps.baseenv)
  #all.parent.env(globalenv())
  #parent.env(task.env) <- parent.env(globalenv())
  task.env$..chunk.ind = new.chunk.ind
  class(task.env) = c("StudEnv",class(task.env))
  #cat(" copy.task.env: ")
  #print(task.env)
  task.env
}

as.named.env = function(env, name) {
  env$..name <- name
  class(env) = c("named.env", class(env))
  env
}

print.named.env = function(env,...) {
  cat("\n<named environment:", env$..name, ">")
  print(ls(env))
}

copy.named.env = function(env, name = env$..name) {
  as.named.env(as.environment(as.list(ps$task.env, all.names=TRUE)), name)
}


all.parent.env = function(env=globalenv()) {
  if (identical(env,emptyenv()))
    return(NULL)
  penv = parent.env(env)
  c(list(penv), all.parent.env(penv))
}

stop.without.error <- function(...){ 
  opt <- options(show.error.messages=FALSE) 
  on.exit(options(opt))
  display(...)
  stop() 
} 

view.in.pane = function(html=NULL, markdown=NULL) {
  library(knitr)
  library(markdown)
  #f <- system.file("examples", "knitr-minimal.Rmd", package = "knitr")
  #knit(f)
  htmlFile <- tempfile(fileext=".html")
  if (!is.null(markdown)) {
    markdownToHTML(text=txt,output=htmlFile)
  } else if (!is.null(html)) {
    writeLines(html,htmlFile)
  }
  if (require(rstudio))
    rstudioapi::viewer(htmlFile)
}



#' Overwrite the base function data, copy data by default into the calling environment instead of the global environment
data = function(..., envir = parent.frame()) {
  utils:::data(..., envir=envir)
}

#' Calls a function with a specified random.seed
#' @export
with.random.seed <- function (expr, seed = 1234567890) 
{
    old.seed = get(".Random.seed", .GlobalEnv)
    set.seed(seed)
    ret = eval(expr)
    assign(".Random.seed", old.seed, .GlobalEnv)
    runif(1)
    return(ret)
}


#' Like paste0 but returns an empty vector if some string is empty
sc = function(..., sep="", collapse=NULL) {
  str = list(...)
  restore.point("str.combine")
  len = sapply(str,length)
  if (any(len==0))
    return(vector("character",0))
  paste0(...,sep=sep,collapse=collapse)
}

copy.into.envir <- function (source = sys.frame(sys.parent(1)), dest = sys.frame(sys.parent(1)), 
    names = NULL, exclude = NULL, overwrite = TRUE, all.names = TRUE, set.fun.env.to.dest = FALSE) 
{
    if (is.null(names)) {
        if (is.environment(source)) {
            names = ls(envir = source, all.names = all.names)
        }
        else {
            names = names(source)
        }
    }
    if (!overwrite) {
        exclude = c(exclude, ls(envir = dest))
    }
    names = setdiff(names, exclude)
    if (is.environment(source)) {
      for (na in names) {
        if (set.fun.env.to.dest) {
          tryCatch({
              val <- get(na,envir=source)
              # Set enclosing environment to dest
              if (is.function(val))
                environment(val) <- dest
              assign(na,val,envir=dest)
            }, error = function(e) {
                message(paste("Variable ", na, " was missing."))
          })                      
        } else {
          tryCatch({
              val <- get(na,envir=source)
              assign(na,val,envir=dest)
            }, error = function(e) {
                message(paste("Variable ", na, " was missing."))
          })          
        }
      }
    } else if (is.list(source)) {
        for (na in names) {
            assign(na, source[[na]], envir = dest)
        }
    }
}

deparse1 = function(call, collapse="") {
  paste0(deparse(call, width=500),collapse=collapse)
}

nlist = function (...) 
{
    li = list(...)
    li.names = names(li)
    names = unlist(as.list(match.call())[-1])
    if (!is.null(li.names)) {
        no.names = li.names == ""
        names(li)[no.names] = names[no.names]
    }
    else {
        names(li) = names
    }
    li
}

#' Displays the given text
#' @export
display = function (..., collapse = "\n", sep = "", start.char="\n",end.char="\n") 
{
    str = paste(start.char, paste(..., collapse = collapse, sep = sep), end.char, sep = "")
    invisible(cat(str))
}

is.assignment = function(call) {
  if (length(call)==1)
    return(FALSE)
  
  char.op = as.character(call[[1]])
  char.op == "=" | char.op == "<-"
}


# Some tool functions
examples.qlist = function() {
  qlist({x=5;3*x})
}

qlist = function (..., .env = parent.frame()) 
{
  as.list(match.call()[-1])
  #structure(as.list(match.call()[-1]), env = .env, class = "quoted")
}

examples.str.left.of = function() {
  str.left.of("Hi","i")
  str.left.of("Ha","i")

}

print.example = function(code) {
  cat(paste0("\n",code,"\n"))
  print(eval(parse(text=code,srcfile=NULL)))
}

examples.print.example = function() {
  print.example('rep(c("A","B","C"),length.out = 10)')
}

examples.signif.cols = function() {
  df = data.frame(A="Hi", b=runif(3),c=runif(3)*1000)
  signif.cols(df,3)
}

signif.cols = function(dat,digits=4) {
  li = lapply(dat, function(col) {
    if (is.numeric(col))
      return(signif(col, digits))
    return(col)
  })
  names(li) = colnames(dat)
  do.call("quick.df",li)
}

signif.or.round = function(val, digits=3) {
  if (any(val>10^digits))
    return(round(val))
  return(signif(val,digits))
}



extract.command = function(txt,command) {
  #restore.point("extract.command")
  lines = which(substring(txt,1,nchar(command))==command)
  if (length(lines)==0)
    return(NULL)
  val = str_trim(substring(txt[lines],nchar(command)+1))
  data.frame(line=lines, val=val, stringsAsFactors=FALSE)
} 


get.txt.blocks = function(txt, start=NULL, end=NULL, start.with=NULL, end.with=NULL, complements=FALSE, inner = TRUE) {
  restore.point("get.txt.blocks")
  if (!is.null(start))
    start.rows = which(str.trim(txt) == start)
  if (!is.null(start.with))
    start.rows = which(str.starts.with(txt, start.with))
  if (!is.null(end))
    end.rows = which(str.trim(txt) == end)
  if (!is.null(end.with))
    end.rows = which(str.starts.with(txt, end.with))
  

  if (length(start.rows)==0) {
    if (complements)
      return(list(txt))
    return(list())
  }
  if (!complements) {  
    str = lapply(1:length(start.rows), function(i) txt[(start.rows[i]+inner):(end.rows[i]-inner)])
  }
  if (complements) {
    n = length(start.rows)
    new.start.rows = c(1,end.rows+inner)
    end.rows = c(start.rows-inner, length(txt))
    start.rows = new.start.rows
    zero.len = start.rows > end.rows
    start.rows = start.rows[!zero.len]
    end.rows = end.rows[!zero.len]
    if (length(start.rows)==0)
      return(list())
    str = lapply(1:length(start.rows), function(i) txt[(start.rows[i]):(end.rows[i])])

  }
  return(str)

  
}

get.expr.src.lines = function(expr) {
  sapply(attr(expr,"srcref"), function(e) e[1])
}


example=parse.text.with.source = function() {
  parse.text.with.source("y = 1+2")
}


parse.text.with.source = function(text) {
  restore.point("parse.text.with.source")
  if (is.null(text))
    return(NULL)
  e = base::parse(text=text)
  if (length(e)==0)
    return(NULL)
  str = sapply(attr(e,"srcref"), function(e) paste0(as.character(e), collapse="\n"))
  
  if (length(str)<length(e)) {
    nstr = sapply(e, deparse1)
    cat("\nparse.text.with.source does not return correct source:\n")
    cat("is:\n")
    cat(paste0(str, collapse="\n"))
    cat("should be:\n")
    cat(paste0(nstr, collapse="\n"))
    str = nstr
  }
  list(expr = e, source = str) 
}


parse.text = function(text) {
  restore.point("parse.text")
  
  if (is.null(text))
    return(NULL)
  parse(text=text,srcfile=NULL)
}


examples.parse.expr.and.comments = function() {
  code = '
  # compute y
  y = 2+1
  # comment for z
  # another comment
  z = "Hi"
  # last comment
  '
  e = parse(text=code)
  parse.expr.and.comments(code)
}



parse.expr.and.comments = function(code, comment.start = "#") {
 if (is.null(code))
   return(list(expr=NULL, comments=NULL))
  code = sep.lines(code,"\n")
  e = parse(text=code)
  er = get.expr.src.lines(e)
  cr = which(str.starts.with(str.trim(code),comment.start))
  c2e = findInterval(cr,er)+1
  i = 2
  comments = lapply(seq_along(er), function(i) {
    rows = cr[which(c2e==i)]
    if (length(rows)==0)
      return(NULL)
    paste0(str.right.of(code[rows], comment.start), collapse="\n")
  })
  list(expr=e, comments=comments)
}

# Find words in the sense of valid function names at the current cursor position
word.at.pos = function(txt, pos) {
  mat.li = str_locate_all(txt,"[A-Za-z0-9._]*")
  i = 1
  li = lapply(seq_along(mat.li), function(i) {
    mat = mat.li[[i]]
    row = which(mat[,1]<=pos[i] & mat[,2]>=pos[i])
    if (length(row)==0)
      return(c(nchar(txt)+1,0))
    mat[row,]
  })
  m = do.call(rbind,li)  

  substring(txt, m[,1],m[,2])
}

examples.my.help = function() {
  my.help(topic="mean")
}

my.help = function (topic, package = NULL, lib.loc = NULL, verbose = getOption("verbose"), help_type = "html",...) 
{
  restore.point("my.help")
  paths <- utils:::index.search(topic, find.package(loadedNamespaces()))  
  paths <- unique(paths)
  rd = utils:::.getHelpFile(paths)
#   library(staticdocs)
#   srd = structure(staticdocs:::set_classes(rd), class = cd("Rd_doc", "Rd"))
#   html = to_html(srd)
  file = tempfile(pattern = "file", tmpdir = tempdir(), fileext = ".html")
  html = tools::Rd2HTML(rd, out=file)
  #writeLines(html, file)
  browseURL(file)
}

my.help.online = function (topic, package = NULL, lib.loc = NULL, verbose = getOption("verbose"), help_type = "html",...) 
{
  url = paste0("http://rdocs-staging.herokuapp.com/#", topic)
  browseURL(url)
}

describe.data = function(dt) {
  li = lapply(dt,describe.var)
  li
  rbindlist(li)
  stat = as.data.frame(do.call("rbind",li))
}

describe.var = function(...) {
  UseMethod("describe.var")
}


describe.var.default= function(v, name=NULL, funs = c("valid.obs","unique.obs")) {
  describe.var.internal(v,name,funs)
}
describe.var.integer = function(v, name=NULL, funs = c("valid.obs","unique.obs", "mean","min","max","sd")) {
  describe.var.internal(v,name,funs)
}
describe.var.numeric = function(v, name=NULL, funs = c("valid.obs","unique.obs", "mean","min","max","sd")) {
  describe.var.internal(v,name,funs)
}
describe.var.date= function(v, name=NULL, funs = c("valid.obs","unique.obs", "min","max")) {
  describe.var.internal(v,name,funs)
}
describe.var.logical= function(v, name=NULL, funs = c("valid.obs","unique.obs","mean")) {
  describe.var.internal(v,name,funs)
}


describe.var.internal = function(v, name=NULL, funs = c("valid.obs","unique.obs", "mean","min","max","sd")) {
  #restore.point("describe.var")
  vec = lapply(seq_along(funs), function(i) {
    res = tryCatch(
      suppressWarnings(do.call(funs[[i]],list(v, na.rm=TRUE))),
      error = function(e) NA
    )
    res
  })
  names(vec) = funs
  if (!is.null(name)) {
    c(list(name=name,class=class(vec)[1]),vec)
  } else {
    c(list(class=class(v)[1]),vec)
  }
}




get.top.x.obs = function(v, top.x=5, digits=4) {
  restore.point("get.top.x.obs")
  uv = unique(v)
  
  qu.df = data_frame(v=v)
  counts.df = summarise(group_by(qu.df,v), counts = n())
  
  shares = counts.df[["counts"]] / length(v)
  shares = sort(shares, decreasing = TRUE)
  names = counts.df[["v"]]
  
  #shares = (table(v, useNA="ifany") / length(v))
  #names = as.character(names(shares))
  
  
  
  shares = sort(shares, decreasing = TRUE)
  if (is.numeric(v)) {
    names = as.character(signif(as.numeric(names),digits))
  } else {
    names = as.character(names)
  }
  names[is.na(names)] = "<NA>"
  top.x = min(top.x, length(shares))
  
  dt = data.frame(var=names[1:top.x], share=as.numeric(shares[1:top.x]))
  dt
}

int.seq = function(from, to) {
  if (from > to)
    return(NULL)
  from:to
}

valid.obs = function (x, na.rm = TRUE) 
{
    return(ifelse(na.rm, sum(!is.na(x)), length(x)))
}

unique.obs = function (x, na.rm = TRUE) 
{
  length(unique(x))
}

move.library = function(name, pos=2) {
  ns = paste0("package:",name)
  suppressWarnings(detach(ns,character.only=TRUE, force=TRUE))
  attachNamespace(name,pos=pos)
}


grow.list = function(li) {
  c(li, vector("list", length(li)))
}

growlist = function(len=100) {
  g = new.env()
  g$size = 0
  g$li =  vector("list",len)
  g$len = len
  g
}
growlist.add = function(g, el) {
  size = g$size+1
  g$size = size
  if (g$len< size) {
    g$li = c(g$li, vector("list", size))
    g$len = g$len + size
    #cat("increase.size to", g$len," \n")
  }
  g$li[[size]] = el 
  return(NULL)
}

growlist.to.list = function(g) {
  g$li[1:g$size]
}


examples.grow.list = function() {
  
  
  growlist.madd = function(gli, ...) {
    li = list(...)
    len =  length(li)
    if (length(gli$li)< gli$size+len)
      c(gli$li, vector("list", gli$size+len))
    #gli[(gli$size+1)gli] = 
    gli$size = gli$size+length(gli)
  } 
  
  # I did not find an elegant solution for quickly growing a list
  # if its size is not known ex-ante and we do not want to fully unlist it
  # grow list seems so far the best approach
  library(microbenchmark)
  runBenchmark <- function(n) {
      microbenchmark(times = 3,
          growlist = {
            g = growlist(100)
            for(i in 1:n) {
              growlist.add(g, list(i=i))
            }
          },        
          grow_list = {
            li = vector("list",10)
            for(i in 1:n) {
              if (length(li)<i) li = grow.list(li)
              li[[i]] = list(i=i) 
            }
          },
          prelocate = {
            li = vector("list",n)
            for(i in 1:n) {
              li[[i]] = list(i=i) 
            }
          },
          rstack = {
            s = rstack()
            for(i in 1:n) {s = insert_top(s,list(i))}
          },
           c_ = {
              a <- list(0)
              for(i in 1:n) {a = c(a, list(i))}
          },
          list_ = {
              a <- NULL
              for(i in 1:n) {a <- list(a, list(i=i))}
              unlist(a)
          },
          by_index = {
              a <- list(0)
              for(i in 1:n) {a[length(a) + 1] <- i}
              a
          }
       )
  }
  runBenchmark(n = 1000)
  
  fun = function(g,i) {
    g$li[[i]]<-list(i=i)
  }
  fun2 = function(g,i) {
    size = g$size+1
    g$size = size
    g$li[[g$size]]<-list(i=i)
    len = length(g$li)

  }
  

  Rprof(tmp <- tempfile())
  n = 100000
  g = growlist(1000)
  for(i in 1:n) {
    growlist.add(g, list(i=i))
  }

  li = vector("list",10)
  for(i in 1:n) {
    if (length(li)<i) li = grow.list(li)
    li[[i]] = list(i=i) 
  }
  #growlist.to.list(g)
  
  Rprof()
  summaryRprof(tmp)
  unlink(tmp)
    

}
skranz/RTutor2 documentation built on May 30, 2019, 2:01 a.m.