R/bruceR_utils.R

Defines functions LOOKUP restore_labelled export import file_ext set.wd dtime RGB formatF formatN df_to_html print_table fct_rev as_factor capitalize rep_char cc_m_ci cc_ci cc Run sprintf_transformer Glue Print pkg_install_suggested pkg_depend installed `%partin%` `%nonein%` `%anyin%` `%allin%` `%^%`

Documented in cc dtime export formatF formatN Glue import LOOKUP pkg_depend Print print_table rep_char RGB Run set.wd

#### Pipeline Functions ####


#' Paste strings together.
#'
#' Paste strings together. A wrapper of [`paste0()`][base::paste0].
#' Why `%^%`? Because typing `%` and `^` is pretty easy by
#' pressing **Shift + 5 + 6 + 5**.
#'
#' @param x,y Any objects, usually a numeric or character string or vector.
#'
#' @return
#' A character string/vector of the pasted values.
#'
#' @examples
#' "He" %^% "llo"
#' "X" %^% 1:10
#' "Q" %^% 1:5 %^% letters[1:5]
#'
#' @export
`%^%` = function(x, y) {
  paste0(x, y)
}


## The opposite of `%in%`.
##
## @param x Numeric or character vector.
## @param vector Numeric or character vector.
##
## @return
## A vector of `TRUE` or `FALSE`.
##
## @seealso
## [`%in%`][base::match]
##
## @examples
## data = data.table(ID=1:10, X=sample(1:10, 10))
## data
## data[ID %notin% c(1, 3, 5, 7, 9)]
##
## @export
# `%notin%` = function(x, vector) {
#   match(x, vector, nomatch=0L) == 0L
# }


#' @importFrom data.table %notin%
#' @export
data.table::`%notin%`


#' A simple extension of `%in%`.
#'
#' @param x Numeric or character vector.
#' @param vector Numeric or character vector.
#'
#' @return
#' `TRUE` or `FALSE`.
#'
#' @seealso
#' [`%in%`][base::match]
#'
#' [`%anyin%`]
#'
#' [`%nonein%`]
#'
#' [`%partin%`]
#'
#' @examples
#' 1:2 %allin% 1:3  # TRUE
#' 3:4 %allin% 1:3  # FALSE
#'
#' @export
`%allin%` = function(x, vector) {
  all(x %in% vector)
}


#' A simple extension of `%in%`.
#'
#' @inheritParams %allin%
#'
#' @return
#' `TRUE` or `FALSE`.
#'
#' @seealso
#' [`%in%`][base::match]
#'
#' [`%allin%`]
#'
#' [`%nonein%`]
#'
#' [`%partin%`]
#'
#' @examples
#' 3:4 %anyin% 1:3  # TRUE
#' 4:5 %anyin% 1:3  # FALSE
#'
#' @export
`%anyin%` = function(x, vector) {
  any(x %in% vector)
}


#' A simple extension of `%in%`.
#'
#' @inheritParams %allin%
#'
#' @return
#' `TRUE` or `FALSE`.
#'
#' @seealso
#' [`%in%`][base::match]
#'
#' [`%allin%`]
#'
#' [`%anyin%`]
#'
#' [`%partin%`]
#'
#' @examples
#' 3:4 %nonein% 1:3  # FALSE
#' 4:5 %nonein% 1:3  # TRUE
#'
#' @export
`%nonein%` = function(x, vector) {
  !any(x %in% vector)
}


#' A simple extension of `%in%`.
#'
#' @param pattern Character string containing **regular expressions** to be matched.
#' @param vector Character vector.
#'
#' @return
#' `TRUE` or `FALSE`.
#'
#' @seealso
#' [`%in%`][base::match]
#'
#' [`%allin%`]
#'
#' [`%anyin%`]
#'
#' [`%nonein%`]
#'
#' @examples
#' "Bei" %partin% c("Beijing", "Shanghai")  # TRUE
#' "bei" %partin% c("Beijing", "Shanghai")  # FALSE
#' "[aeiou]ng" %partin% c("Beijing", "Shanghai")  # TRUE
#'
#' @export
`%partin%` = function(pattern, vector) {
  any(grepl(pattern, vector, perl=TRUE))
}


#### Basic Functions ####


installed = function(pkg) {
  if(pkg %notin% utils::installed.packages())
    utils::install.packages(pkg)
}


#' Check dependencies of R packages.
#'
#' @param pkgs Package(s).
#' @param excludes \[Optional\] Package(s) and their dependencies excluded from the dependencies of `pkgs`.
#' Useful if you want to see the unique dependencies of `pkgs`.
#'
#' @return
#' A character vector of package names.
#'
#' @export
pkg_depend = function(pkgs, excludes=NULL) {
  default.pkgs = c("base", "boot", "class",
                   "cluster", "codetools", "compiler",
                   "datasets", "foreign", "graphics",
                   "grDevices", "grid", "KernSmooth",
                   "lattice", "MASS", "Matrix",
                   "methods", "mgcv", "nlme",
                   "nnet", "parallel", "rpart",
                   "spatial", "splines", "stats",
                   "stats4", "survival", "tcltk",
                   "tools", "translations", "utils")
  exclude.pkgs = default.pkgs
  for(ex in excludes)
    exclude.pkgs = union(exclude.pkgs, unlist(tools::package_dependencies(ex, recursive=TRUE)))
  for(pkg in pkgs)
    pkgs = union(pkgs, unlist(tools::package_dependencies(pkg, recursive=TRUE)))
  return(sort(setdiff(pkgs, exclude.pkgs)))
}


pkg_install_suggested = function(by) {
  if(missing(by)) {
    pkgs.suggests = "
    rstudioapi, devtools, pacman,
    tidyverse, ggstatsplot, jmv,
    dplyr, tidyr, stringr, forcats, data.table,
    rio, haven, foreign, readxl, openxlsx, clipr,
    tibble, plyr, glue, crayon,
    emmeans, effectsize, performance,
    pwr, simr, MASS, sampling, careless,
    irr, correlation, corpcor, corrplot,
    afex, car, psych, lmtest, nnet,
    lme4, lmerTest, multilevel, r2mlm, MuMIn,
    metafor, meta, metaSEM, metapower,
    mediation, interactions, JSmediation,
    lavaan, lavaanPlot, semPlot, processR,
    jtools, reghelper, summarytools, texreg,
    sjstats, sjPlot, apaTables,
    forecast, vars, pls, plm, AER,
    TOSTER, BEST, BayesFactor, brms,
    mlr, caret, party, randomForest, e1071, varImp,
    downloader, rvest, RCurl, RSelenium, mailR, jiebaR,
    ggplot2, ggtext, cowplot, see,
    ggrepel, ggeffects, ggsignif, ggridges, ggthemes,
    ggbreak, ggplotify, ggExtra, GGally, wordcloud2,
    patchwork, showtext"
    cat("\n")
    Print(pkgs.suggests)
    cat("\n")
    yesno = utils::menu(title="All these R packages will be installed. Do it now?",
                        choices=c("Yes", "No"))
    if(yesno==1)
      pkgs.suggests = cc(pkgs.suggests)
    else
      return(invisible())
  } else {
    pkgs.suggests = pacman::p_depends(by, character.only=TRUE, local=TRUE)$Suggests
  }
  pkgs.installed = pacman::p_library()
  pkgs.need.install = base::setdiff(pkgs.suggests, pkgs.installed)
  if(length(pkgs.need.install)>0) {
    utils::install.packages(pkgs.need.install)
  } else {
    if(missing(by))
      Print("<<green Done!>>")
    else
      Print("<<green All packages suggested by `{by}` have been installed!>>")
  }
}


#' Print strings with rich formats and colors.
#'
#' @describeIn Print
#' Paste and print strings.
#'
#' @description
#' Frustrated with [print()] and [cat()]? Try this!
#' Run examples to see what it can do.
#'
#' @details
#' Possible formats/colors that can be used in `"<< >>"` include:
#'
#' (1) bold, italic, underline, reset, blurred, inverse, hidden, strikethrough;
#'
#' (2) black, white, silver, red, green, blue, yellow, cyan, magenta;
#'
#' (3) bgBlack, bgWhite, bgRed, bgGreen, bgBlue, bgYellow, bgCyan, bgMagenta.
#'
#' See more details in [glue::glue()] and [glue::glue_col()].
#'
#' @param ... Character strings enclosed by `"{ }"` will be evaluated as R code.
#'
#' Character strings enclosed by `"<< >>"` will be printed as formatted and colored text.
#'
#' Long strings are broken by line and concatenated together.
#'
#' Leading whitespace and blank lines from the first and last lines are automatically trimmed.
#'
#' @return
#' Formatted text.
#'
#' @examples
#' name = "Bruce"
#' Print("My name is <<underline <<bold {name}>>>>.
#'        <<bold <<blue Pi = {pi:.15}.>>>>
#'        <<italic <<green 1 + 1 = {1 + 1}.>>>>
#'        sqrt({x}) = <<red {sqrt(x):.3}>>", x=10)
#'
#' @export
Print = function(...) {
  tryCatch({
    output = glue(..., .transformer=sprintf_transformer, .envir=parent.frame())
    output_color = glue_col( gsub("<<", "{", gsub(">>", "}", output)) )
    print(output_color)
  }, error = function(e) {
    warning(e)
    print(...)
  })
}


#' @describeIn Print
#' Paste strings.
#' @export
Glue = function(...) {
  output = glue(..., .transformer=sprintf_transformer, .envir=parent.frame())
  output_color = glue_col( gsub("<<", "{", gsub(">>", "}", output)) )
  return(output_color)
}


glue = glue::glue
glue_col = glue::glue_col

bold = crayon::bold
italic = crayon::italic
underline = crayon::underline
reset = crayon::reset
blurred = crayon::blurred
inverse = crayon::inverse
hidden = crayon::hidden
strikethrough = crayon::strikethrough

black = crayon::black
white = crayon::white
silver = crayon::silver
red = crayon::red
green = crayon::green
blue = crayon::blue
yellow = crayon::yellow
cyan = crayon::cyan
magenta = crayon::magenta


sprintf_transformer = function(text, envir) {
  text = glue(text, .envir=envir)
  m = regexpr(":.+$", text)
  if(m!=-1) {
    format = substring(regmatches(text, m), 2)
    regmatches(text, m) = ""
    res = eval(parse(text=text, keep.source=FALSE), envir)
    do.call(sprintf, list(glue("%{format}f"), res))
  } else {
    eval(parse(text=text, keep.source=FALSE), envir)
  }
}


#' Run code parsed from text.
#'
#' @param ... Character string(s) to run. You can use `"{ }"` to insert any R object in the environment.
#' @param silent Suppress error/warning messages. Defaults to `FALSE`.
#'
#' @return
#' Invisibly return the running expression(s).
#'
#' @examples
#' Run("a=1", "b=2")
#' Run("print({a+b})")
#'
#' @export
Run = function(..., silent=FALSE) {
  text = glue(..., .sep="\n", .envir=parent.frame())
  if(silent) {
    suppressWarnings({
      eval(parse(text=text), envir=parent.frame())
    })
  } else {
    eval(parse(text=text), envir=parent.frame())
  }
  invisible(text)
}


#' Split up a string (with separators) into a character vector.
#'
#' Split up a string (with separators) into a character vector
#' (whitespace around separator is trimmed).
#'
#' @param ... Character string(s).
#' @param sep Pattern for separation.
#' Defaults to `"auto"`:
#' `,` `;` `|` `\n` `\t`
#' @param trim Remove whitespace from start and end of string(s)?
#' Defaults to `TRUE`.
#'
#' @return
#' Character vector.
#'
#' @examples
#' cc("a,b,c,d,e")
#'
#' cc(" a , b , c , d , e ")
#'
#' cc(" a , b , c , d , e ", trim=FALSE)
#'
#' cc("1, 2, 3, 4, 5")
#'
#' cc("A 1 , B 2 ; C 3 | D 4 \t E 5")
#'
#' cc("A, B, C",
#'    " D | E ",
#'    c("F", "G"))
#'
#' cc("
#' American
#' British
#' Chinese
#' ")
#'
#' @export
cc = function(..., sep="auto", trim=TRUE) {
  dots = list(...)
  x = paste(sapply(dots, function(i) paste(i, collapse=",")), collapse=",")
  x = ifelse(trim, str_trim(x), x)
  sep = ifelse(sep=="auto",
               ifelse(trim, "\\s*[,;\\|\\n\\t]\\s*", "[,;\\|\\n\\t]"),
               sep)
  as.character(str_split(x, sep, simplify=TRUE))
}


cc_ci = function(llci, ulci, digits) {
  paste0("[",
         formatF(llci, digits), ", ",
         formatF(ulci, digits), "]")
}


cc_m_ci = function(mean, llci, ulci, digits) {
  paste0(formatF(mean, digits), " [",
         formatF(llci, digits), ", ",
         formatF(ulci, digits), "]")
}


#' Repeat a character string for many times and paste them up.
#'
#' @param char Character string.
#' @param rep.times Times for repeat.
#'
#' @return
#' Character string.
#'
#' @examples
#' rep_char("a", 5)
#'
#' @export
rep_char = function(char, rep.times) {
  paste(rep(char, times=rep.times), collapse="")
}


## Capitalize the first letter of a string.
capitalize = function(string) {
  capped = grep("^[A-Z]", string, invert=TRUE)
  substr(string[capped], 1, 1) = toupper(substr(string[capped], 1, 1))
  return(string)
}


## forcats::as_factor()
as_factor = function(x) {
  factor(x, levels=unique(x))
}


## forcats::fct_rev()
fct_rev = function(f) {
  factor(f, levels=rev(levels(f)))
}


#' Print a three-line table (to R Console and Microsoft Word).
#'
#' This basic function prints any data frame as a three-line table to either R Console or Microsoft Word (.doc). It has been used in many other functions of `bruceR` (see below).
#'
#' @param x Matrix, data.frame (or data.table), or any model object (e.g., `lm`, `glm`, `lmer`, `glmer`, ...).
#' @param digits Numeric vector specifying the number of decimal places of output. Defaults to `3`.
#' @param nspaces Number of whitespaces between columns. Defaults to `1`.
#' @param row.names,col.names Print row/column names. Defaults to `TRUE` (column names are always printed). To modify the names, you can use a character vector with the same length as the raw names.
#' @param title Title text, which will be inserted in `<p></p>` (HTML code).
#' @param note Note text, which will be inserted in `<p></p>` (HTML code).
#' @param append Other contents, which will be appended in the end (HTML code).
#' @param line Lines looks like true line (`TRUE`) or `=== --- ===` (`FALSE`).
#' @param file File name of MS Word (`.doc`).
#' @param file.align.head,file.align.text Alignment of table head or table text: `"left"`, `"right"`, `"center"`. Either one value of them OR a character vector of mixed values with the same length as the table columns. Default alignment (if set as `"auto"`): left, right, right, ..., right.
#'
#' @return
#' Invisibly return a list of data frame and HTML code.
#'
#' @examples
#' print_table(data.frame(x=1))
#'
#' print_table(airquality, file="airquality.doc")
#' unlink("airquality.doc")  # delete file for code check
#'
#' model = lm(Temp ~ Month + Day + Wind + Solar.R, data=airquality)
#' print_table(model)
#' print_table(model, file="model.doc")
#' unlink("model.doc")  # delete file for code check
#'
#' @export
print_table = function(
    x, digits=3,
    nspaces=1,
    row.names=TRUE,
    col.names=TRUE,
    title="", note="", append="",
    line=TRUE,
    file=NULL,
    file.align.head="auto",
    file.align.text="auto"
) {
  ## Preprocess data.frame ##
  if(!inherits(x, c("matrix", "data.frame", "data.table"))) {
    coef.table = coef(summary(x))
    if(!is.null(coef.table)) x = coef.table
  }
  x = as.data.frame(x)
  sig = NULL
  if(length(digits)==1) digits = rep(digits, length(x))
  for(j in 1:length(x)) {
    if(inherits(x[,j], "factor"))
      x[,j] = as.character(x[,j])
    if(grepl("Pr\\(|pval|p.value|<i>p</i>", names(x)[j])) {
      sig = formatF(sig.trans(x[,j]), 0)  # formatF will make * left-aligned
      if(grepl("<i>p</i>", names(x)[j])==FALSE)
        names(x)[j] = "p"
      x[,j] = p.trans(x[,j])
    } else {
      x[,j] = formatF(x[,j], digits[j])
    }
    if(grepl("^S\\.E\\.$|^Std\\. Error$|^se$|^SE$|^BootSE$", names(x)[j])) {
      x[,j] = paste0("(", x[,j], ")")  # add ( ) to S.E.
      x[grepl("\\d", x[,j])==FALSE, j] = ""  # remove ( ) from blank S.E.
      if(grepl("S\\.E\\.", names(x)[j])==FALSE) names(x)[j] = "S.E."
    }
    if(grepl("^S\\.D\\.$|^Std\\. Deviation$", names(x)[j])) {
      x[,j] = paste0("(", x[,j], ")")  # add ( ) to S.D.
      x[grepl("\\d", x[,j])==FALSE, j] = ""  # remove ( ) from blank S.E.
      if(grepl("S\\.D\\.", names(x)[j])==FALSE) names(x)[j] = "S.D."
    }
    # if(grepl("\\[", names(x)[j])) x[,j] = paste0("[", x[,j], ",")
    # if(grepl("\\]", names(x)[j])) x[,j] = paste0(x[,j], "]")
    # if(grepl("^[Ee]stimate$", names(x)[j])) names(x)[j] = "Coef."
    names(x)[j] = gsub(" value$|val$", "", names(x)[j])
  }
  if(is.null(sig)==FALSE & "sig" %notin% names(x)) {
    p.pos = which(names(x) %in% c("p", "<i>p</i>"))
    nvars = length(names(x))
    if(p.pos<nvars)
      x = cbind(x[1:p.pos], ` ` = sig, x[(p.pos+1):nvars])
    else
      x = cbind(x, ` ` = sig)
    x$` ` = as.character(x$` `)
  }

  if(inherits(row.names, "character")) {
    row.names(x) = row.names
    row.names = TRUE
  }
  if(inherits(col.names, "character")) {
    names(x) = col.names
    col.names = TRUE
  }

  ## Compute length to generate line-chars ##
  linechar = ifelse(line, "\u2500", "-")
  title.length = nchar(names(x), type="width")
  vars.length = c()  # bug: vars.length = apply(apply(x, 2, nchar), 2, max)
  for(j in 1:length(x)) vars.length[j] = max(nchar(x[,j], type="width"))
  n.lines = apply(rbind(title.length, vars.length), 2, max)+nspaces
  n.lines.rn = max(nchar(row.names(x), type="width"))+nspaces
  if(row.names)
    table.line = rep_char(linechar, sum(n.lines)+n.lines.rn)
  else
    table.line = rep_char(linechar, sum(n.lines))

  ## Output ##
  if(is.null(file)) {
    if(title!="") Print(title)
    Print(table.line)
    if(row.names)
      cat(rep_char(" ", n.lines.rn))
    for(j in 1:length(x)) {
      name.j = names(x)[j]
      cat(rep_char(" ", n.lines[j]-nchar(name.j, type="width")) %^% name.j)
    }
    cat("\n")
    Print(table.line)
    for(i in 1:nrow(x)) {
      if(row.names) {
        row.name.i = row.names(x)[i]
        cat(row.name.i %^% rep_char(" ", n.lines.rn-nchar(row.name.i, type="width")))
      }
      for(j in 1:length(x)) {
        # cat(sprintf(glue("% {n.lines[j]}s"), ifelse(is.na(xr[i,j]) | grepl("NA$", xr[i,j]), "", xr[i,j])))
        x.ij = ifelse(is.na(x[i,j]) | grepl("NA$", x[i,j]), "", x[i,j])
        cat(rep_char(" ", n.lines[j]-nchar(x.ij, type="width")) %^% x.ij)
      }
      cat("\n")
    }
    Print(table.line)
    if(note!="") Print(note)
  }
  if(row.names) {
    x = cbind(rn=row.names(x), x)
    names(x)[1] = ""
  }
  if(!is.null(file)) {
    html = df_to_html(x, title=title, note=note, append=append,
                      file=file,
                      align.head=file.align.head,
                      align.text=file.align.text)
  } else {
    html = NULL
  }

  invisible(list(df=x, html=html))
}


df_to_html = function(df, title="", note="", append="",
                      file=NULL,
                      align.head="auto",
                      align.text="auto") {
  if(!is.null(file)) {
    if(file=="NOPRINT") {
      file = NULL
    } else {
      file = str_replace(file, "\\.docx$", ".doc")
      if(str_detect(file, "\\.doc$")==FALSE)
        file = paste0(file, ".doc")
    }
  }

  TITLE = title
  TNOTE = note
  APPEND = append

  if(length(align.head)==1) {
    if(align.head=="auto")
      align.head = c("left", rep("right", times=ncol(df)-1))
    else
      align.head = rep(align.head, times=ncol(df))
  }
  if(length(align.text)==1) {
    if(align.text=="auto")
      align.text = c("left", rep("right", times=ncol(df)-1))
    else
      align.text = rep(align.text, times=ncol(df))
  }

  df = as.data.frame(df)
  for(j in 1:ncol(df)) {
    df[[j]] = "<td align='" %^% align.text[j] %^% "'>" %^%
      str_trim(str_replace_all(df[[j]], "^\\s*-{1}", "\u2013")) %^% "</td>"
  }

  THEAD = "<tr> " %^%
    paste("<th align='" %^%
            align.head %^%
            "'>" %^% names(df) %^% "</th>",
          collapse=" ") %^% " </tr>"

  TBODY = "<tr> " %^%
    paste(apply(df, 1, function(...) paste(..., collapse=" ")),
          collapse=" </tr>\n<tr> ") %^% " </tr>"
  TBODY = TBODY %>%
    str_replace_all(">\\s*NA\\s*<", "><") %>%
    str_replace_all("\\s+</td>", "</td>") %>%
    str_replace_all("\\[\\s+", "[") %>%
    str_replace_all("\\,\\s+", ", ") %>%
    str_replace_all("<\\.001", "< .001")

  TABLE = paste0("
<table>
<thead>
", THEAD, "
</thead>
<tbody>
", TBODY, "
</tbody>
</table>
")

  HTML = paste0("<!DOCTYPE html>
<html>

<head>
<meta charset='utf-8'>
<title></title>
<style>
", ifelse(
  grepl("\\.doc$", file),
  "body, pre {font-size: 10.5pt; font-family: Times New Roman;}",
  ""
), "
p {margin: 0px;}
table {border-collapse: collapse; border-spacing: 0px; color: #000000;
       border-top: 2px solid #000000; border-bottom: 2px solid #000000;}
table thead th {border-bottom: 1px solid #000000;}
table th, table td {padding-left: 5px; padding-right: 5px; height: 19px;}
</style>
</head>

<body>
<p>", TITLE, "</p>", TABLE, "<p>", TNOTE, "</p>", APPEND, "
</body>

</html>")

  if(!is.null(file)) {
    if(file!="NOPRINT") {
      # sink(file)
      # cat(HTML)
      # sink()
      f = file(file, "w", encoding="UTF-8")
      cat(HTML, file=f)
      close(f)
      Print("<<green \u2714>> Table saved to <<bold \"{paste0(getwd(), '/', file)}\">>")
      cat("\n")
    }
  }

  invisible(list(HTML=HTML, TABLE=TABLE))
}


#' Format "1234" to "1,234".
#'
#' @param x A number or numeric vector.
#' @param mark Usually `","`.
#'
#' @return
#' Formatted character string.
#'
#' @seealso
#' [format()]
#'
#' [formatF()]
#'
#' @examples
#' formatN(1234)
#'
#' @export
formatN = function(x, mark=",") {
  format(x, big.mark=mark)
}


#' Format numeric values.
#'
#' @param x A number or numeric vector.
#' @param digits Number of decimal places of output. Defaults to `3`.
#'
#' @return
#' Formatted character string.
#'
#' @examples
#' formatF(pi, 20)
#'
#' @seealso
#' [format()]
#'
#' [formatN()]
#'
#' @export
formatF = function(x, digits=3) {
  # format(x, digits=0, nsmall=digits, scientific=FALSE)
  if(inherits(x, "character")) {
    xf = sprintf(paste0("%-", max(nchar(x), na.rm=TRUE), "s"), x)  # left adjustment
  } else {
    x = sprintf(paste0("%.", digits, "f"), x)
    xf = sprintf(paste0("%", max(nchar(x), na.rm=TRUE), "s"), x)
  }
  return(xf)
}


#' A simple extension of [rgb()].
#'
#' @param r,g,b Red, Green, Blue: 0~255.
#' @param alpha Color transparency (opacity): 0~1.
#' If not specified, an opaque color will be generated.
#'
#' @return
#' `"#rrggbb"` or `"#rrggbbaa"`.
#'
#' @examples
#' RGB(255, 0, 0)  # red: "#FF0000"
#' RGB(255, 0, 0, 0.8)  # red with 80% opacity: "#FF0000CC"
#'
#' @export
RGB = function(r, g, b, alpha) {
  grDevices::rgb(r/255, g/255, b/255, alpha)
}


#' Timer (compute time difference).
#'
#' @param t0 Time at the beginning.
#' @param unit Options: `"auto"`, `"secs"`, `"mins"`, `"hours"`, `"days"`, `"weeks"`. Defaults to `"secs"`.
#' @param digits Number of decimal places of output. Defaults to `0`.
#'
#' @return
#' A character string of time difference.
#'
#' @examples
#' \dontrun{
#'
#' t0 = Sys.time()
#' dtime(t0)
#' }
#'
#' @export
dtime = function(t0, unit="secs", digits=0) {
  dt = difftime(Sys.time(), t0, units=unit)
  format(dt, digits=1, nsmall=digits)
}


#### File I/O ####


#' Set working directory to the path of currently opened file.
#'
#' Set working directory to the path of currently opened file (usually an R script). You may use this function in both **.R/.Rmd files and R Console**.
#' [RStudio](https://posit.co/download/rstudio-desktop/)
#' (version >= 1.2) is required for running this function.
#'
#' @param path `NULL` (default) or a specific path. Defaults to extract the path of the currently opened file (usually .R or .Rmd) using the `rstudioapi::getSourceEditorContext` function.
#' @param ask `TRUE` or `FALSE` (default). If `TRUE`, you can select a folder with the prompt of a dialog.
#'
#' @return
#' Invisibly return the path.
#'
#' @seealso
#' [setwd()]
#'
#' @examples
#' \dontrun{
#'
#'   # RStudio (version >= 1.2) is required for running this function.
#'   set.wd()  # set working directory to the path of the currently opened file
#'   set.wd("~/")  # set working directory to the home path
#'   set.wd("../")  # set working directory to the parent path
#'   set.wd(ask=TRUE)  # select a folder with the prompt of a dialog
#' }
#'
#' @describeIn set.wd
#' Main function
#' @aliases set_wd
#' @export
set.wd = function(path=NULL, ask=FALSE) {
  # if(rstudioapi::isAvailable()==FALSE)
  #   stop("[RStudio] is required for running this function!\n",
  #        "Please download and install the latest version of RStudio:\n",
  #        "https://www.rstudio.com/products/rstudio/download/", call.=TRUE)
  is.windows = ifelse(Sys.info()[["sysname"]]=="Windows", TRUE, FALSE)
  if(is.null(path)) {
    tryCatch({
      if(ask) {
        # RStudio version >= 1.1.287
        if(is.windows)
          path = iconv(rstudioapi::selectDirectory(), from="UTF-8", to="GBK")
        else
          path = rstudioapi::selectDirectory()
      } else {
        # # RStudio version >= 1.4.843
        # if(is.windows)
        #   file.path = iconv(rstudioapi::documentPath(), from="UTF-8", to="GBK")
        # else
        #   file.path = rstudioapi::documentPath()

        # RStudio version >= 0.99.1111
        path = dirname(rstudioapi::getSourceEditorContext()$path)
      }
    }, error = function(e) {
      # Error: Function documentPath not found in RStudio
      message("Your RStudio version is: ", rstudioapi::getVersion(), "\n")
      message("Please update RStudio to the latest version:\n",
              "https://rstudio.com/products/rstudio/download/preview/\n")
    })
  }
  if(length(path)>0) {
    Run("setwd(\"{path}\")")
    Print("<<green \u2714>> Set working directory to <<bold \"{getwd()}\">>")
    # rstudioapi::sendToConsole(paste0("setwd(\"", path, "\")"), execute=TRUE)
  }
  invisible(path)
}


#' @describeIn set.wd
#' The alias of `set.wd` (the same)
#' @export
set_wd = set.wd


file_ext = function(filename) {
  filename = str_trim(filename)
  pos = regexpr("\\.([[:alnum:]]+)$", filename)
  ifelse(pos>-1L, tolower(substring(filename, pos+1L)), "")
}


#' Import data from a file (TXT, CSV, Excel, SPSS, Stata, ...) or clipboard.
#'
#' @description
#' Import data from a file, with format automatically judged from file extension. This function is inspired by [rio::import()] and has several modifications. Its purpose is to avoid using lots of `read_xxx()` functions in your code and to provide one tidy function for data import. It supports many file formats (local or URL) and uses the corresponding R functions:
#'
#' - Plain text (.txt, .csv, .csv2, .tsv, .psv), using [data.table::fread()]
#' - Excel (.xls, .xlsx), using [readxl::read_excel()]
#' - SPSS (.sav), using [haven::read_sav()] or [foreign::read.spss()]
#' - Stata (.dta), using [haven::read_dta()] or [foreign::read.dta()]
#' - R objects (.rda, .rdata, .RData), using [load()]
#' - R serialized objects (.rds), using [readRDS()]
#' - Clipboard (on Windows and Mac OS), using [clipr::read_clip_tbl()]
#' - Other formats, using [rio::import()]
#'
#' @param file File name (with extension).
#' If unspecified, then data will be imported from clipboard.
#' @param encoding File encoding. Defaults to `NULL`.
#'
#' Options: `"UTF-8"`, `"GBK"`, `"CP936"`, etc.
#'
#' If you find messy code for Chinese text in the imported data, it is usually effective to set `encoding="UTF-8"`.
#' @param header Does the first row contain column names (`TRUE` or `FALSE`)? Defaults to `"auto"`.
#' @param sheet \[Only for Excel\] Excel sheet name (or sheet number). Defaults to the first sheet. Ignored if the sheet is specified via `range`.
#' @param range \[Only for Excel\] Excel cell range. Defaults to all cells in a sheet. You may specify it as `range="A1:E100"` or `range="Sheet1!A1:E100"`.
#' @param pkg \[Only for SPSS & Stata\] Use which R package to read
#' SPSS (.sav) or Stata (.dta) data file? Defaults to `"haven"`. You may also use `"foreign"`.
#'
#' Notably, `"haven"` may be preferred because it is more robust to non-English characters and can also keep variable labels (descriptions) from SPSS.
#' @param value.labels \[Only for SPSS & Stata\] Convert variables with value labels into R factors with those levels? Defaults to `FALSE`.
#' @param as Class of the imported data. Defaults to `"data.frame"`. Ignored if the file is an R data object (.rds, .rda, .rdata, .RData).
#'
#' Options:
#' - data.frame: `"data.frame"`, `"df"`, `"DF"`
#' - data.table: `"data.table"`, `"dt"`, `"DT"`
#' - tbl_df: `"tibble"`, `"tbl_df"`, `"tbl"`
#' @param verbose Print data information? Defaults to `FALSE`.
#'
#' @return
#' A data object (default class is `data.frame`).
#'
#' @seealso
#' [export()]
#'
#' @examples
#' \dontrun{
#'
#'   # Import data from system clipboard
#'   data = import()  # read from clipboard (on Windows and Mac OS)
#'
#'   # If you have an Excel file named "mydata.xlsx"
#'   export(airquality, file="mydata.xlsx")
#'
#'   # Import data from a file
#'   data = import("mydata.xlsx")  # default: data.frame
#'   data = import("mydata.xlsx", as="data.table")
#' }
#'
#' @export
import = function(
    file,
    encoding=NULL,
    header="auto",
    sheet=NULL,
    range=NULL,
    pkg=c("haven", "foreign"),
    value.labels=FALSE,
    as="data.frame",
    verbose=FALSE
) {
  ## initialize
  if(missing(file)) {
    file = "clipboard"
    fmt = "clipboard"
  } else {
    if(grepl("^http.*://", file)) {
      # file = rio:::remote_to_local(file=file)
      fmt = "http"
    } else {
      if(file.exists(file)==FALSE)
        stop("No such file. Did you forget adding the path or file extension?", call.=FALSE)
      fmt = file_ext(file)  # file format extracted from file extension
    }
  }

  ## import data
  if(fmt=="") {
    warning("File has no extension.", call.=FALSE)
    data = readLines(file, encoding=encoding)
  } else if(fmt=="clipboard") {
    installed("clipr")
    if(header=="auto") header = TRUE
    x = clipr::read_clip()
    if(is.null(x) | (is.character(x) & length(x)==1 & x[1]==""))
      stop("The system clipboard is empty. You may first copy something.", call.=FALSE)
    else
      data = clipr::read_clip_tbl(x=x, header=header)
  } else if(fmt %in% c("rds")) {
    data = readRDS(file=file)
  } else if(fmt %in% c("rda", "rdata")) {
    envir = new.env()
    load(file=file, envir=envir)
    if(length(ls(envir))>1)
      warning("RData file contains multiple objects. Returning the first object.", call.=FALSE)
    data = get(ls(envir)[1], envir)
  } else if(fmt %in% c("txt", "csv", "csv2", "tsv", "psv")) {
    if(is.null(encoding)) encoding = "unknown"
    data = data.table::fread(input=file,
                             sep="auto",
                             encoding=encoding,
                             header=header)
  } else if(fmt %in% c("xls", "xlsx")) {
    installed("readxl")
    if(header=="auto") header = TRUE
    data = readxl::read_excel(path=file,
                              sheet=sheet,
                              range=range,
                              col_names=header)
  } else if(fmt %in% c("sav")) {
    error = TRUE
    if(pkg[1]=="foreign") {
      # pkg="foreign"
      installed("foreign")
      try({
        data = foreign::read.spss(
          file=file,
          reencode=ifelse(is.null(encoding), NA, encoding),
          to.data.frame=TRUE,
          use.value.labels=value.labels)
        error = FALSE
      }, silent=TRUE)
    }
    if(error | pkg[1]=="haven") {
      # pkg="haven"
      if(pkg[1]=="foreign")
        message("[Retry] Using `haven::read_sav()` to import the data...")
      installed("haven")
      data = haven::read_sav(file=file, encoding=encoding)
      if(value.labels) data = haven::as_factor(data)
    }
  } else if(fmt %in% c("dta")) {
    error = TRUE
    if(pkg[1]=="foreign") {
      # pkg="foreign"
      installed("foreign")
      try({
        data = foreign::read.dta(file=file, convert.factors=value.labels)
        error = FALSE
      }, silent=TRUE)
    }
    if(error | pkg[1]=="haven") {
      # pkg="haven"
      if(pkg[1]=="foreign")
        message("[Retry] Using `haven::read_dta()` to import the data...")
      installed("haven")
      data = haven::read_dta(file=file, encoding=encoding)
      if(value.labels) data = haven::as_factor(data)
    }
  } else {
    data = rio::import(file=file)
  }

  ## report data
  if(verbose) {
    if(is.data.frame(data))
      Print("<<green \u2714>> Successfully imported: {nrow(data)} obs. of {ncol(data)} variables")
    else
      Print("<<green \u2714>> Successfully imported: {length(data)} values of class `{class(data)[1]}`")
  }

  ## return data
  if(is.null(as) | fmt %in% c("rds", "rda", "rdata")) {
    return(data)
  } else if(as %in% c("data.frame", "df", "DF")) {
    return(as.data.frame(data))
  } else if(as %in% c("data.table", "dt", "DT")) {
    return(data.table::as.data.table(data))
  } else if(as %in% c("tibble", "tbl_df", "tbl")) {
    installed("tibble")
    return(tibble::as_tibble(data))
  } else {
    return(data)
  }
}


#' Export data to a file (TXT, CSV, Excel, SPSS, Stata, ...) or clipboard.
#'
#' @description
#' Export data to a file, with format automatically judged from file extension. This function is inspired by [rio::export()] and has several modifications. Its purpose is to avoid using lots of `write_xxx()` functions in your code and to provide one tidy function for data export.
#'
#' It supports many file formats and uses corresponding R functions:
#'
#' - Plain text (.txt, .csv, .csv2, .tsv, .psv), using [data.table::fwrite()]; if the `encoding` argument is specified, using [utils::write.table()] instead
#' - Excel (.xls, .xlsx), using [openxlsx::write.xlsx()]
#' - SPSS (.sav), using [haven::write_sav()]
#' - Stata (.dta), using [haven::write_dta()]
#' - R objects (.rda, .rdata, .RData), using [save()]
#' - R serialized objects (.rds), using [saveRDS()]
#' - Clipboard (on Windows and Mac OS), using [clipr::write_clip()]
#' - Other formats, using [rio::export()]
#'
#' @param x Any R object, usually a data frame (`data.frame`, `data.table`, `tbl_df`). Multiple R objects should be included in a *named* `list` (see examples). To save R objects, specify `file` with extensions `.rda`, `.rdata`, or `.RData`.
#' @param file File name (with extension). If unspecified, data will be exported to clipboard.
#' @param encoding File encoding. Defaults to `NULL`.
#'
#' Options: `"UTF-8"`, `"GBK"`, `"CP936"`, etc.
#'
#' If you find messy code for Chinese text in the exported data (often in CSV when opened with Excel), it is usually useful to set `encoding="GBK"` or `encoding="CP936"`.
#' @param header Does the first row contain column names (`TRUE` or `FALSE`)? Defaults to `"auto"`.
#' @param sheet \[Only for Excel\] Excel sheet name(s). Defaults to "Sheet1", "Sheet2", ... You may specify multiple sheet names in a character vector `c()` with the *same length* as `x` (see examples).
#' @param overwrite Overwrite the existing file (if any)? Defaults to `TRUE`.
#' @param verbose Print output information? Defaults to `FALSE`.
#'
#' @return
#' No return value.
#'
#' @seealso
#' [import()]
#'
#' [print_table()]
#'
#' @examples
#' \dontrun{
#'
#'   export(airquality)  # paste to clipboard
#'   export(airquality, file="mydata.csv")
#'   export(airquality, file="mydata.sav")
#'
#'   export(list(airquality, npk), file="mydata.xlsx")  # Sheet1, Sheet2
#'   export(list(air=airquality, npk=npk), file="mydata.xlsx")  # a named list
#'   export(list(airquality, npk), sheet=c("air", "npk"), file="mydata.xlsx")
#'
#'   export(list(a=1, b=npk, c="character"), file="abc.Rdata")  # .rda, .rdata
#'   d = import("abc.Rdata")  # load only the first object and rename it to `d`
#'   load("abc.Rdata")  # load all objects with original names to environment
#'
#'   export(lm(yield ~ N*P*K, data=npk), file="lm_npk.Rdata")
#'   model = import("lm_npk.Rdata")
#'   load("lm_npk.Rdata")  # because x is unnamed, the object has a name "List1"
#'
#'   export(list(m1=lm(yield ~ N*P*K, data=npk)), file="lm_npk.Rdata")
#'   model = import("lm_npk.Rdata")
#'   load("lm_npk.Rdata")  # because x is named, the object has a name "m1"
#' }
#'
#' @export
export = function(
    x, file,
    encoding=NULL,
    header="auto",
    sheet=NULL,
    overwrite=TRUE,
    verbose=FALSE
) {
  ## initialize
  if(missing(file)) {
    file = "clipboard"
    fmt = "clipboard"
  } else {
    if(file.exists(file)==TRUE) {
      if(overwrite)
        message("Overwrite file \"", file, "\" ...")
      else
        stop("File \"", file, "\" existed!", call.=FALSE)
    }
    fmt = file_ext(file)  # file format extracted from file extension
  }

  ## export data
  if(fmt=="") {
    stop("File has no extension.", call.=FALSE)
  } else if(fmt=="clipboard") {
    installed("clipr")
    if(header=="auto") header = TRUE
    suppressWarnings({
      clipr::write_clip(content=x, sep="\t",
                        row.names=FALSE,
                        col.names=header)
    })
  } else if(fmt %in% c("rds")) {
    saveRDS(object=x, file=file)
  } else if(fmt %in% c("rda", "rdata")) {
    if(is.data.frame(x)) {
      save(x, file=file)
    } else if(is.list(x)) {
      if(inherits(x, "list")==FALSE) x = list(x)
      if(is.null(names(x)))
        names(x) = paste0("List", 1:length(x))
      envir = as.environment(x)
      save(list=names(x), file=file, envir=envir)
    } else if(is.environment(x)) {
      save(list=ls(x), file=file, envir=x)
    } else if(is.character(x)) {
      save(list=x, file=file)
    } else {
      stop("`x` must be a data.frame, list, or environment.", call.=FALSE)
    }
  } else if(fmt %in% c("txt", "csv", "csv2", "tsv", "psv")) {
    sep = switch(fmt,
                 txt="\t",
                 csv=",",
                 csv2=";",
                 tsv="\t",
                 psv="|")
    dec = ifelse(fmt=="csv2", ",", ".")
    if(header=="auto") header = TRUE
    if(is.null(encoding)) {
      data.table::fwrite(x=x, file=file,
                         sep=sep, dec=dec,
                         row.names=FALSE,
                         col.names=header)
    } else {
      utils::write.table(x=x, file=file,
                         sep=sep, dec=dec,
                         row.names=FALSE,
                         col.names=header,
                         quote=FALSE, na="",
                         fileEncoding=encoding)
    }
  } else if(fmt %in% c("xls", "xlsx")) {
    installed("openxlsx")
    if(inherits(x, "list")==FALSE) x = list(x)  # one element
    if(header=="auto") header = TRUE
    if(is.null(sheet)) {
      if(is.null(names(x)))
        names(x) = paste0("Sheet", 1:length(x))
      openxlsx::write.xlsx(x=x, file=file,
                           overwrite=overwrite,
                           rowNames=FALSE,
                           colNames=header)
    } else {
      sheet = as.character(sheet)
      if(length(x)==length(sheet)) {
        n = length(x)
        if(file.exists(file)) {
          wb = openxlsx::loadWorkbook(file=file)
          sheets = openxlsx::getSheetNames(file=file)
          for(i in 1:n) {
            if(sheet[i] %in% sheets)
              openxlsx::removeWorksheet(wb, sheet=sheet[i])
            openxlsx::addWorksheet(wb, sheetName=sheet[i])
            openxlsx::writeData(wb, sheet=sheet[i], x=x[[i]],
                                rowNames=FALSE,
                                colNames=header)
          }
          openxlsx::saveWorkbook(wb, file=file, overwrite=TRUE)
        } else {
          names(x) = sheet
          openxlsx::write.xlsx(x=x, file=file,
                               overwrite=overwrite,
                               rowNames=FALSE,
                               colNames=header)
        }
      } else {
        stop("Length of sheet should be equal to length of x!", call.=FALSE)
      }
    }
  } else if(fmt %in% c("sav")) {
    installed("haven")
    x = restore_labelled(x)
    haven::write_sav(data=x, path=file)
  } else if(fmt %in% c("dta")) {
    installed("haven")
    x = restore_labelled(x)
    haven::write_dta(data=x, path=file)
  } else {
    rio::export(x=x, file=file)
  }

  ## report status
  if(verbose) {
    if(fmt=="clipboard")
      Print("<<green \u2714>> Successfully paste to clipboard")
    else
      Print("<<green \u2714>> Successfully saved to <<bold \"{paste0(getwd(), '/', file)}\">>")
  }
}


restore_labelled = function(x) {
  # restore labelled variable classes
  x[] = lapply(x, function(v) {
    if(is.factor(v)) {
      haven::labelled(
        x = as.numeric(v),
        labels = stats::setNames(seq_along(levels(v)), levels(v)),
        label = attr(v, "label", exact=TRUE))
    } else if(!is.null(attr(v, "labels", exact=TRUE)) | !is.null(attr(v, "label", exact=TRUE))) {
      haven::labelled(
        x = v,
        labels = attr(v, "labels", exact=TRUE),
        label = attr(v, "label", exact=TRUE))
    } else {
      v
    }
  })
  x
}


#### Excel-Style Functions ####


#' Search, match, and look up values (like Excel's functions `INDEX + MATCH`).
#'
#' In Excel, we can use `VLOOKUP`, `HLOOKUP`, `XLOOKUP` (a new function released in 2019), or the combination of `INDEX` and `MATCH` to search, match, and look up values. Here I provide a similar function. If multiple values were simultaneously matched, a warning message would be printed.
#'
#' @param data Main data.
#' @param vars Character (vector), specifying the variable(s) to be searched in `data`.
#' @param data.ref Reference data containing both the reference variable(s) and the lookup variable(s).
#' @param vars.ref Character (vector), with the **same length and order** as `vars`, specifying the reference variable(s) to be matched in `data.ref`.
#' @param vars.lookup Character (vector), specifying the variable(s) to be looked up and returned from `data.ref`.
#' @param return What to return. Default (`"new.data"`) is to return a data frame with the lookup values added. You may also set it to `"new.var"` or `"new.value"`.
#'
#' @return
#' New data object, new variable, or new value (see the argument `return`).
#'
#' @seealso
#' [dplyr::left_join()]
#'
#' @examples
#' ref = data.table(City=rep(c("A", "B", "C"), each=5),
#'                  Year=rep(2013:2017, times=3),
#'                  GDP=sample(1000:2000, 15),
#'                  PM2.5=sample(10:300, 15))
#' ref
#'
#' data = data.table(sub=1:5,
#'                   city=c("A", "A", "B", "C", "C"),
#'                   year=c(2013, 2014, 2015, 2016, 2017))
#' data
#'
#' LOOKUP(data, c("city", "year"), ref, c("City", "Year"), "GDP")
#' LOOKUP(data, c("city", "year"), ref, c("City", "Year"), c("GDP", "PM2.5"))
#'
#' @export
LOOKUP = function(
    data, vars,
    data.ref, vars.ref,
    vars.lookup,
    return=c("new.data", "new.var", "new.value")
) {
  by = vars.ref
  names(by) = vars
  data.ref = as.data.frame(data.ref)
  data.new = left_join(data,
                       data.ref[c(vars.ref, vars.lookup)],
                       by=by)
  if(nrow(data.new) > nrow(data))
    warning("More than one values are matched!", call.=TRUE)
  if(length(return)==3) return = "new.data"
  if(return=="new.value" & length(vars.lookup)>=2) return = "new.var"
  if(return=="new.data") {
    return(data.new)
  } else if(return=="new.var") {
    return(data.new[vars.lookup])
  } else if(return=="new.value") {
    return(data.new[[vars.lookup]])
  }
}

Try the bruceR package in your browser

Any scripts or data that you put into this service are public.

bruceR documentation built on Aug. 21, 2025, 5:38 p.m.