Nothing
#### Pipeline Functions ####
#' Paste strings together.
#'
#' Paste strings together. A wrapper of [`paste0()`][base::paste0].
#' Why `%^%`? Because typing `%` and `^` is pretty easy by
#' pressing **Shift + 5 + 6 + 5**.
#'
#' @param x,y Any objects, usually a numeric or character string or vector.
#'
#' @return
#' A character string/vector of the pasted values.
#'
#' @examples
#' "He" %^% "llo"
#' "X" %^% 1:10
#' "Q" %^% 1:5 %^% letters[1:5]
#'
#' @export
`%^%` = function(x, y) {
paste0(x, y)
}
## The opposite of `%in%`.
##
## @param x Numeric or character vector.
## @param vector Numeric or character vector.
##
## @return
## A vector of `TRUE` or `FALSE`.
##
## @seealso
## [`%in%`][base::match]
##
## @examples
## data = data.table(ID=1:10, X=sample(1:10, 10))
## data
## data[ID %notin% c(1, 3, 5, 7, 9)]
##
## @export
# `%notin%` = function(x, vector) {
# match(x, vector, nomatch=0L) == 0L
# }
#' @importFrom data.table %notin%
#' @export
data.table::`%notin%`
#' A simple extension of `%in%`.
#'
#' @param x Numeric or character vector.
#' @param vector Numeric or character vector.
#'
#' @return
#' `TRUE` or `FALSE`.
#'
#' @seealso
#' [`%in%`][base::match]
#'
#' [`%anyin%`]
#'
#' [`%nonein%`]
#'
#' [`%partin%`]
#'
#' @examples
#' 1:2 %allin% 1:3 # TRUE
#' 3:4 %allin% 1:3 # FALSE
#'
#' @export
`%allin%` = function(x, vector) {
all(x %in% vector)
}
#' A simple extension of `%in%`.
#'
#' @inheritParams %allin%
#'
#' @return
#' `TRUE` or `FALSE`.
#'
#' @seealso
#' [`%in%`][base::match]
#'
#' [`%allin%`]
#'
#' [`%nonein%`]
#'
#' [`%partin%`]
#'
#' @examples
#' 3:4 %anyin% 1:3 # TRUE
#' 4:5 %anyin% 1:3 # FALSE
#'
#' @export
`%anyin%` = function(x, vector) {
any(x %in% vector)
}
#' A simple extension of `%in%`.
#'
#' @inheritParams %allin%
#'
#' @return
#' `TRUE` or `FALSE`.
#'
#' @seealso
#' [`%in%`][base::match]
#'
#' [`%allin%`]
#'
#' [`%anyin%`]
#'
#' [`%partin%`]
#'
#' @examples
#' 3:4 %nonein% 1:3 # FALSE
#' 4:5 %nonein% 1:3 # TRUE
#'
#' @export
`%nonein%` = function(x, vector) {
!any(x %in% vector)
}
#' A simple extension of `%in%`.
#'
#' @param pattern Character string containing **regular expressions** to be matched.
#' @param vector Character vector.
#'
#' @return
#' `TRUE` or `FALSE`.
#'
#' @seealso
#' [`%in%`][base::match]
#'
#' [`%allin%`]
#'
#' [`%anyin%`]
#'
#' [`%nonein%`]
#'
#' @examples
#' "Bei" %partin% c("Beijing", "Shanghai") # TRUE
#' "bei" %partin% c("Beijing", "Shanghai") # FALSE
#' "[aeiou]ng" %partin% c("Beijing", "Shanghai") # TRUE
#'
#' @export
`%partin%` = function(pattern, vector) {
any(grepl(pattern, vector, perl=TRUE))
}
#### Basic Functions ####
installed = function(pkg) {
if(pkg %notin% utils::installed.packages())
utils::install.packages(pkg)
}
#' Check dependencies of R packages.
#'
#' @param pkgs Package(s).
#' @param excludes \[Optional\] Package(s) and their dependencies excluded from the dependencies of `pkgs`.
#' Useful if you want to see the unique dependencies of `pkgs`.
#'
#' @return
#' A character vector of package names.
#'
#' @export
pkg_depend = function(pkgs, excludes=NULL) {
default.pkgs = c("base", "boot", "class",
"cluster", "codetools", "compiler",
"datasets", "foreign", "graphics",
"grDevices", "grid", "KernSmooth",
"lattice", "MASS", "Matrix",
"methods", "mgcv", "nlme",
"nnet", "parallel", "rpart",
"spatial", "splines", "stats",
"stats4", "survival", "tcltk",
"tools", "translations", "utils")
exclude.pkgs = default.pkgs
for(ex in excludes)
exclude.pkgs = union(exclude.pkgs, unlist(tools::package_dependencies(ex, recursive=TRUE)))
for(pkg in pkgs)
pkgs = union(pkgs, unlist(tools::package_dependencies(pkg, recursive=TRUE)))
return(sort(setdiff(pkgs, exclude.pkgs)))
}
pkg_install_suggested = function(by) {
if(missing(by)) {
pkgs.suggests = "
rstudioapi, devtools, pacman,
tidyverse, ggstatsplot, jmv,
dplyr, tidyr, stringr, forcats, data.table,
rio, haven, foreign, readxl, openxlsx, clipr,
tibble, plyr, glue, crayon,
emmeans, effectsize, performance,
pwr, simr, MASS, sampling, careless,
irr, correlation, corpcor, corrplot,
afex, car, psych, lmtest, nnet,
lme4, lmerTest, multilevel, r2mlm, MuMIn,
metafor, meta, metaSEM, metapower,
mediation, interactions, JSmediation,
lavaan, lavaanPlot, semPlot, processR,
jtools, reghelper, summarytools, texreg,
sjstats, sjPlot, apaTables,
forecast, vars, pls, plm, AER,
TOSTER, BEST, BayesFactor, brms,
mlr, caret, party, randomForest, e1071, varImp,
downloader, rvest, RCurl, RSelenium, mailR, jiebaR,
ggplot2, ggtext, cowplot, see,
ggrepel, ggeffects, ggsignif, ggridges, ggthemes,
ggbreak, ggplotify, ggExtra, GGally, wordcloud2,
patchwork, showtext"
cat("\n")
Print(pkgs.suggests)
cat("\n")
yesno = utils::menu(title="All these R packages will be installed. Do it now?",
choices=c("Yes", "No"))
if(yesno==1)
pkgs.suggests = cc(pkgs.suggests)
else
return(invisible())
} else {
pkgs.suggests = pacman::p_depends(by, character.only=TRUE, local=TRUE)$Suggests
}
pkgs.installed = pacman::p_library()
pkgs.need.install = base::setdiff(pkgs.suggests, pkgs.installed)
if(length(pkgs.need.install)>0) {
utils::install.packages(pkgs.need.install)
} else {
if(missing(by))
Print("<<green Done!>>")
else
Print("<<green All packages suggested by `{by}` have been installed!>>")
}
}
#' Print strings with rich formats and colors.
#'
#' @describeIn Print
#' Paste and print strings.
#'
#' @description
#' Frustrated with [print()] and [cat()]? Try this!
#' Run examples to see what it can do.
#'
#' @details
#' Possible formats/colors that can be used in `"<< >>"` include:
#'
#' (1) bold, italic, underline, reset, blurred, inverse, hidden, strikethrough;
#'
#' (2) black, white, silver, red, green, blue, yellow, cyan, magenta;
#'
#' (3) bgBlack, bgWhite, bgRed, bgGreen, bgBlue, bgYellow, bgCyan, bgMagenta.
#'
#' See more details in [glue::glue()] and [glue::glue_col()].
#'
#' @param ... Character strings enclosed by `"{ }"` will be evaluated as R code.
#'
#' Character strings enclosed by `"<< >>"` will be printed as formatted and colored text.
#'
#' Long strings are broken by line and concatenated together.
#'
#' Leading whitespace and blank lines from the first and last lines are automatically trimmed.
#'
#' @return
#' Formatted text.
#'
#' @examples
#' name = "Bruce"
#' Print("My name is <<underline <<bold {name}>>>>.
#' <<bold <<blue Pi = {pi:.15}.>>>>
#' <<italic <<green 1 + 1 = {1 + 1}.>>>>
#' sqrt({x}) = <<red {sqrt(x):.3}>>", x=10)
#'
#' @export
Print = function(...) {
tryCatch({
output = glue(..., .transformer=sprintf_transformer, .envir=parent.frame())
output_color = glue_col( gsub("<<", "{", gsub(">>", "}", output)) )
print(output_color)
}, error = function(e) {
warning(e)
print(...)
})
}
#' @describeIn Print
#' Paste strings.
#' @export
Glue = function(...) {
output = glue(..., .transformer=sprintf_transformer, .envir=parent.frame())
output_color = glue_col( gsub("<<", "{", gsub(">>", "}", output)) )
return(output_color)
}
glue = glue::glue
glue_col = glue::glue_col
bold = crayon::bold
italic = crayon::italic
underline = crayon::underline
reset = crayon::reset
blurred = crayon::blurred
inverse = crayon::inverse
hidden = crayon::hidden
strikethrough = crayon::strikethrough
black = crayon::black
white = crayon::white
silver = crayon::silver
red = crayon::red
green = crayon::green
blue = crayon::blue
yellow = crayon::yellow
cyan = crayon::cyan
magenta = crayon::magenta
sprintf_transformer = function(text, envir) {
text = glue(text, .envir=envir)
m = regexpr(":.+$", text)
if(m!=-1) {
format = substring(regmatches(text, m), 2)
regmatches(text, m) = ""
res = eval(parse(text=text, keep.source=FALSE), envir)
do.call(sprintf, list(glue("%{format}f"), res))
} else {
eval(parse(text=text, keep.source=FALSE), envir)
}
}
#' Run code parsed from text.
#'
#' @param ... Character string(s) to run. You can use `"{ }"` to insert any R object in the environment.
#' @param silent Suppress error/warning messages. Defaults to `FALSE`.
#'
#' @return
#' Invisibly return the running expression(s).
#'
#' @examples
#' Run("a=1", "b=2")
#' Run("print({a+b})")
#'
#' @export
Run = function(..., silent=FALSE) {
text = glue(..., .sep="\n", .envir=parent.frame())
if(silent) {
suppressWarnings({
eval(parse(text=text), envir=parent.frame())
})
} else {
eval(parse(text=text), envir=parent.frame())
}
invisible(text)
}
#' Split up a string (with separators) into a character vector.
#'
#' Split up a string (with separators) into a character vector
#' (whitespace around separator is trimmed).
#'
#' @param ... Character string(s).
#' @param sep Pattern for separation.
#' Defaults to `"auto"`:
#' `,` `;` `|` `\n` `\t`
#' @param trim Remove whitespace from start and end of string(s)?
#' Defaults to `TRUE`.
#'
#' @return
#' Character vector.
#'
#' @examples
#' cc("a,b,c,d,e")
#'
#' cc(" a , b , c , d , e ")
#'
#' cc(" a , b , c , d , e ", trim=FALSE)
#'
#' cc("1, 2, 3, 4, 5")
#'
#' cc("A 1 , B 2 ; C 3 | D 4 \t E 5")
#'
#' cc("A, B, C",
#' " D | E ",
#' c("F", "G"))
#'
#' cc("
#' American
#' British
#' Chinese
#' ")
#'
#' @export
cc = function(..., sep="auto", trim=TRUE) {
dots = list(...)
x = paste(sapply(dots, function(i) paste(i, collapse=",")), collapse=",")
x = ifelse(trim, str_trim(x), x)
sep = ifelse(sep=="auto",
ifelse(trim, "\\s*[,;\\|\\n\\t]\\s*", "[,;\\|\\n\\t]"),
sep)
as.character(str_split(x, sep, simplify=TRUE))
}
cc_ci = function(llci, ulci, digits) {
paste0("[",
formatF(llci, digits), ", ",
formatF(ulci, digits), "]")
}
cc_m_ci = function(mean, llci, ulci, digits) {
paste0(formatF(mean, digits), " [",
formatF(llci, digits), ", ",
formatF(ulci, digits), "]")
}
#' Repeat a character string for many times and paste them up.
#'
#' @param char Character string.
#' @param rep.times Times for repeat.
#'
#' @return
#' Character string.
#'
#' @examples
#' rep_char("a", 5)
#'
#' @export
rep_char = function(char, rep.times) {
paste(rep(char, times=rep.times), collapse="")
}
## Capitalize the first letter of a string.
capitalize = function(string) {
capped = grep("^[A-Z]", string, invert=TRUE)
substr(string[capped], 1, 1) = toupper(substr(string[capped], 1, 1))
return(string)
}
## forcats::as_factor()
as_factor = function(x) {
factor(x, levels=unique(x))
}
## forcats::fct_rev()
fct_rev = function(f) {
factor(f, levels=rev(levels(f)))
}
#' Print a three-line table (to R Console and Microsoft Word).
#'
#' This basic function prints any data frame as a three-line table to either R Console or Microsoft Word (.doc). It has been used in many other functions of `bruceR` (see below).
#'
#' @param x Matrix, data.frame (or data.table), or any model object (e.g., `lm`, `glm`, `lmer`, `glmer`, ...).
#' @param digits Numeric vector specifying the number of decimal places of output. Defaults to `3`.
#' @param nspaces Number of whitespaces between columns. Defaults to `1`.
#' @param row.names,col.names Print row/column names. Defaults to `TRUE` (column names are always printed). To modify the names, you can use a character vector with the same length as the raw names.
#' @param title Title text, which will be inserted in `<p></p>` (HTML code).
#' @param note Note text, which will be inserted in `<p></p>` (HTML code).
#' @param append Other contents, which will be appended in the end (HTML code).
#' @param line Lines looks like true line (`TRUE`) or `=== --- ===` (`FALSE`).
#' @param file File name of MS Word (`.doc`).
#' @param file.align.head,file.align.text Alignment of table head or table text: `"left"`, `"right"`, `"center"`. Either one value of them OR a character vector of mixed values with the same length as the table columns. Default alignment (if set as `"auto"`): left, right, right, ..., right.
#'
#' @return
#' Invisibly return a list of data frame and HTML code.
#'
#' @examples
#' print_table(data.frame(x=1))
#'
#' print_table(airquality, file="airquality.doc")
#' unlink("airquality.doc") # delete file for code check
#'
#' model = lm(Temp ~ Month + Day + Wind + Solar.R, data=airquality)
#' print_table(model)
#' print_table(model, file="model.doc")
#' unlink("model.doc") # delete file for code check
#'
#' @export
print_table = function(
x, digits=3,
nspaces=1,
row.names=TRUE,
col.names=TRUE,
title="", note="", append="",
line=TRUE,
file=NULL,
file.align.head="auto",
file.align.text="auto"
) {
## Preprocess data.frame ##
if(!inherits(x, c("matrix", "data.frame", "data.table"))) {
coef.table = coef(summary(x))
if(!is.null(coef.table)) x = coef.table
}
x = as.data.frame(x)
sig = NULL
if(length(digits)==1) digits = rep(digits, length(x))
for(j in 1:length(x)) {
if(inherits(x[,j], "factor"))
x[,j] = as.character(x[,j])
if(grepl("Pr\\(|pval|p.value|<i>p</i>", names(x)[j])) {
sig = formatF(sig.trans(x[,j]), 0) # formatF will make * left-aligned
if(grepl("<i>p</i>", names(x)[j])==FALSE)
names(x)[j] = "p"
x[,j] = p.trans(x[,j])
} else {
x[,j] = formatF(x[,j], digits[j])
}
if(grepl("^S\\.E\\.$|^Std\\. Error$|^se$|^SE$|^BootSE$", names(x)[j])) {
x[,j] = paste0("(", x[,j], ")") # add ( ) to S.E.
x[grepl("\\d", x[,j])==FALSE, j] = "" # remove ( ) from blank S.E.
if(grepl("S\\.E\\.", names(x)[j])==FALSE) names(x)[j] = "S.E."
}
if(grepl("^S\\.D\\.$|^Std\\. Deviation$", names(x)[j])) {
x[,j] = paste0("(", x[,j], ")") # add ( ) to S.D.
x[grepl("\\d", x[,j])==FALSE, j] = "" # remove ( ) from blank S.E.
if(grepl("S\\.D\\.", names(x)[j])==FALSE) names(x)[j] = "S.D."
}
# if(grepl("\\[", names(x)[j])) x[,j] = paste0("[", x[,j], ",")
# if(grepl("\\]", names(x)[j])) x[,j] = paste0(x[,j], "]")
# if(grepl("^[Ee]stimate$", names(x)[j])) names(x)[j] = "Coef."
names(x)[j] = gsub(" value$|val$", "", names(x)[j])
}
if(is.null(sig)==FALSE & "sig" %notin% names(x)) {
p.pos = which(names(x) %in% c("p", "<i>p</i>"))
nvars = length(names(x))
if(p.pos<nvars)
x = cbind(x[1:p.pos], ` ` = sig, x[(p.pos+1):nvars])
else
x = cbind(x, ` ` = sig)
x$` ` = as.character(x$` `)
}
if(inherits(row.names, "character")) {
row.names(x) = row.names
row.names = TRUE
}
if(inherits(col.names, "character")) {
names(x) = col.names
col.names = TRUE
}
## Compute length to generate line-chars ##
linechar = ifelse(line, "\u2500", "-")
title.length = nchar(names(x), type="width")
vars.length = c() # bug: vars.length = apply(apply(x, 2, nchar), 2, max)
for(j in 1:length(x)) vars.length[j] = max(nchar(x[,j], type="width"))
n.lines = apply(rbind(title.length, vars.length), 2, max)+nspaces
n.lines.rn = max(nchar(row.names(x), type="width"))+nspaces
if(row.names)
table.line = rep_char(linechar, sum(n.lines)+n.lines.rn)
else
table.line = rep_char(linechar, sum(n.lines))
## Output ##
if(is.null(file)) {
if(title!="") Print(title)
Print(table.line)
if(row.names)
cat(rep_char(" ", n.lines.rn))
for(j in 1:length(x)) {
name.j = names(x)[j]
cat(rep_char(" ", n.lines[j]-nchar(name.j, type="width")) %^% name.j)
}
cat("\n")
Print(table.line)
for(i in 1:nrow(x)) {
if(row.names) {
row.name.i = row.names(x)[i]
cat(row.name.i %^% rep_char(" ", n.lines.rn-nchar(row.name.i, type="width")))
}
for(j in 1:length(x)) {
# cat(sprintf(glue("% {n.lines[j]}s"), ifelse(is.na(xr[i,j]) | grepl("NA$", xr[i,j]), "", xr[i,j])))
x.ij = ifelse(is.na(x[i,j]) | grepl("NA$", x[i,j]), "", x[i,j])
cat(rep_char(" ", n.lines[j]-nchar(x.ij, type="width")) %^% x.ij)
}
cat("\n")
}
Print(table.line)
if(note!="") Print(note)
}
if(row.names) {
x = cbind(rn=row.names(x), x)
names(x)[1] = ""
}
if(!is.null(file)) {
html = df_to_html(x, title=title, note=note, append=append,
file=file,
align.head=file.align.head,
align.text=file.align.text)
} else {
html = NULL
}
invisible(list(df=x, html=html))
}
df_to_html = function(df, title="", note="", append="",
file=NULL,
align.head="auto",
align.text="auto") {
if(!is.null(file)) {
if(file=="NOPRINT") {
file = NULL
} else {
file = str_replace(file, "\\.docx$", ".doc")
if(str_detect(file, "\\.doc$")==FALSE)
file = paste0(file, ".doc")
}
}
TITLE = title
TNOTE = note
APPEND = append
if(length(align.head)==1) {
if(align.head=="auto")
align.head = c("left", rep("right", times=ncol(df)-1))
else
align.head = rep(align.head, times=ncol(df))
}
if(length(align.text)==1) {
if(align.text=="auto")
align.text = c("left", rep("right", times=ncol(df)-1))
else
align.text = rep(align.text, times=ncol(df))
}
df = as.data.frame(df)
for(j in 1:ncol(df)) {
df[[j]] = "<td align='" %^% align.text[j] %^% "'>" %^%
str_trim(str_replace_all(df[[j]], "^\\s*-{1}", "\u2013")) %^% "</td>"
}
THEAD = "<tr> " %^%
paste("<th align='" %^%
align.head %^%
"'>" %^% names(df) %^% "</th>",
collapse=" ") %^% " </tr>"
TBODY = "<tr> " %^%
paste(apply(df, 1, function(...) paste(..., collapse=" ")),
collapse=" </tr>\n<tr> ") %^% " </tr>"
TBODY = TBODY %>%
str_replace_all(">\\s*NA\\s*<", "><") %>%
str_replace_all("\\s+</td>", "</td>") %>%
str_replace_all("\\[\\s+", "[") %>%
str_replace_all("\\,\\s+", ", ") %>%
str_replace_all("<\\.001", "< .001")
TABLE = paste0("
<table>
<thead>
", THEAD, "
</thead>
<tbody>
", TBODY, "
</tbody>
</table>
")
HTML = paste0("<!DOCTYPE html>
<html>
<head>
<meta charset='utf-8'>
<title></title>
<style>
", ifelse(
grepl("\\.doc$", file),
"body, pre {font-size: 10.5pt; font-family: Times New Roman;}",
""
), "
p {margin: 0px;}
table {border-collapse: collapse; border-spacing: 0px; color: #000000;
border-top: 2px solid #000000; border-bottom: 2px solid #000000;}
table thead th {border-bottom: 1px solid #000000;}
table th, table td {padding-left: 5px; padding-right: 5px; height: 19px;}
</style>
</head>
<body>
<p>", TITLE, "</p>", TABLE, "<p>", TNOTE, "</p>", APPEND, "
</body>
</html>")
if(!is.null(file)) {
if(file!="NOPRINT") {
# sink(file)
# cat(HTML)
# sink()
f = file(file, "w", encoding="UTF-8")
cat(HTML, file=f)
close(f)
Print("<<green \u2714>> Table saved to <<bold \"{paste0(getwd(), '/', file)}\">>")
cat("\n")
}
}
invisible(list(HTML=HTML, TABLE=TABLE))
}
#' Format "1234" to "1,234".
#'
#' @param x A number or numeric vector.
#' @param mark Usually `","`.
#'
#' @return
#' Formatted character string.
#'
#' @seealso
#' [format()]
#'
#' [formatF()]
#'
#' @examples
#' formatN(1234)
#'
#' @export
formatN = function(x, mark=",") {
format(x, big.mark=mark)
}
#' Format numeric values.
#'
#' @param x A number or numeric vector.
#' @param digits Number of decimal places of output. Defaults to `3`.
#'
#' @return
#' Formatted character string.
#'
#' @examples
#' formatF(pi, 20)
#'
#' @seealso
#' [format()]
#'
#' [formatN()]
#'
#' @export
formatF = function(x, digits=3) {
# format(x, digits=0, nsmall=digits, scientific=FALSE)
if(inherits(x, "character")) {
xf = sprintf(paste0("%-", max(nchar(x), na.rm=TRUE), "s"), x) # left adjustment
} else {
x = sprintf(paste0("%.", digits, "f"), x)
xf = sprintf(paste0("%", max(nchar(x), na.rm=TRUE), "s"), x)
}
return(xf)
}
#' A simple extension of [rgb()].
#'
#' @param r,g,b Red, Green, Blue: 0~255.
#' @param alpha Color transparency (opacity): 0~1.
#' If not specified, an opaque color will be generated.
#'
#' @return
#' `"#rrggbb"` or `"#rrggbbaa"`.
#'
#' @examples
#' RGB(255, 0, 0) # red: "#FF0000"
#' RGB(255, 0, 0, 0.8) # red with 80% opacity: "#FF0000CC"
#'
#' @export
RGB = function(r, g, b, alpha) {
grDevices::rgb(r/255, g/255, b/255, alpha)
}
#' Timer (compute time difference).
#'
#' @param t0 Time at the beginning.
#' @param unit Options: `"auto"`, `"secs"`, `"mins"`, `"hours"`, `"days"`, `"weeks"`. Defaults to `"secs"`.
#' @param digits Number of decimal places of output. Defaults to `0`.
#'
#' @return
#' A character string of time difference.
#'
#' @examples
#' \dontrun{
#'
#' t0 = Sys.time()
#' dtime(t0)
#' }
#'
#' @export
dtime = function(t0, unit="secs", digits=0) {
dt = difftime(Sys.time(), t0, units=unit)
format(dt, digits=1, nsmall=digits)
}
#### File I/O ####
#' Set working directory to the path of currently opened file.
#'
#' Set working directory to the path of currently opened file (usually an R script). You may use this function in both **.R/.Rmd files and R Console**.
#' [RStudio](https://posit.co/download/rstudio-desktop/)
#' (version >= 1.2) is required for running this function.
#'
#' @param path `NULL` (default) or a specific path. Defaults to extract the path of the currently opened file (usually .R or .Rmd) using the `rstudioapi::getSourceEditorContext` function.
#' @param ask `TRUE` or `FALSE` (default). If `TRUE`, you can select a folder with the prompt of a dialog.
#'
#' @return
#' Invisibly return the path.
#'
#' @seealso
#' [setwd()]
#'
#' @examples
#' \dontrun{
#'
#' # RStudio (version >= 1.2) is required for running this function.
#' set.wd() # set working directory to the path of the currently opened file
#' set.wd("~/") # set working directory to the home path
#' set.wd("../") # set working directory to the parent path
#' set.wd(ask=TRUE) # select a folder with the prompt of a dialog
#' }
#'
#' @describeIn set.wd
#' Main function
#' @aliases set_wd
#' @export
set.wd = function(path=NULL, ask=FALSE) {
# if(rstudioapi::isAvailable()==FALSE)
# stop("[RStudio] is required for running this function!\n",
# "Please download and install the latest version of RStudio:\n",
# "https://www.rstudio.com/products/rstudio/download/", call.=TRUE)
is.windows = ifelse(Sys.info()[["sysname"]]=="Windows", TRUE, FALSE)
if(is.null(path)) {
tryCatch({
if(ask) {
# RStudio version >= 1.1.287
if(is.windows)
path = iconv(rstudioapi::selectDirectory(), from="UTF-8", to="GBK")
else
path = rstudioapi::selectDirectory()
} else {
# # RStudio version >= 1.4.843
# if(is.windows)
# file.path = iconv(rstudioapi::documentPath(), from="UTF-8", to="GBK")
# else
# file.path = rstudioapi::documentPath()
# RStudio version >= 0.99.1111
path = dirname(rstudioapi::getSourceEditorContext()$path)
}
}, error = function(e) {
# Error: Function documentPath not found in RStudio
message("Your RStudio version is: ", rstudioapi::getVersion(), "\n")
message("Please update RStudio to the latest version:\n",
"https://rstudio.com/products/rstudio/download/preview/\n")
})
}
if(length(path)>0) {
Run("setwd(\"{path}\")")
Print("<<green \u2714>> Set working directory to <<bold \"{getwd()}\">>")
# rstudioapi::sendToConsole(paste0("setwd(\"", path, "\")"), execute=TRUE)
}
invisible(path)
}
#' @describeIn set.wd
#' The alias of `set.wd` (the same)
#' @export
set_wd = set.wd
file_ext = function(filename) {
filename = str_trim(filename)
pos = regexpr("\\.([[:alnum:]]+)$", filename)
ifelse(pos>-1L, tolower(substring(filename, pos+1L)), "")
}
#' Import data from a file (TXT, CSV, Excel, SPSS, Stata, ...) or clipboard.
#'
#' @description
#' Import data from a file, with format automatically judged from file extension. This function is inspired by [rio::import()] and has several modifications. Its purpose is to avoid using lots of `read_xxx()` functions in your code and to provide one tidy function for data import. It supports many file formats (local or URL) and uses the corresponding R functions:
#'
#' - Plain text (.txt, .csv, .csv2, .tsv, .psv), using [data.table::fread()]
#' - Excel (.xls, .xlsx), using [readxl::read_excel()]
#' - SPSS (.sav), using [haven::read_sav()] or [foreign::read.spss()]
#' - Stata (.dta), using [haven::read_dta()] or [foreign::read.dta()]
#' - R objects (.rda, .rdata, .RData), using [load()]
#' - R serialized objects (.rds), using [readRDS()]
#' - Clipboard (on Windows and Mac OS), using [clipr::read_clip_tbl()]
#' - Other formats, using [rio::import()]
#'
#' @param file File name (with extension).
#' If unspecified, then data will be imported from clipboard.
#' @param encoding File encoding. Defaults to `NULL`.
#'
#' Options: `"UTF-8"`, `"GBK"`, `"CP936"`, etc.
#'
#' If you find messy code for Chinese text in the imported data, it is usually effective to set `encoding="UTF-8"`.
#' @param header Does the first row contain column names (`TRUE` or `FALSE`)? Defaults to `"auto"`.
#' @param sheet \[Only for Excel\] Excel sheet name (or sheet number). Defaults to the first sheet. Ignored if the sheet is specified via `range`.
#' @param range \[Only for Excel\] Excel cell range. Defaults to all cells in a sheet. You may specify it as `range="A1:E100"` or `range="Sheet1!A1:E100"`.
#' @param pkg \[Only for SPSS & Stata\] Use which R package to read
#' SPSS (.sav) or Stata (.dta) data file? Defaults to `"haven"`. You may also use `"foreign"`.
#'
#' Notably, `"haven"` may be preferred because it is more robust to non-English characters and can also keep variable labels (descriptions) from SPSS.
#' @param value.labels \[Only for SPSS & Stata\] Convert variables with value labels into R factors with those levels? Defaults to `FALSE`.
#' @param as Class of the imported data. Defaults to `"data.frame"`. Ignored if the file is an R data object (.rds, .rda, .rdata, .RData).
#'
#' Options:
#' - data.frame: `"data.frame"`, `"df"`, `"DF"`
#' - data.table: `"data.table"`, `"dt"`, `"DT"`
#' - tbl_df: `"tibble"`, `"tbl_df"`, `"tbl"`
#' @param verbose Print data information? Defaults to `FALSE`.
#'
#' @return
#' A data object (default class is `data.frame`).
#'
#' @seealso
#' [export()]
#'
#' @examples
#' \dontrun{
#'
#' # Import data from system clipboard
#' data = import() # read from clipboard (on Windows and Mac OS)
#'
#' # If you have an Excel file named "mydata.xlsx"
#' export(airquality, file="mydata.xlsx")
#'
#' # Import data from a file
#' data = import("mydata.xlsx") # default: data.frame
#' data = import("mydata.xlsx", as="data.table")
#' }
#'
#' @export
import = function(
file,
encoding=NULL,
header="auto",
sheet=NULL,
range=NULL,
pkg=c("haven", "foreign"),
value.labels=FALSE,
as="data.frame",
verbose=FALSE
) {
## initialize
if(missing(file)) {
file = "clipboard"
fmt = "clipboard"
} else {
if(grepl("^http.*://", file)) {
# file = rio:::remote_to_local(file=file)
fmt = "http"
} else {
if(file.exists(file)==FALSE)
stop("No such file. Did you forget adding the path or file extension?", call.=FALSE)
fmt = file_ext(file) # file format extracted from file extension
}
}
## import data
if(fmt=="") {
warning("File has no extension.", call.=FALSE)
data = readLines(file, encoding=encoding)
} else if(fmt=="clipboard") {
installed("clipr")
if(header=="auto") header = TRUE
x = clipr::read_clip()
if(is.null(x) | (is.character(x) & length(x)==1 & x[1]==""))
stop("The system clipboard is empty. You may first copy something.", call.=FALSE)
else
data = clipr::read_clip_tbl(x=x, header=header)
} else if(fmt %in% c("rds")) {
data = readRDS(file=file)
} else if(fmt %in% c("rda", "rdata")) {
envir = new.env()
load(file=file, envir=envir)
if(length(ls(envir))>1)
warning("RData file contains multiple objects. Returning the first object.", call.=FALSE)
data = get(ls(envir)[1], envir)
} else if(fmt %in% c("txt", "csv", "csv2", "tsv", "psv")) {
if(is.null(encoding)) encoding = "unknown"
data = data.table::fread(input=file,
sep="auto",
encoding=encoding,
header=header)
} else if(fmt %in% c("xls", "xlsx")) {
installed("readxl")
if(header=="auto") header = TRUE
data = readxl::read_excel(path=file,
sheet=sheet,
range=range,
col_names=header)
} else if(fmt %in% c("sav")) {
error = TRUE
if(pkg[1]=="foreign") {
# pkg="foreign"
installed("foreign")
try({
data = foreign::read.spss(
file=file,
reencode=ifelse(is.null(encoding), NA, encoding),
to.data.frame=TRUE,
use.value.labels=value.labels)
error = FALSE
}, silent=TRUE)
}
if(error | pkg[1]=="haven") {
# pkg="haven"
if(pkg[1]=="foreign")
message("[Retry] Using `haven::read_sav()` to import the data...")
installed("haven")
data = haven::read_sav(file=file, encoding=encoding)
if(value.labels) data = haven::as_factor(data)
}
} else if(fmt %in% c("dta")) {
error = TRUE
if(pkg[1]=="foreign") {
# pkg="foreign"
installed("foreign")
try({
data = foreign::read.dta(file=file, convert.factors=value.labels)
error = FALSE
}, silent=TRUE)
}
if(error | pkg[1]=="haven") {
# pkg="haven"
if(pkg[1]=="foreign")
message("[Retry] Using `haven::read_dta()` to import the data...")
installed("haven")
data = haven::read_dta(file=file, encoding=encoding)
if(value.labels) data = haven::as_factor(data)
}
} else {
data = rio::import(file=file)
}
## report data
if(verbose) {
if(is.data.frame(data))
Print("<<green \u2714>> Successfully imported: {nrow(data)} obs. of {ncol(data)} variables")
else
Print("<<green \u2714>> Successfully imported: {length(data)} values of class `{class(data)[1]}`")
}
## return data
if(is.null(as) | fmt %in% c("rds", "rda", "rdata")) {
return(data)
} else if(as %in% c("data.frame", "df", "DF")) {
return(as.data.frame(data))
} else if(as %in% c("data.table", "dt", "DT")) {
return(data.table::as.data.table(data))
} else if(as %in% c("tibble", "tbl_df", "tbl")) {
installed("tibble")
return(tibble::as_tibble(data))
} else {
return(data)
}
}
#' Export data to a file (TXT, CSV, Excel, SPSS, Stata, ...) or clipboard.
#'
#' @description
#' Export data to a file, with format automatically judged from file extension. This function is inspired by [rio::export()] and has several modifications. Its purpose is to avoid using lots of `write_xxx()` functions in your code and to provide one tidy function for data export.
#'
#' It supports many file formats and uses corresponding R functions:
#'
#' - Plain text (.txt, .csv, .csv2, .tsv, .psv), using [data.table::fwrite()]; if the `encoding` argument is specified, using [utils::write.table()] instead
#' - Excel (.xls, .xlsx), using [openxlsx::write.xlsx()]
#' - SPSS (.sav), using [haven::write_sav()]
#' - Stata (.dta), using [haven::write_dta()]
#' - R objects (.rda, .rdata, .RData), using [save()]
#' - R serialized objects (.rds), using [saveRDS()]
#' - Clipboard (on Windows and Mac OS), using [clipr::write_clip()]
#' - Other formats, using [rio::export()]
#'
#' @param x Any R object, usually a data frame (`data.frame`, `data.table`, `tbl_df`). Multiple R objects should be included in a *named* `list` (see examples). To save R objects, specify `file` with extensions `.rda`, `.rdata`, or `.RData`.
#' @param file File name (with extension). If unspecified, data will be exported to clipboard.
#' @param encoding File encoding. Defaults to `NULL`.
#'
#' Options: `"UTF-8"`, `"GBK"`, `"CP936"`, etc.
#'
#' If you find messy code for Chinese text in the exported data (often in CSV when opened with Excel), it is usually useful to set `encoding="GBK"` or `encoding="CP936"`.
#' @param header Does the first row contain column names (`TRUE` or `FALSE`)? Defaults to `"auto"`.
#' @param sheet \[Only for Excel\] Excel sheet name(s). Defaults to "Sheet1", "Sheet2", ... You may specify multiple sheet names in a character vector `c()` with the *same length* as `x` (see examples).
#' @param overwrite Overwrite the existing file (if any)? Defaults to `TRUE`.
#' @param verbose Print output information? Defaults to `FALSE`.
#'
#' @return
#' No return value.
#'
#' @seealso
#' [import()]
#'
#' [print_table()]
#'
#' @examples
#' \dontrun{
#'
#' export(airquality) # paste to clipboard
#' export(airquality, file="mydata.csv")
#' export(airquality, file="mydata.sav")
#'
#' export(list(airquality, npk), file="mydata.xlsx") # Sheet1, Sheet2
#' export(list(air=airquality, npk=npk), file="mydata.xlsx") # a named list
#' export(list(airquality, npk), sheet=c("air", "npk"), file="mydata.xlsx")
#'
#' export(list(a=1, b=npk, c="character"), file="abc.Rdata") # .rda, .rdata
#' d = import("abc.Rdata") # load only the first object and rename it to `d`
#' load("abc.Rdata") # load all objects with original names to environment
#'
#' export(lm(yield ~ N*P*K, data=npk), file="lm_npk.Rdata")
#' model = import("lm_npk.Rdata")
#' load("lm_npk.Rdata") # because x is unnamed, the object has a name "List1"
#'
#' export(list(m1=lm(yield ~ N*P*K, data=npk)), file="lm_npk.Rdata")
#' model = import("lm_npk.Rdata")
#' load("lm_npk.Rdata") # because x is named, the object has a name "m1"
#' }
#'
#' @export
export = function(
x, file,
encoding=NULL,
header="auto",
sheet=NULL,
overwrite=TRUE,
verbose=FALSE
) {
## initialize
if(missing(file)) {
file = "clipboard"
fmt = "clipboard"
} else {
if(file.exists(file)==TRUE) {
if(overwrite)
message("Overwrite file \"", file, "\" ...")
else
stop("File \"", file, "\" existed!", call.=FALSE)
}
fmt = file_ext(file) # file format extracted from file extension
}
## export data
if(fmt=="") {
stop("File has no extension.", call.=FALSE)
} else if(fmt=="clipboard") {
installed("clipr")
if(header=="auto") header = TRUE
suppressWarnings({
clipr::write_clip(content=x, sep="\t",
row.names=FALSE,
col.names=header)
})
} else if(fmt %in% c("rds")) {
saveRDS(object=x, file=file)
} else if(fmt %in% c("rda", "rdata")) {
if(is.data.frame(x)) {
save(x, file=file)
} else if(is.list(x)) {
if(inherits(x, "list")==FALSE) x = list(x)
if(is.null(names(x)))
names(x) = paste0("List", 1:length(x))
envir = as.environment(x)
save(list=names(x), file=file, envir=envir)
} else if(is.environment(x)) {
save(list=ls(x), file=file, envir=x)
} else if(is.character(x)) {
save(list=x, file=file)
} else {
stop("`x` must be a data.frame, list, or environment.", call.=FALSE)
}
} else if(fmt %in% c("txt", "csv", "csv2", "tsv", "psv")) {
sep = switch(fmt,
txt="\t",
csv=",",
csv2=";",
tsv="\t",
psv="|")
dec = ifelse(fmt=="csv2", ",", ".")
if(header=="auto") header = TRUE
if(is.null(encoding)) {
data.table::fwrite(x=x, file=file,
sep=sep, dec=dec,
row.names=FALSE,
col.names=header)
} else {
utils::write.table(x=x, file=file,
sep=sep, dec=dec,
row.names=FALSE,
col.names=header,
quote=FALSE, na="",
fileEncoding=encoding)
}
} else if(fmt %in% c("xls", "xlsx")) {
installed("openxlsx")
if(inherits(x, "list")==FALSE) x = list(x) # one element
if(header=="auto") header = TRUE
if(is.null(sheet)) {
if(is.null(names(x)))
names(x) = paste0("Sheet", 1:length(x))
openxlsx::write.xlsx(x=x, file=file,
overwrite=overwrite,
rowNames=FALSE,
colNames=header)
} else {
sheet = as.character(sheet)
if(length(x)==length(sheet)) {
n = length(x)
if(file.exists(file)) {
wb = openxlsx::loadWorkbook(file=file)
sheets = openxlsx::getSheetNames(file=file)
for(i in 1:n) {
if(sheet[i] %in% sheets)
openxlsx::removeWorksheet(wb, sheet=sheet[i])
openxlsx::addWorksheet(wb, sheetName=sheet[i])
openxlsx::writeData(wb, sheet=sheet[i], x=x[[i]],
rowNames=FALSE,
colNames=header)
}
openxlsx::saveWorkbook(wb, file=file, overwrite=TRUE)
} else {
names(x) = sheet
openxlsx::write.xlsx(x=x, file=file,
overwrite=overwrite,
rowNames=FALSE,
colNames=header)
}
} else {
stop("Length of sheet should be equal to length of x!", call.=FALSE)
}
}
} else if(fmt %in% c("sav")) {
installed("haven")
x = restore_labelled(x)
haven::write_sav(data=x, path=file)
} else if(fmt %in% c("dta")) {
installed("haven")
x = restore_labelled(x)
haven::write_dta(data=x, path=file)
} else {
rio::export(x=x, file=file)
}
## report status
if(verbose) {
if(fmt=="clipboard")
Print("<<green \u2714>> Successfully paste to clipboard")
else
Print("<<green \u2714>> Successfully saved to <<bold \"{paste0(getwd(), '/', file)}\">>")
}
}
restore_labelled = function(x) {
# restore labelled variable classes
x[] = lapply(x, function(v) {
if(is.factor(v)) {
haven::labelled(
x = as.numeric(v),
labels = stats::setNames(seq_along(levels(v)), levels(v)),
label = attr(v, "label", exact=TRUE))
} else if(!is.null(attr(v, "labels", exact=TRUE)) | !is.null(attr(v, "label", exact=TRUE))) {
haven::labelled(
x = v,
labels = attr(v, "labels", exact=TRUE),
label = attr(v, "label", exact=TRUE))
} else {
v
}
})
x
}
#### Excel-Style Functions ####
#' Search, match, and look up values (like Excel's functions `INDEX + MATCH`).
#'
#' In Excel, we can use `VLOOKUP`, `HLOOKUP`, `XLOOKUP` (a new function released in 2019), or the combination of `INDEX` and `MATCH` to search, match, and look up values. Here I provide a similar function. If multiple values were simultaneously matched, a warning message would be printed.
#'
#' @param data Main data.
#' @param vars Character (vector), specifying the variable(s) to be searched in `data`.
#' @param data.ref Reference data containing both the reference variable(s) and the lookup variable(s).
#' @param vars.ref Character (vector), with the **same length and order** as `vars`, specifying the reference variable(s) to be matched in `data.ref`.
#' @param vars.lookup Character (vector), specifying the variable(s) to be looked up and returned from `data.ref`.
#' @param return What to return. Default (`"new.data"`) is to return a data frame with the lookup values added. You may also set it to `"new.var"` or `"new.value"`.
#'
#' @return
#' New data object, new variable, or new value (see the argument `return`).
#'
#' @seealso
#' [dplyr::left_join()]
#'
#' @examples
#' ref = data.table(City=rep(c("A", "B", "C"), each=5),
#' Year=rep(2013:2017, times=3),
#' GDP=sample(1000:2000, 15),
#' PM2.5=sample(10:300, 15))
#' ref
#'
#' data = data.table(sub=1:5,
#' city=c("A", "A", "B", "C", "C"),
#' year=c(2013, 2014, 2015, 2016, 2017))
#' data
#'
#' LOOKUP(data, c("city", "year"), ref, c("City", "Year"), "GDP")
#' LOOKUP(data, c("city", "year"), ref, c("City", "Year"), c("GDP", "PM2.5"))
#'
#' @export
LOOKUP = function(
data, vars,
data.ref, vars.ref,
vars.lookup,
return=c("new.data", "new.var", "new.value")
) {
by = vars.ref
names(by) = vars
data.ref = as.data.frame(data.ref)
data.new = left_join(data,
data.ref[c(vars.ref, vars.lookup)],
by=by)
if(nrow(data.new) > nrow(data))
warning("More than one values are matched!", call.=TRUE)
if(length(return)==3) return = "new.data"
if(return=="new.value" & length(vars.lookup)>=2) return = "new.var"
if(return=="new.data") {
return(data.new)
} else if(return=="new.var") {
return(data.new[vars.lookup])
} else if(return=="new.value") {
return(data.new[[vars.lookup]])
}
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.