#' Remove duplicated words from a string
#'
#' @param string character vector
#' @param as_list if \code{TRUE} output is returned as a list (see \code{\link{strsplit}})
#'
#' @references
#' \url{http://stackoverflow.com/a/28033338/3986320}
#'
#' @export
uniqueWords <- function(string, as_list = FALSE) {
string <- lapply(strsplit(string, " "), unique)
if (!as_list)
string <- vapply(string, paste, character(1L), collapse = " ")
string
}
#' Clean strings from any non-ASCII characters, extra spaces etc.
#'
#' @param x a character vector
#' @param ascii if \code{TRUE} then string is converted to ASCII using
#' \code{\link{iconv}}
#' @param encoding encoding parameter passed to \code{\link{iconv}}, if
#' \code{encoding = ""} is used, then \code{\link{iconv}}
#' uses the default system encoding
#' @param lowercase if \code{TRUE} characters are transformed to lowercase
#' @param trim parameter passed to \code{\link{trimws}}, if \code{trim = "none"}
#' then white spaces are not trimmed
#' @param single_space if \code{TRUE} duplicated spaces are removed
#' @param alphanum if \code{TRUE} non alphanumeric characters are replaced with
#' value provided in \code{other_char}
#' @param other_char replacement value for non alphanumeric characters, space by default
#'
#' @export
clearText <- function(x, ascii = TRUE, encoding = "", lowercase = TRUE,
trim = c("both", "left", "right", "none"),
single_space = TRUE, alphanum = FALSE, other_char = " ") {
if (ascii)
x <- iconv(x, from = encoding, to = "ASCII//TRANSLIT")
if (lowercase)
x <- tolower(x)
if (alphanum)
x <- gsub("[^[0-9a-zA-Z ]", other_char[1L], x)
if (single_space)
x <- gsub("(?<=[\\s])\\s*", "", x, perl=TRUE)
trim <- match.arg(trim)
if (trim != "none")
x <- trimws(x, which = trim)
return(x)
}
#' Check if object is of Date class
#'
#' @param object
#'
#' @export
is.Date <- function(x) "Date" %in% class(x)
#
# rep_along <- function(x, along.with) {
# rep(x, length.out = length(along.with))
# }
#' Normalize, squash and standarize data
#'
#' @param x numeric vector
#' @param na.rm if \code{TRUE} missing values are removed
#' @param FUN function used for transforming \code{x} prior
#' to squashing (by defauls \code{\link{identity}})
#' @param \dots further arguments passed to \code{FUN}
#'
#' @name normalize
#' @export
normalize <- function(x, na.rm = FALSE) {
if (na.rm)
xx <- x[!is.na(x)]
else
xx <- x
(x - min(xx)) / diff(range(xx))
}
#' @rdname normalize
#' @export
squash <- function(x, FUN = identity, ..., na.rm = FALSE) {
if (na.rm)
xx <- x[!is.na(x)]
else
xx <- x
FUN(x, ...) / sum(FUN(xx, ...))
}
#' @rdname normalize
#' @export
zscore <- function(x, na.rm = FALSE) {
if (na.rm)
xx <- x[!is.na(x)]
else
xx <- x
(x - mean(xx)) / sd(xx)
}
#' Pick n greatest or smallest values from the vector
#'
#' @param x numeric vector
#' @param n position of the values to return
#'
#' @name nth_max
#' @export
nth_max <- function(x, n = 1L) {
sort(x, decreasing = TRUE)[n]
}
#' @rdname nth_max
#' @export
nth_min <- function(x, n = 1L) {
sort(x, decreasing = FALSE)[n]
}
#' Return first or last value
#'
#' @param an object
#'
#' @seealso \code{\link{head}}
#'
#' @name first
#' @export
fitst <- function(x) head(x, n = 1L)
#' @name first
#' @export
last <- function(x) tail(x, n = 1L)
# space <- function(x, ...) {
# format(x, ..., big.mark = " ", scientific = FALSE, trim = TRUE)
# }
as.epoch <- function(x) {
as.numeric(as.POSIXct(x))
}
#' Take the first non-missing value rowwise
#'
#' @param \dots list of vectors
#'
#' @references
#' \url{http://stackoverflow.com/q/19253820/3986320}
#'
#' @export
coalesce <- function(...) {
Reduce(function(x, y) {
i <- which(is.na(x))[1L]
x[i] <- y[i]
x
}, list(...))
}
#' Fill missing values with predefined constant
#'
#' @param .data matrix or data.frame object
#' @param \dots column names if using restricted columns
#' @param fill replacement value
#'
#' @importFrom lazyeval lazy_dots
#'
#' @export
fill_na <- function(.data, ..., fill = NA) {
stopifnot(is.matrix(.data) || is.data.frame(.data))
if (is.na(fill))
return(.data)
if (!missing(...)) {
dots <- lazy_dots(...)
.data[apply(.data[, dots], 1L, anyNA), dots] <- fill[1L]
} else {
.data[is.na(.data)] <- fill[1L]
}
.data
}
#' Drop rows with missing values
#'
#' @param .data data.frame or matrix
#' @param \dots column names if using restricted columns
#'
#' @references
#' \url{http://stackoverflow.com/q/22353633/3986320}
#'
#' @importFrom lazyeval lazy_dots
#' @importFrom dplyr select_ filter
#' @importFrom stats complete.cases
#'
#' @export
drop_na <- function(.data, ...) {
if (missing(...)){
selected_rows <- complete.cases(.data)
} else {
selected_rows <- complete.cases(select_(.data, .dots = lazy_dots(...)))
}
filter(.data, selected_rows)
}
#' Rowwise finding the column with greatest value
#'
#' @param .data data.frame or matrix
#' @param \dots column names if using restricted columns
#'
#' @importFrom dplyr select_
#' @importFrom lazyeval lazy_dots
#'
#' @name min_col
#' @export
min_col <- function(.data, ...) {
if (!missing(...))
data <- select_(.data, .dots = lazy_dots(...))
colnames(.data)[apply(.data, 1L, which.min)]
}
#' @rdname min_col
#' @export
max_col <- function(.data, ...) {
if (!missing(...))
data <- select_(.data, .dots = lazy_dots(...))
colnames(.data)[apply(.data, 1L, which.max)]
}
#' Estimate mode of discrete or continous data
#'
#' @param x numeric vector
#' @param discrete if \code{TRUE} estimated mode assuming that data is dicrete
#' @param \dots additional parameters passed to \code{\link[stats]{density}}
#'
#' @importFrom stats density
#' @export
Mode <- function(x, discrete = FALSE, ...) {
if (!discrete) {
dx <- density(x, ...)
dx$x[which.max(dx$y)]
} else {
ux <- unique(x)
tx <- tabulate(match(x, ux))
ux[tx == max(tx)]
}
}
#' Report PRESS statistic for lm object
#'
#' @param model \code{\link[stats]{lm}} object
#'
#' @importFrom stats residuals lm.influence
#' @export
PRESS <- function(model) {
if (!inherits(model, "lm") || inherits(model, "glm"))
stop("model needs to be lm object")
pr <- residuals(model)/(1 - lm.influence(model)$hat)
sum(pr^2)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.