#' Compound tokens without overflowing memory and crashing R
#' @description A wrapper around \link[quanteda]{tokens_compound} that processes your tokens in chunks,
#' set by argument \code{stepsize}. See \link[quanteda]{tokens_compound} for more info.
#'
#' @export
#' @examples
#' toks<-tokens(data_corpus_inaugural)
#' compounded<-tokens_compound_stepwise(x=toks,pattern="I am",stepsize=10)
#'
#' #note: does not work?
#'
tokens_compound_stepwise<-function(x, pattern, stepsize=100, concatenator = "_",
valuetype = c("glob", "regex", "fixed"),
case_insensitive = TRUE, join = TRUE){
valuetype<-match.arg(valuetype)
output<-list()
lx<-length(x)
mxsteps<-ceiling(lx/stepsize)
for(i in seq_len(mxsteps)){
cat("\rCompounding tokens... ",round(100*(i-1)/mxsteps,digits=2),"%",sep="")
range<-(1+(i-1)*stepsize):min(i*stepsize, lx)
currcomp<-quanteda::tokens_compound(x[range],pattern,concatenator,valuetype,case_insensitive,join)
output<-c(output,currcomp)
}
cat("\rCompounding tokens... finished.")
return(output)
}
#' The logistic function
#'
#' @param x A numeric value
#'
#' @return \code{x} with the logistic function applied
#' @export
#'
#' @examples
#' logistic(-10:10)
#'
logistic<-function(x){
1/(1+exp(-x))
}
#wtd.median
#' Weighted Median
#'
#' @param x an input vector
#' @param wts a vector of weights
#' @param na.rm Logical indicating whether NA values in the input and weight vectors should be stripped.
#'
#' @return A weighted median of the input values and weights.
#' @export
#'
#' @examples
#' wtd.median(1:5,c(.5,4,1,2,1))
#'
wtd.median<-function(x,wts,na.rm=T){
#clean
if(na.rm){
to.include<-which(!(is.na(x) | is.na(wts)))
x<-x[to.include]
wts<-wts[to.include]
}
#sort
xord<-order(x)
x<-x[xord]
wts<-wts[xord]
#standardize
wts<-wts/sum(wts)
#find middle
cumwts<-cumsum(wts)-.5
sig<-sign(cumwts)
if(!any(sig==0)){
midx<-which(sig==1)[1]
out<-x[midx]
}else{
midx<-c(which(sig==1)[1],which(sig==0)[1])
out<-mean(x[midx])
}
return(out)
}
#' Downweight outliers
#' @description Computes weights; trials within certain bounds of the mean receive the maximum weight while trials
#' outside these bounds are downweighted to 0 or an optional minimum.
#'
#' @param x A numeric vector
#' @param mean An optional mean of the vector
#' @param s An optional standard deviation of the vector
#' @param sdist The number of standard deviations beyond which values should be downweighted
#' @param taper A number indicating how strongly values exceeding the standard deviation should taper off
#' @param scale How the weight vector should be scaled: "norm" sets the sum to 1, "max" sets the maximum to 1.
#' @param min A minimum weight.
#'
#' @return A numeric vector of weights
#' @export
#'
#' @examples
logit.weightfun<-function(x,mean=mean(x),s=sd(x),
sdist=3,taper=10,
scale=c("max","norm"),min=0){
scale <- match.arg(scale)
zx <- (x-m)/s * taper
out <- inv.logit((zx-sdist*taper)) * inv.logit((-zx-sdist*taper))
out <- (out/max(out)) * (1-min) + min
if(scale== "norm"){
out < -out / sum(out)
}
return(out)
}
#' Influence function of the Pearson correlation coefficient
#'
#' @param x,y Numeric vectors
#'
#' @return Influence values of all observations.
#' @export
#'
#' @examples
#' outlier<-numeric(100)
#' outlier[1]<-1000
#' cor.influence(rnorm(100)+outlier,rnorm(100)+outlier)
#'
cor.influence<-function(x,y){
x<-x-mean(x)
y<-y-mean(y)
x*y-(x^2+y^2)/2*cor(x,y)
}
#' Read and merge all .csv files in a folder
#'
#' @param folder path to a folder
#' @param readfunc list of functions that will be used to read the files;
#' if the first function fails, the second function will be used, etc.
#'
#' @return A data.frame containing all merged .csv files
#' @export
#'
#' @examples
#'
read.csv.folder <- function(folder="./", readfunc=list(read.csv,read.csv2,read.table)){
flist<-list.files(folder)
flist<-flist[grepl(".csv",flist)]
datlist<-list()
ct<-0
for(file in flist){
ct<-ct+1
datlist[[ct]]<-"fail"
for(i in seq_len(length(readfunc))){
#try(
datlist[[ct]]<-do.call(readfunc[[i]],list(file=paste0(folder,file),stringsAsFactors=F))
#,silent=T)
if(any(datlist[[ct]]!="fail")){ break; }
}
if(any(datlist[[ct]]=="fail")){
warning("Failed to read ",file)
}
}
combodat<-datlist[[1]]
if(length(datlist)>1){
for(i in 2:length(datlist)){
combodat<-rbind(combodat,datlist[[i]])
}
}
return(combodat)
}
#' Check convergence of a \code{brmsfit} object
#'
#' @param x A \code{brmsfit} object
#' @param min.ess The minimum effective sample size for any parameter.
#' Values below will trigger a warning.
#' @param max.rhat The maximum R-hat value for any parameter.
#' Values above will trigger a warning.
#'
#' @return Silently returns a \code{data.frame} with parameter names,
#' ESS, and rhat values.
#' @export
#'
#' @examples
#' \dontrun{
#' fit <- brm(time ~ age * sex, data = kidney)
#' brms.check(fit)
#' }
#'
brms.check <- function(x,min.ess=400,max.rhat=1.05){
info <- data.frame(par=dimnames(x$fit)$parameters,
ess=posterior::ess_basic(x),
rhat=posterior::rhat(x))
toolow.ess <- info$ess < min.ess
if(any(toolow.ess)){
warning("The following parameter(s) have an excessively low ",
"effective sample size, consider raising the iteration count: ",
paste0(info$par[toolow.ess],collapse=", "))
}
toohigh.rhat <- info$rhat > max.rhat
if(any(toohigh.rhat)){
warning("The following parameter(s) have an excessively high ",
"Rhat, consider changing the model or adding more warmup samples: ",
paste0(info$par[toohigh.rhat],collapse=", "))
}
return(invisible(info))
}
# Add option to define positive, negative, and zero color;
# add support for simple vector input
colorEdges<-function(x,maxedge=NULL){
edgevec<-as.vector(x)
if(is.null(maxedge)){
edgevec <- edgevec/max(abs(edgevec))
}else{
edgevec <- edgevec/abs(maxedge)
}
cols<-hsv(h=ifelse(sign(edgevec)==1,2/3,0),
s=abs(edgevec)*.9+ifelse(edgevec==0,0,.1),
v=1)
matrix(cols,ncol=ncol(x),nrow=nrow(x))
}
angular.mean <- function(x,period=2*pi){
period/(2*pi)*atan2(mean(sin(x/period*2*pi)),mean(cos(x/period*2*pi)))
}
# angular.mean(c(12,11,7),period=12)
# angular.mean(c(12,11,8),period=12)
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.