# -----------------------------------
# The following commands are needed to ensure that the roxygen2 package, which deals with documenting the package, does not conflict with the Rcpp package. Do not alter!
#' @useDynLib bobFunctions
#' @importFrom Rcpp evalCpp
NULL
# -----------------------------------
#' myFunctions
#'
#' List all functions currently available in bobFunctions package.
#'
#' @export
myFunctions = function() {
  # Function names grouped by category. A named list keeps each heading paired
  # with its functions directly, avoiding the fragile parallel-vector indexing
  # of the original (where categories and vectors could drift out of sync).
  functionList <- list(
    'Misc' = c(
      'header', 'loadPackage', 'zeroPad', 'logDescriptive', 'perlinNoise',
      'logSum', 'colVars', 'rowVars', 'grad', 'cubeSpline_segment',
      'cubeSpline', 'logHarmonicMean', 'first', 'last', 'latexTable',
      'changeNames', 'reportConsole', 'dot', 'newLine', 'monthDays',
      'is.int', 'removeCols', 'matrixSmooth', 'minSpanTree', 'fastRead',
      'vec2mat', 'bin2D', 'safeDivide', 'exit', 'breakCoverage',
      'getListDepth', 'gcDist', 'dummy1'
    ),
    'Probability' = c(
      'rdirichlet', 'rinvgamma', 'dinvgamma', 'rt_scaled', 'dt_scaled',
      'normal_loglike', 'dBDI', 'dlgamma', 'rSTR1', 'dSTR1', 'rCRP',
      'rCRP2', 'rMultiMix', 'rDPM', 'sampleVec'
    ),
    'Combinatorics' = c(
      'SterlingFirst', 'SterlingSecond', 'incrementRestrictedGrowth',
      'allSamps', 'convertRadix', 'incrementBins'
    ),
    'Plotting' = c(
      'plotOn', 'plotOff', 'MCMCPlot', 'MCMCPlot2', 'densityPlot',
      'errorBars', 'pieCharts', 'win', 'animateOn', 'animateOff',
      'coordText', 'multiPanel', 'imageFix', 'filledContour2', 'ribbon',
      'setMargin', 'faster'
    ),
    'Colour palettes' = c(
      'bobSpectrum', 'bobRainbow', 'bobNiceCols', 'bobMapCols',
      'bobFireIce', 'colPlot', 'transHex', 'smoothCols'
    )
  )
  # print each category heading followed by its function names in sorted order
  for (i in seq_along(functionList)) {
    cat(paste('\n###',names(functionList)[i],'\n'))
    for (x in sort(functionList[[i]])) {
      cat(paste('-',x,'\n'))
    }
    cat('\n')
  }
}
# -----------------------------------
#' header
#'
#' Write header text to console that can be copied into a new script.
#'
#' @export
header <- function(name="Name") {
# Prints a ready-made script header (title, author, date stamp, package load
# line) to the console so it can be copied into a new R script. 'name' becomes
# the script title in the first line of the header.
# note - do not indent this section or it will show up in header() output!
# note - indents can be inserted automatically when copying code from one editor to another, so be wary of this.
# build the whole header as a single string; Sys.Date() stamps today's date
s <- paste("
# ",name,".R
# Author: Bob Verity
# Date: ",Sys.Date(),"
# Purpose:
# (this is an example header)
# ------------------------------------------------------------------
# load bobFunctions package. If not already installed, this can be obtained from github via the devtools command install_github('bobverity/bobFunctions')
library(bobFunctions)
", sep="")
# write to console for copy-pasting
cat(s)
}
# -----------------------------------
#' loadPackage
#'
#' Single command for installing (if not already installed) and loading a package. For example, the \code{loadPackage('devtools')} command would run the following code block:
#' \preformatted{
#' if (!"devtools"\%in\%rownames(installed.packages())) {
#' install.packages('devtools')
#' }
#' require("devtools")
#' }
#'
#' @export
loadPackage = function(string) {
  # Install (if absent) and attach the package named by 'string'.
  # The original built the equivalent code as a string and ran it through
  # eval(parse(text=...)); that indirection is unnecessary and fragile
  # (e.g. package names containing quotes would break it), so the calls are
  # now made directly.
  if (!string %in% rownames(installed.packages())) {
    install.packages(string)
  }
  # character.only=TRUE because the package name is held in a variable.
  # Returns TRUE/FALSE from require(), matching the original's return value.
  require(string, character.only=TRUE)
}
# -----------------------------------
#' plotOn
#'
#' Start optional save-to-file function.
#'
#' @export
plotOn = function(active=FALSE, fileroot='', filestem1='', filestem2='', fileindex='', type='pdf', width=6, height=6, res=300) {
  # Opens a pdf or png graphics device for saving the next plot. Does nothing
  # when active=FALSE, so save-to-file can be toggled with a single flag.
  if (!active) {
    return(invisible(NULL))
  }
  # file name is the concatenation of the four name components
  stem <- paste(fileroot, filestem1, filestem2, fileindex, sep="")
  if (type=='pdf') {
    pdf(file=paste(stem, ".pdf", sep=""), bg="white", width=width, height=height)
  }
  if (type=='png') {
    # png sizes are in pixels: scale the inch dimensions up by 500
    png(file=paste(stem, ".png", sep=""), bg="white", width=width*500, height=height*500, res=res)
  }
}
# -----------------------------------
#' plotOff
#'
#' End optional save-to-file function.
#'
#' @export
plotOff = function(active=FALSE) {
  # Closes the current graphics device, but only when saving was switched on
  # via the matching plotOn(active=TRUE) call.
  if (!active) {
    return(invisible(NULL))
  }
  dev.off()
}
# -----------------------------------
#' zeroPad
#'
#' Add leading zeros to number. Can handle negative numbers. For more advanced options see help for the formatC function.
#'
#' @export
zeroPad = function(number,padding) {
  # Left-pad 'number' with zeros to at least 'padding' digits, preserving a
  # leading minus sign. Returns a character string.
  #
  # Bug fixes relative to the original:
  #  - the negative branch referenced nchar(x), where x is undefined
  #  - rep(0, padding - nchar(number)) errored when the number already had
  #    more digits than 'padding'; max(0, ...) now guards that case
  s <- as.character(number)
  preamble <- ''
  if (substr(s,1,1)=='-') {
    preamble <- '-'
    s <- substr(s, 2, nchar(s))
  }
  zeros <- paste(rep(0, max(0, padding - nchar(s))), collapse="")
  output <- paste(preamble, zeros, s, sep='')
  return(output)
}
# -----------------------------------
#' rdirichlet
#'
#' Draw from Dirichlet distribution with parameters alpha_vec (does not have to be symmetric).
#'
#' @export
rdirichlet <- function(alpha_vec) {
  # Gamma-draw construction: independent gamma(shape=alpha_i) variates,
  # normalised to sum to one, are a Dirichlet(alpha_vec) draw.
  gammaDraws <- rgamma(length(alpha_vec), shape=alpha_vec, scale=1)
  return(gammaDraws/sum(gammaDraws))
}
# -----------------------------------
#' rdirichlet
#'
#' Draw from Dirichlet distribution with parameters alpha_vec (does not have to be symmetric).
#'
#' @export
rdirichlet <- function(alpha_vec) {
# NOTE(review): this is an exact byte-for-byte duplicate of the rdirichlet()
# definition above; the second definition silently overwrites the first and
# can safely be deleted (along with its duplicated roxygen block).
# gamma draws normalised to sum to one give a Dirichlet(alpha_vec) draw
Y <- rgamma(length(alpha_vec), shape=alpha_vec, scale=1)
output <- Y/sum(Y)
return(output)
}
# -----------------------------------
#' rinvgamma
#'
#' Draw from inverse-gamma distribution
#'
#' @export
rinvgamma <- function(n, alpha, beta) {
  # The reciprocal of a gamma(shape=alpha, rate=beta) draw is
  # inverse-gamma(alpha, beta) distributed.
  1/rgamma(n, shape=alpha, rate=beta)
}
# -----------------------------------
#' dinvgamma
#'
#' Density of inverse-gamma distribution
#'
#' @export
dinvgamma <- function(x, alpha, beta, log=TRUE) {
  # Log-density of the inverse-gamma(shape=alpha, rate=beta) distribution.
  # Note the default log=TRUE, unlike the usual d*() convention of log=FALSE.
  logDens <- alpha*log(beta) - lgamma(alpha) - (alpha+1)*log(x) - beta/x
  if (log) logDens else exp(logDens)
}
# -----------------------------------
#' dt_scaled
#'
#' Density of scaled student's t distribution.
#'
#' @export
dt_scaled = function(x, df, ncp, scale, log=FALSE) {
  # Density of the location-scale student-t: standardise the point first,
  # then evaluate the log-density and include the 1/scale Jacobian term.
  z <- (x - ncp)/scale
  logDens <- lgamma((df+1)/2) - lgamma(df/2) - 0.5*log(pi*df*scale^2) -
    ((df+1)/2)*log(1 + z^2/df)
  if (log) {
    return(logDens)
  }
  exp(logDens)
}
# -----------------------------------
#' normal_loglike
#'
#' Log-probability of data x integrated over normal likelihood with standard deviation sigma and normal prior with mean priorMean and standard deviation priorSD. Probability of data is raised to the power beta, which has default value of beta=1 giving ordinary likelihood.
#'
#' Model specification:
#' x_i ~ Normal(mu, sigma)
#' mu ~ Normal(priorMean, priorSD)
#'
#' @export
normal_loglike <- function(x, sigma, priorMean, priorSD, beta=1) {
# number of observations
n <- length(x)
# conjugate normal-normal update: posterior variance and mean of mu given the
# data, with the likelihood tempered to the power beta
mu_postVar <- 1/(beta*n/sigma^2+1/priorSD^2)
mu_postMean <- mu_postVar * (beta*sum(x)/sigma^2+priorMean/priorSD^2)
# log marginal probability of the data with mu integrated out analytically
ret <- -0.5*beta*n*log(2*pi*sigma^2) - 0.5*log(priorSD^2) + 0.5*log(mu_postVar) - 0.5*(beta*sum(x^2)/sigma^2 + priorMean^2/priorSD^2 - mu_postMean^2/mu_postVar)
return(ret)
}
# -----------------------------------
#' logDescriptive
#'
#' Calculates descriptive stats on extremely large or extremely small values. Input vector of values in log-space. Output sample mean and variance, also in log-space.
#'
#' @export
logDescriptive = function(Y) {
# number of observations
n <- length(Y)
# log of the sample mean: subtract min(Y) inside exp() to avoid underflow,
# then add it back on the log scale
log_ybar <- log(mean(exp(Y-min(Y))))+min(Y)
# log of the (n-1)-denominator sample variance via the expansion
# mean((x - xbar)^2), with every exponential rescaled by min(Y) so the
# terms stay in a numerically safe range
log_samplevar <- 2*min(Y) + log(n/(n-1)*mean(exp(2*Y-2*min(Y))-2*exp(Y+log_ybar-2*min(Y))+exp(2*log_ybar-2*min(Y))))
# return c(log mean, log sample variance)
return(c(log_ybar,log_samplevar))
}
# -----------------------------------
#' perlinNoise
#'
#' Generates Perlin noise of any scale, in a matrix of any size. Raw code is copied from the internet.
#'
#' @param outRows rows in output matrix.
#' @param outCols columns in output matrix.
#' @param levelsX bumpyness in x dimension.
#' @param levelsY bumpyness in y dimension.
#'
#' @export
perlinNoise = function(outRows=100, outCols=100, levelsX=10, levelsY=10) {
  # Classic 2D Perlin noise: random unit gradient vectors on an n*m lattice,
  # evaluated at a regular grid of sample points with smoothstep interpolation.
  # convert to more convenient names
  M = outRows
  N = outCols
  n = levelsX
  m = levelsY
  # For each point on this n*m grid, choose a unit 1 vector
  vector_field <- apply(
    array( rnorm( 2 * n * m ), dim = c(2,n,m) ),
    2:3,
    function(u) u / sqrt(sum(u^2))
  )
  # noise value at a single point (x,y)
  f <- function(x,y) {
    # Find the grid cell in which the point (x,y) is
    i <- floor(x)
    j <- floor(y)
    # Bug fix: the original used ||, which passes whenever ANY one bound
    # holds and therefore never actually validated the point; && requires
    # all four bounds to hold simultaneously.
    stopifnot( i >= 1 && j >= 1 && i < n && j < m )
    # The 4 vectors, from the vector field, at the vertices of the square
    v1 <- vector_field[,i,j]
    v2 <- vector_field[,i+1,j]
    v3 <- vector_field[,i,j+1]
    v4 <- vector_field[,i+1,j+1]
    # Vectors from the point to the vertices
    u1 <- c(x,y) - c(i,j)
    u2 <- c(x,y) - c(i+1,j)
    u3 <- c(x,y) - c(i,j+1)
    u4 <- c(x,y) - c(i+1,j+1)
    # Scalar products of vertex gradients with offsets
    a1 <- sum( v1 * u1 )
    a2 <- sum( v2 * u2 )
    a3 <- sum( v3 * u3 )
    a4 <- sum( v4 * u4 )
    # Weighted average of the scalar products using the smoothstep weight
    s <- function(p) 3 * p^2 - 2 * p^3
    p <- s( x - i )
    q <- s( y - j )
    b1 <- (1-p)*a1 + p*a2
    b2 <- (1-p)*a3 + p*a4
    (1-q) * b1 + q * b2
  }
  # Evaluate on a regular grid. NOTE(review): outer() returns an N-by-M matrix
  # (outCols rows, outRows columns) -- confirm this orientation is what the
  # outRows/outCols argument names intend before relying on it.
  xs <- seq(from = 1, to = n, length = N+1)[-(N+1)]
  ys <- seq(from = 1, to = m, length = M+1)[-(M+1)]
  outer( xs, ys, Vectorize(f) )
}
# -----------------------------------
#' dBDI
#'
#' Probability mass function for the birth-death-immigration model. Note that this is simply a particular version of the negative-binomial likelihood. When applied to gene families birthRate=rate of gene duplication, deathRate=rate of gene deletion, immigrationRate=rate of de novo gene creation.
#'
#' @param x number of individuals or genes (i.e. value of random variable).
#' @param birthRate rate of birth.
#' @param deathRate rate of death.
#' @param immigrationRate rate of immigration.
#'
#' @export
dBDI = function(x, birthRate, deathRate, immigrationRate, log=FALSE) {
  # The stationary distribution of the birth-death-immigration process is
  # negative binomial with size = immigration/birth and success probability
  # 1 - birth/death; delegate to dnbinom accordingly.
  sizeParam <- immigrationRate/birthRate
  probParam <- 1 - birthRate/deathRate
  dnbinom(x=x, size=sizeParam, prob=probParam, log=log)
}
# -----------------------------------
#' dlgamma
#'
#' Probability density function for log(X), where X is gamma(shape=alpha,rate=beta). The mean of this distribution is equal to the integral of log(x)*dgamma(x) over the interval [0,infinity), which does not have a simple analytical solution that I am aware of.
#'
#' @export
dlgamma <- function(x, alpha, beta, log=FALSE) {
  # Log-density of log(X) where X ~ gamma(shape=alpha, rate=beta), obtained by
  # change of variables (the exp(x) Jacobian is absorbed into alpha*x).
  logDens <- alpha*x + alpha*log(beta) - lgamma(alpha) - beta*exp(x)
  if (!log) {
    logDens <- exp(logDens)
  }
  return(logDens)
}
# -----------------------------------
#' logSum
#'
#' Add numbers together in log space while avoiding under/overflow problems. Accepts vector inputs.
#'
#' @export
logSum <- function(logA, logB) {
  # log(exp(logA) + exp(logB)), elementwise, always factoring out the larger
  # term so that exp() cannot overflow.
  output <- rep(0, length(logA))
  aSmaller <- logA < logB
  output[aSmaller] <- (logB + log(1 + exp(logA-logB)))[aSmaller]
  output[!aSmaller] <- (logA + log(1 + exp(logB-logA)))[!aSmaller]
  return(output)
}
# -----------------------------------
#' SterlingFirst
#'
#' Computes Stirling numbers of the first kind (the function name retains the original 'Sterling' spelling). Results are output in log space, and in general this function is robust to overflow, being capable of handling values as large as n=177.
#'
#' @export
SterlingFirst <- function(n) {
# Returns log of the unsigned Stirling numbers of the first kind |s(n,k)|
# for k = 1..n. (The function name uses the original 'Sterling' spelling.)
# rows 1 and 2 of the triangle are all ones, i.e. log = 0
if (n<=2)
return(rep(0,n))
# Sterlingvec holds the current row of the triangle, rescaled by its maximum;
# K accumulates the log of all rescaling factors so the true (log) values can
# be recovered at the end without overflow
Sterlingvec <- c(1,1)
K <- 0
for (i in 2:(n-1)) {
# recurrence |s(i+1,k)| = |s(i,k-1)| + i*|s(i,k)| applied across the row
tempmat <- rbind(c(0,Sterlingvec),i*c(Sterlingvec,0))
Sterlingvec <- colSums(tempmat)
# rescale by the row maximum and remember log of the scale factor
K <- K + log(max(Sterlingvec))
Sterlingvec <- Sterlingvec/max(Sterlingvec)
}
# recombine accumulated scale with the rescaled row to give log values
output <- K + log(Sterlingvec)
return(output)
}
# -----------------------------------
#' SterlingSecond
#'
#' Computes Stirling numbers of the second kind (the function name retains the original 'Sterling' spelling). Results are output in log space, and in general this function is robust to overflow, being capable of handling values as large as n=177.
#'
#' @export
SterlingSecond = function(n) {
  # Returns log of the Stirling numbers of the second kind S(n,k), k = 1..n,
  # built row by row in log space via the sibling logSum() helper.
  # degenerate cases: no partitions below n=1, single partition at n=1
  if (n < 1) {
    return(-Inf)
  }
  if (n == 1) {
    return(0)
  }
  # recurrence S(i+1,k) = S(i,k-1) + k*S(i,k), applied across the whole row;
  # -Inf acts as log(0) padding at the row ends
  logRow <- 0
  for (rowIndex in 1:(n-1)) {
    logRow <- logSum( c(-Inf, logRow), c(logRow, -Inf) + log(1:(rowIndex+1)) )
  }
  return(logRow)
}
# -----------------------------------
#' incrementRestrictedGrowth
#'
#' Find next group that satisfies restricted growth function (max value K). Allows user to explore all unique partitions by iteratively updating the grouping. Returns 0 when no more groups.
#'
#' @export
incrementRestrictedGrowth <- function(group,K) {
# Advance 'group' to the next restricted-growth string (each value at most one
# greater than the maximum of the values before it, and at most K). Returns 0
# once every valid string has been visited.
# start by incrementing the final element
target <- length(group)
group[target] <- group[target] + 1
# carry leftwards: while the incremented value violates either the restricted
# growth condition or the cap K, reset it to 1 and increment the element to
# the left instead.
# NOTE(review): when target==1 the index 1:(target-1) is 1:0 = c(1,0), which
# selects group[1] (not an empty vector) -- the termination logic relies on
# this indexing quirk; confirm before refactoring.
while (group[target]>(max(group[1:(target-1)])+1) | group[target]>K) {
group[target] <- 1
target <- target - 1
group[target] <- group[target] + 1
}
# carrying all the way back to the first element means the enumeration is done
if (target==1) {
return(0)
} else {
return(group)
}
}
# -----------------------------------
#' allSamps
#'
#' Output all possible sequences that are possible by sampling without replacement from a given list.
#'
#' @export
allSamps <- function(targetlist) {
  # Every combination of one element per list entry, one combination per row.
  optionCounts <- mapply(length, targetlist)
  comboTotal <- prod(optionCounts)
  out <- matrix(0, nrow=comboTotal, ncol=length(optionCounts))
  for (j in seq_along(optionCounts)) {
    # repeat each value enough times ('each') that all combinations of the
    # later positions appear, and cycle the whole pattern ('times') once for
    # every combination of the earlier positions
    outerReps <- prod(optionCounts[seq_len(j-1)])
    innerReps <- prod(optionCounts[-seq_len(j)])
    out[,j] <- rep(rep(targetlist[[j]], each=innerReps), times=outerReps)
  }
  return(out)
}
# -----------------------------------
#' convertRadix
#'
#' Convert decimal number system to any other radix (base).
#'
#' @param x number to convert.
#' @param base radix to convert to.
#' @param fixlength if used, always output at least this many digits.
#'
#' @export
convertRadix <- function(x, base=2, fixlength=NA) {
  # Convert a non-negative decimal number to a vector of digits in the given
  # radix, most significant digit first.
  #
  # Bug fix: the original returned NULL for x=0 (the while loop never ran);
  # zero is now handled explicitly. The padding logic is unchanged, so
  # convertRadix(0, fixlength=k) still returns k zeros as before.
  if (x == 0) {
    output <- 0
  } else {
    # peel off digits from least to most significant
    floornumber <- x
    output <- NULL
    while (floornumber != 0) {
      output <- c(floornumber %% base, output)
      floornumber <- floor(floornumber/base)
    }
  }
  # left-pad with zeros up to the requested number of digits
  # (&& rather than & — this is a scalar condition)
  if (!is.na(fixlength) && fixlength > length(output)) {
    output <- c(rep(0, fixlength - length(output)), output)
  }
  return(output)
}
# -----------------------------------
#' rSTR1
#'
#' Draw from STR1 (Stirling type 1) distribution.
#'
#' @export
rSTR1 <- function(n, size, theta) {
  # n draws from the STR1 distribution. Each column of the uniform matrix is
  # one draw: row i contributes a success with probability theta/(i-1+theta)
  # (always a success at i=1), and the column sums are the STR1 variates.
  u <- matrix(runif(size*n), nrow=size, ncol=n)
  successProb <- theta/(row(u) - 1 + theta)
  return(colSums(u < successProb))
}
# -----------------------------------
#' dSTR1
#'
#' Probability mass function of STR1 (Stirling type 1) distribution. Defined for k in 1:n
#'
#' @export
dSTR1 = function(k, n, theta, log=FALSE) {
  # Probability mass at k for the STR1 (Stirling type 1) distribution:
  # |s(n,k)| * theta^k * Gamma(theta)/Gamma(n+theta), computed in log space.
  #
  # Bug fix: the original called StirlingFirst(), which does not exist in this
  # package — the helper is named SterlingFirst() (sic), so every call to
  # dSTR1() failed with "could not find function".
  output <- lgamma(theta) - lgamma(n+theta) + SterlingFirst(n)[k] + k*log(theta)
  if (!log)
    output <- exp(output)
  return(output)
}
# -----------------------------------
#' rCRP
#'
#' Draw group and group frequencies from a Chinese restaurant process with concentration parameter theta.
#'
#' @export
rCRP <- function(n, theta=1) {
# a single customer always forms a single group
if (n==1)
return(list(group=1,groupFreqs=1))
# group assignment per observation; the first observation seeds group 1
group <- rep(1,n)
maxGroup <- 1
groupFreqs <- rep(0,n)
groupFreqs[1] <- 1
# sampling weights: existing groups weighted by their current size, plus a
# weight of theta on the next (as yet unopened) group
probVec <- groupFreqs
probVec[maxGroup+1] <- theta
for (i in 2:n) {
# draw an existing group (prob proportional to size) or the next new one;
# slots beyond maxGroup+1 have weight 0 and so are never drawn
newGroup <- sample(n,1,prob=probVec)
group[i] <- newGroup
if (newGroup>maxGroup) {
# observation opens a new group; shift the theta weight one slot right.
# (On the final iteration this can extend probVec to length n+1, but the
# vector is never used again, so sample() is unaffected.)
maxGroup <- newGroup
groupFreqs[maxGroup] <- 1
probVec[maxGroup] <- 1
probVec[maxGroup+1] <- theta
} else {
# observation joins an existing group
groupFreqs[newGroup] <- groupFreqs[newGroup]+1
probVec[newGroup] <- probVec[newGroup]+1
}
}
return(list(group=group,groupFreqs=groupFreqs))
}
# -----------------------------------
#' rCRP2
#'
#' Draw from Chinese restaurant process multiple times using efficient stick-breaking construction.
#'
#' @export
rCRP2 = function(n, reps, theta=1) {
  # Stick-breaking construction run in parallel across 'reps' replicates:
  # repeatedly break off a beta(1,theta) fraction of the customers still
  # unassigned in each replicate. Returns a matrix of group frequencies with
  # one row per replicate.
  remaining <- rep(n, reps)
  freqs <- NULL
  while (any(remaining > 0)) {
    stickFrac <- rbeta(reps, 1, theta)
    # binomial thinning assigns a share of the remaining customers to the
    # current group in every replicate at once
    newFreqs <- rbinom(reps, remaining, stickFrac)
    freqs <- cbind(freqs, newFreqs)
    remaining <- remaining - newFreqs
  }
  # discard groups that ended up empty in every replicate
  return(freqs[, colSums(freqs) > 0, drop=FALSE])
}
# -----------------------------------
#' colVars
#'
#' Calculate variance (sample or population) of columns of a matrix or data frame. If sampleVar==FALSE then calculate relative to mean mu.
#'
#' @export
colVars <- function(mat, sampleVar=TRUE, mu=0) {
  # Column-wise variance of a matrix or data frame, ignoring NAs.
  # sampleVar=TRUE gives the unbiased (n-1) sample variance about each
  # column's mean; sampleVar=FALSE gives the population-style variance
  # measured about the fixed value mu.
  #
  # Idiom fix: spelled out TRUE rather than the reassignable shorthand T.
  n <- colSums(!is.na(mat))
  X <- colSums(mat, na.rm=TRUE)
  X2 <- colSums(mat^2, na.rm=TRUE)
  if (sampleVar) {
    output <- 1/(n-1)*(X2-X^2/n)
  } else {
    output <- 1/n*(X2-2*mu*X+n*mu^2)
  }
  return(output)
}
# -----------------------------------
#' rowVars
#'
#' Calculate variance (sample or population) of rows of a matrix or data frame. If sampleVar==FALSE then calculate relative to mean mu.
#'
#' @export
rowVars = function(mat, sampleVar=TRUE, mu=0) {
  # Row-wise variance of a matrix or data frame, ignoring NAs.
  # sampleVar=TRUE gives the unbiased (n-1) sample variance about each row's
  # mean; sampleVar=FALSE gives the population-style variance measured about
  # the fixed value mu. Mirrors colVars() above.
  #
  # Idiom fix: spelled out TRUE rather than the reassignable shorthand T.
  n <- rowSums(!is.na(mat))
  X <- rowSums(mat, na.rm=TRUE)
  X2 <- rowSums(mat^2, na.rm=TRUE)
  if (sampleVar) {
    output <- 1/(n-1)*(X2-X^2/n)
  } else {
    output <- 1/n*(X2-2*mu*X+n*mu^2)
  }
  return(output)
}
# -----------------------------------
#' rlomax
#'
#' Draws from the lomax (Pareto type II) distribution with shape alpha and scale lambda.
#'
#' @export
rlomax = function(n,alpha,lambda) {
  # Lomax draws via the gamma-mixture-of-exponentials representation:
  # rate ~ gamma(shape=alpha, rate=lambda), then X ~ exponential(rate).
  mixRates <- rgamma(n, shape=alpha, rate=lambda)
  return(rexp(n, rate=mixRates))
}
# -----------------------------------
#' dlomax
#'
#' Density function for the lomax (Pareto type II) distribution with shape alpha and scale lambda.
#'
#' @export
dlomax = function(x, alpha, lambda, log=FALSE) {
  # Density of the lomax (Pareto type II) distribution with shape alpha and
  # scale lambda, evaluated at x.
  #
  # Bug fix: the original signature was function(n, alpha, lambda) but the
  # body referenced the undefined variables x and log, so every call failed
  # with "object not found"; x and log are now proper arguments (positional
  # callers are unaffected).
  output <- log(alpha)+alpha*log(lambda)-(alpha+1)*log(x+lambda)
  if (log==FALSE)
    output <- exp(output)
  return(output)
}
# -----------------------------------
#' rMultiMix
#'
#' Draw from finite or infinite (Dirichlet process) multivariate normal mixture model. Note that although correlations are allowed for each mixture component, the locations of components are drawn from a multivariate normal distribution with standard deviation tau in all directions (i.e. no correlation). Allows for generation of missing data by randomly eliminating a proportion of results.
#'
#' @param n number of draws.
#' @param K number of mixture components. Set K=Inf to use infinite mixture model.
#' @param psi prior matrix for inverse Wishart prior on covariance. When psi is a scalar value the model is one-dimensional, and variances are effectively drawn from an inverse-gamma distribution with shape alpha=nu/2 and rate beta=psi/2.
#' @param nu degrees of freedom for inverse Wishart prior on covariance.
#' @param alpha Dirichlet prior parameter for mixture weights. Set alpha=Inf to use model of equal weights.
#' @param tau standard deviation of prior on individual components means (this prior is centred at 0).
#' @param pMissing proportion of data that is missing.
#'
#' @export
rMultiMix <- function(n, K, psi, nu, alpha, tau, pMissing=0) {
# load package mvtnorm for multivariate normal draws
loadPackage('mvtnorm')
# load package MCMCpack for inverse-Wishart draws. Note that the riwish() function in MCMCpack was found to be much faster than the equivalent rinvwishart() function in package LaplacesDemon.
loadPackage('MCMCpack')
# force psi to be a matrix; a scalar psi becomes a 1x1 matrix, giving the
# one-dimensional model described in the roxygen block
psi <- as.matrix(psi)
d <- ncol(psi)
# draw grouping from finite or infinite mixture model
if (is.finite(K)) {
# finite mixture: Dirichlet weights, overridden by equal weights if alpha=Inf
weights <- rdirichlet(rep(alpha/K,K))
if (!is.finite(alpha))
weights <- rep(1/K, K)
groupCounts <- rmultinom(1,n,prob=weights)
group <- rep(1:K,times=groupCounts)
} else {
# infinite mixture: grouping comes from a Chinese restaurant process
weights <- NULL
if (!is.finite(alpha))
stop('cannot have infinite K and infinite alpha simultaneously')
# NOTE(review): 'c' shadows base::c inside this scope; harmless here because
# function lookup skips non-function bindings, but worth renaming
c <- rCRP(n,alpha)
groupCounts <- c$groupFreqs
group <- c$group
}
# draw from mixture model conditional on grouping: each non-empty component
# gets a covariance from the inverse-Wishart prior and a mean from the
# spherical normal prior, then its block of observations
draws <- NULL
sigma <- list()
mu <- list()
for (i in 1:length(groupCounts)) {
if (groupCounts[i]>0) {
sigma[[i]] <- riwish(nu,psi)
mu[[i]] <- rmvnorm(1,sigma=diag(tau^2,d))
draws <- rbind(draws, rmvnorm(groupCounts[i],mean=mu[[i]],sigma=sigma[[i]]))
}
}
# generate missing data matrix (if possible, otherwise error)
# round pMissing so the total number of missing cells is a whole number
pMissing <- round(n*d*pMissing)/(n*d)
if (pMissing<=0) {
missingMat <- NULL
} else if (pMissing>((d-1)/d)) {
stop(paste("cannot generate ",pMissing*100,"% missing data without eliminating some observations entirely. Maximum missing data for this dimensionality is ",100*(d-1)/d,"%.",sep=""))
} else {
# draw number of missing elements in each observation
missingRowSums <- rmultinom(1,n*d*pMissing,prob=rep(1,n))
# redistribute counts until no observation would lose all d of its entries
while(max(missingRowSums)>(d-1)) {
numOverLimit <- sum(missingRowSums>(d-1))
missingRowSums[missingRowSums>(d-1)] <- missingRowSums[missingRowSums>(d-1)]-1
numUnderLimit <- sum(missingRowSums<(d-1))
missingRowSums[missingRowSums<(d-1)] <- missingRowSums[missingRowSums<(d-1)] + rmultinom(1,numOverLimit,prob=rep(1,numUnderLimit))
}
# create a random matrix that satisfies this constraint: column j is flagged
# missing with probability (missing still owed to the row)/(columns left)
missingMat <- matrix(0,n,d)
for (j in 1:d) {
probvec <- missingRowSums/(d-j+1)
missingMat[,j] <- as.integer(runif(n)<probvec)
missingRowSums <- missingRowSums-missingMat[,j]
}
}
# return list of elements
outlist <- list(draws=draws, weights=weights, group=group, sigma=sigma, mu=mu, missingMat=missingMat)
return(outlist)
}
# -----------------------------------
#' grad
#'
#' Numerically calculate gradient at each point (x,y). The gradient at a point is calculated as the mean of the two gradients in either direction from the point, apart from at the ends of the vector where there is only a single value to consider.
#'
#' @export
grad <- function(x=1:length(y), y) {
  # Numerical gradient at each (x,y) point: interior points average the
  # slopes of the two adjacent segments; end points take the slope of their
  # single adjacent segment.
  if (length(y) != length(x)) {
    stop('x and y must be same length')
  }
  # slope of each segment between consecutive points
  segSlope <- diff(y)/diff(x)
  # pad each end by duplicating the end segment's slope, then average
  slopeFromLeft <- c(segSlope[1], segSlope)
  slopeFromRight <- c(segSlope, segSlope[length(segSlope)])
  return((slopeFromLeft + slopeFromRight)/2)
}
# -----------------------------------
#' cubeSpline_segment
#'
#' Calculate cubic spline of the form ax^3+bx^2+cx+d between the points (x[1],y[1]) and (x[2],y[2]) with gradients g[1] and g[2]. Return constants a, b, c and d.
#'
#' @export
cubeSpline_segment <- function(x, y, g) {
# Solves for the coefficients of y = a*x^3 + b*x^2 + c*x + d subject to the
# four constraints: values y[1], y[2] at x[1], x[2] and gradients g[1], g[2]
# at those same points. The expressions below are the closed-form solution of
# that 4x4 linear system with c and d eliminated by substitution.
# calculate coefficients
a <- (y[2]-y[1]-0.5*(g[1]+g[2])*(x[2]-x[1]))/(2*x[1]^3-3*x[1]^2*x[2]+x[2]^3-0.5*(3*x[2]^2-3*x[1]^2)*(x[2]-x[1]))
# b from the difference of the two gradient constraints
b <- (g[2]-g[1]-a*(3*x[2]^2-3*x[1]^2))/(2*x[2]-2*x[1])
# c from the gradient constraint at x[1]
c <- g[1]-3*a*x[1]^2-2*b*x[1]
# d from the value constraint at x[2], with c expanded in terms of a and b
d <- y[2]-a*x[2]^3-b*x[2]^2-g[1]*x[2]+3*a*x[1]^2*x[2]+2*b*x[1]*x[2]
# return results
return(list('a'=a,'b'=b,'c'=c,'d'=d))
}
# -----------------------------------
#' cubicSpline
#'
#' Calculate cubic spline passing through the points (x,y) with known gradients g at these points. The parameter 'interpoints' describes the number of points in each segment.
#'
#' @export
cubicSpline <- function(x, y, g, interpoints=10) {
# NOTE(review): the roxygen title and the myFunctions() listing call this
# function 'cubeSpline', but it is defined as 'cubicSpline' -- confirm which
# name is intended before callers rely on either.
# interpolate each segment using cubic spline
n <- length(x)
yout <- xout <- NULL
for (i in 2:n) {
# fit the cubic for this segment from its two endpoints and their gradients
coeffs <- cubeSpline_segment(x[(i-1):i], y[(i-1):i], g[(i-1):i])
# evaluation points for this segment, including both endpoints
xvec <- seq(x[i-1], x[i], l=interpoints+1)
# evaluate the fitted cubic at those points
yvec <- coeffs$a*xvec^3 + coeffs$b*xvec^2 + coeffs$c*xvec + coeffs$d
if (i==n) {
# final segment keeps its right-hand endpoint
xout <- c(xout, xvec)
yout <- c(yout, yvec)
} else {
# drop the right-hand endpoint so consecutive segments do not duplicate it
xout <- c(xout, xvec[-length(xvec)])
yout <- c(yout, yvec[-length(yvec)])
}
}
return(list('x'=xout,'y'=yout))
}
# -----------------------------------
#' logHarmonicMean
#'
#' Calculates harmonic mean of values z, where values are in log space. Accounts for underflow and returns value of log harmonic mean.
#'
#' @export
logHarmonicMean <- function(z) {
  # log harmonic mean = log(n) - log(sum(1/x_i)), with the smallest log-value
  # factored out of the sum so the exponentials cannot underflow.
  zmin <- min(z)
  return(zmin + log(length(z)) - log(sum(exp(zmin - z))))
}
# -----------------------------------
#' MCMCPlot
#'
#' Produces simple plot useful for visualising MCMC chains. Enter x vector to plot single chain, or x and y vectors to visualise correlation between chains.
#'
#' @export
MCMCPlot <- function(x, y=NULL, col=grey(0.7), pch=4, cex=0.4, xmin=NULL, xmax=NULL, ymin=min(x,na.rm=T), ymax=max(x,na.rm=T), xlab=NULL, main='', ylab=main) {
  # Two modes: trace plot of x against iteration (y absent), or scatter of
  # x against y to visualise correlation between two chains.
  traceMode <- is.null(y)
  # fill in any unspecified x-axis limits and label according to the mode
  if (traceMode) {
    if (is.null(xmin)) xmin <- 1
    if (is.null(xmax)) xmax <- length(x)
    if (is.null(xlab)) xlab <- 'iteration'
  } else {
    if (is.null(xmin)) xmin <- min(x, na.rm=TRUE)
    if (is.null(xmax)) xmax <- max(x, na.rm=TRUE)
    if (is.null(xlab)) xlab <- ''
  }
  if (traceMode) {
    plot(x, xlim=c(xmin,xmax), ylim=c(ymin,ymax), xlab=xlab, ylab=ylab, main=main, pch=pch, cex=cex, col=col)
  } else {
    plot(x, y, xlim=c(xmin,xmax), ylim=c(ymin,ymax), xlab=xlab, ylab=ylab, main=main, pch=pch, cex=cex, col=col)
  }
}
# -----------------------------------
#' MCMCPlot2
#'
#' Similar to \code{MCMCPlot()}, but aimed at very large numbers of samples where ordinary plotting would not be feasible. Works by binning points before plotting. Can only handle a single chain (cannot plot x vs. y).
#'
#' @export
MCMCPlot2 <- function(x, h_cells=100, v_cells=100, xmin=1, xmax=length(x), ymin=min(x,na.rm=T), ymax=max(x,na.rm=T), xlab='iteration', main='', ylab=main) {
# make x,y data frame of (iteration, value) pairs
df <- data.frame(x=1:length(x),y=x)
# make vectors of breaks and matrix z for storing final results
v_breaks <- seq(ymin, ymax, l=v_cells+1)
h_breaks <- seq(xmin, xmax, l=h_cells+1)
# cell mid-points, used as the plotting coordinates for image()
v_mids <- (v_breaks[-1]+v_breaks[-length(v_breaks)])/2
h_mids <- (h_breaks[-1]+h_breaks[-length(h_breaks)])/2
z <- matrix(0,v_cells,h_cells)
# split data based on vertical breaks; split() keeps empty factor levels, so
# df_split always has exactly v_cells elements in break order.
# NOTE(review): cut() with default arguments drops values exactly equal to
# ymin (and any NA) -- boundary points silently vanish from the plot; confirm
# this is acceptable.
df_split <- split(df,f=cut(df$y,v_breaks))
# for each element, count frequency classes based on horizontal breaks
for (i in 1:length(df_split)) {
df_split2 <- split(df_split[[i]],f=cut(df_split[[i]]$x,h_breaks))
z[i,] <- mapply(nrow,df_split2)
}
# plot final matrix of counts as an image, using a three-colour ramp built
# from built-in colours()
palette <- colorRampPalette(colors()[c(1,131,107)])
image(h_mids, v_mids, t(z), col=palette(100), xlab=xlab, ylab=ylab, main=main)
}
# -----------------------------------
#' densityPlot
#'
#' Produce kernel density plot of data x, with mean, median and 95% limits shown.
#'
#' @export
densityPlot <- function(x, xmin=NULL, xmax=NULL, ymin=0, ymax=NULL, xlab='', ylab='density', main='', meanOn=TRUE, medianOn=TRUE, quantileOn=TRUE, boxOn=TRUE, boxSize=0.8, meanTextOn=TRUE, medianTextOn=TRUE, quantileTextOn=TRUE, boxSigFig=3) {
# get x limits from data if not specified: extend the data range by 50% on
# each side so the density tails are not clipped
if (is.null(xmin))
xmin <- 1.5*min(x,na.rm=T)-0.5*max(x,na.rm=T)
if (is.null(xmax))
xmax <- 1.5*max(x,na.rm=T)-0.5*min(x,na.rm=T)
# produce kernel density and plot it as a grey filled polygon
fx <- density(x,from=xmin,to=xmax)
if (is.null(ymax))
ymax <- 1.1*max(fx$y,na.rm=T)
plot(fx, ylim=c(ymin,ymax), xlab=xlab, ylab=ylab, main=main)
polygon(c(fx$x,rev(fx$x)), c(fx$y,rep(0,length(fx$y))), col=grey(0.7))
# add lines for median and mean: find the density grid point closest to each
# statistic and draw a vertical line up to the curve (dashed for the mean)
if (meanOn) {
best_mean <- which.min(abs(fx$x-mean(x)))
lines(rep(fx$x[best_mean],2),c(0,fx$y[best_mean]),lty=2)
}
if (medianOn) {
best_median <- which.min(abs(fx$x-median(x)))
lines(rep(fx$x[best_median],2),c(0,fx$y[best_median]))
}
# add coloured quantiles to density plot: shade the tails outside the
# central 95% interval
if (quantileOn) {
quantiles <- quantile(x,prob=c(0.025,0.975))
fx_left_x <- fx$x[fx$x<quantiles[1]]
fx_left_y <- fx$y[fx$x<quantiles[1]]
polygon(c(fx_left_x,rev(fx_left_x)), c(fx_left_y,rep(0,length(fx_left_y))), col=colors()[373])
fx_right_x <- fx$x[fx$x>quantiles[2]]
fx_right_y <- fx$y[fx$x>quantiles[2]]
polygon(c(fx_right_x,rev(fx_right_x)), c(fx_right_y,rep(0,length(fx_right_y))), col=colors()[373])
}
# add legend summarising the statistics that were drawn; each text entry is
# only shown if both its text flag and its corresponding plot flag are set
# (note: the quantile entries reuse 'quantiles' computed above, which exists
# exactly when quantileOn is TRUE)
if (boxOn) {
legendText <- NULL
legend_lty <- NULL
if (meanTextOn & meanOn) {
legendText <- c(legendText, paste(' mean=', signif(mean(x), boxSigFig), sep=''))
legend_lty <- c(legend_lty, 2)
}
if (medianTextOn & medianOn) {
legendText <- c(legendText, paste('median=', signif(median(x), boxSigFig), sep=''))
legend_lty <- c(legend_lty, 1)
}
if (quantileTextOn & quantileOn) {
legendText <- c(legendText, paste(' q0.025=', signif(quantiles[1], boxSigFig), sep=''))
legendText <- c(legendText, paste(' q0.975=', signif(quantiles[2], boxSigFig), sep=''))
legend_lty <- c(legend_lty, NA, NA)
}
legend('topright', legend=legendText, lty=legend_lty, seg.len=1, bg='#FFFFFF80', box.col='#00000050', cex=boxSize)
}
}
# -----------------------------------
#' bobRainbow
#'
#' 12 rainbow colours.
#'
#' @export
bobRainbow <- function(n=12) {
  # Twelve hand-picked rainbow hues (hex RGB). To edit, convert RGB triplets
  # with rgb(red/255, green/255, blue/255); e.g. '#ee2c2c' is RGB(238,44,44)
  # and '#c8b4ff' is RGB(200,180,255).
  anchors <- c('#ee2c2c', '#64b4ff', '#7ce42c', '#ffdc00',
               '#aa46ff', '#ff7f00', '#ff50aa', '#2c962c',
               '#1f78c8', '#be6e32', '#6b6b6b', '#c8b4ff')
  # resample the anchor palette to exactly n colours
  return(smoothCols(1:n, rawCols=anchors))
}
# -----------------------------------
#' bobNiceCols
#'
#' 6 colours going from red to blue. Previously called BobRedBlue
#'
#' @export
bobNiceCols <- function(n=6) {
  # Six anchor colours running red -> yellow -> green -> blue (hex RGB).
  # To edit, convert RGB triplets with rgb(red/255, green/255, blue/255);
  # e.g. '#FB2D0A' is RGB(251,45,10) and '#00006D' is RGB(0,0,109).
  anchors <- c('#FB2D0A', '#FED00B', '#62D500',
               '#1F7C1A', '#1B79FE', '#00006D')
  # resample the anchor palette to exactly n colours
  return(smoothCols(1:n, rawCols=anchors))
}
# -----------------------------------
#' bobMapCols
#'
#' 11 colours going from red to blue. Previously called bobRedBlue2.
#'
#' @export
bobMapCols <- function(n=11) {
  # Eleven anchor colours running red -> pale yellow -> blue/purple, suitable
  # for map-style diverging scales (hex RGB). To edit, convert RGB triplets
  # with rgb(red/255, green/255, blue/255); e.g. '#9E0142' is RGB(158,1,66)
  # and '#5E4FA2' is RGB(94,79,162).
  anchors <- c('#9E0142', '#D53E4F', '#F46D43', '#FDAE61',
               '#FEE08B', '#FFFFBF', '#E6F598', '#ABDDA4',
               '#66C2A5', '#3288BD', '#5E4FA2')
  # resample the anchor palette to exactly n colours
  return(smoothCols(1:n, rawCols=anchors))
}
# -----------------------------------
#' colPlot
#'
#' Produce a simple plot to visualise a colour palette: one large point per
#' colour, numbered by its index in the palette.
#'
#' @param colVec vector of colours to display.
#' @param size point size (cex) of each swatch.
#'
#' @export
colPlot <- function(colVec, size=8) {
  k <- length(colVec)
  xs <- seq_len(k)
  # one big point per colour, no axes or labels
  plot(xs, rep(0, k), col = colVec, pch = 20, cex = size,
       axes = FALSE, xlab = '', ylab = '')
  # overlay the palette index on each point
  text(xs, rep(0, k), labels = xs)
}
# -----------------------------------
#' transHex
#'
#' Take a vector of colours \code{c} (hex strings or anything else understood
#' by \code{col2rgb()}) and rebuild them as hex strings with transparency
#' \code{alpha} built in. (The roxygen title previously read "colPlot",
#' which duplicated another function's documentation.)
#'
#' @param c vector of colours. (Note: this argument shadows base \code{c()}
#'   inside the function; retained for backward compatibility.)
#' @param alpha transparency in [0,1]; 0 = fully transparent, 1 = opaque.
#'
#' @export
transHex <- function(c, alpha) {
  # convert each colour to an RGB triplet scaled to [0,1]
  z <- t(col2rgb(c))/255
  # rebuild each colour with the requested alpha channel
  output <- mapply(function(x) {rgb(x[1],x[2],x[3],alpha=alpha)}, split(z,f=row(z)))
  return(output)
}
# -----------------------------------
#' first
#'
#' Return the first value of a vector, or, when \code{trim=TRUE}, the vector
#' with its first value removed.
#'
#' @param x a vector.
#' @param trim if TRUE, drop the first element instead of returning it.
#'
#' @export
first <- function(x, trim=FALSE) {
  if (trim) x[-1] else x[1]
}
# -----------------------------------
#' last
#'
#' Return the last value of a vector, or, when \code{trim=TRUE}, the vector
#' with its last value removed.
#'
#' @param x a vector.
#' @param trim if TRUE, drop the last element instead of returning it.
#'
#' @export
last <- function(x, trim=FALSE) {
  k <- length(x)
  if (trim) x[-k] else x[k]
}
# -----------------------------------
#' latexTable
#'
#' Convert a data frame or matrix of numeric values to LaTeX table rows with
#' a fixed number of decimal places, and write the output to file. Each row
#' becomes "v1 & v2 & ... \\".
#'
#' @param x data frame or matrix of numeric values.
#' @param digits number of decimal places.
#' @param file output file path.
#'
#' @export
latexTable <- function(x, digits=3, file='~/Desktop/tester.txt') {
  fmt <- paste0("%.", digits, "f")
  # character(0) rather than rep(NA, 0) so zero-row input writes an empty file
  outVec <- character(nrow(x))
  for (i in seq_len(nrow(x))) {
    # as.numeric() so data-frame rows (which are lists, unsupported by
    # sprintf) work as well as matrix rows
    outVec[i] <- paste(paste(sprintf(fmt, as.numeric(x[i, ])), collapse=' & '), '\\\\')
  }
  write.table(outVec, file=file, quote=FALSE, row.names=FALSE, col.names=FALSE)
}
# -----------------------------------
#' smoothCols
#'
#' Read in continuous values between xmin and xmax and return colours associated with these values, taken from a smoothly varying scale.
#'
#' @param x values from which to obtain colours.
#' @param xmin minimum range of x.
#' @param xmax maximum range of x.
#' @param res number of colours in palette.
#' @param rawCols colours that make up the palette. Leave as NULL to use default values, taken from tim.colors().
#'
#' @export
smoothCols <- function(x, xmin=min(x,na.rm=TRUE), xmax=max(x,na.rm=TRUE), res=1e3, rawCols=NULL) {
  # map x onto [0,1]; a degenerate range (all x equal) maps everything to the
  # first palette colour instead of producing NA colours via 0/0
  if (xmax == xmin) {
    x <- rep(0, length(x))
  } else {
    x <- (x-xmin)/(xmax-xmin)
  }
  # define colours if needed
  if (is.null(rawCols))
    rawCols <- c('#00008F', '#00009F', '#0000AF', '#0000BF', '#0000CF', '#0000DF', '#0000EF', '#0000FF', '#0010FF', '#0020FF', '#0030FF', '#0040FF', '#0050FF', '#0060FF', '#0070FF', '#0080FF', '#008FFF', '#009FFF', '#00AFFF', '#00BFFF', '#00CFFF', '#00DFFF', '#00EFFF', '#00FFFF', '#10FFEF', '#20FFDF', '#30FFCF', '#40FFBF', '#50FFAF', '#60FF9F', '#70FF8F', '#80FF80', '#8FFF70', '#9FFF60', '#AFFF50', '#BFFF40', '#CFFF30', '#DFFF20', '#EFFF10', '#FFFF00', '#FFEF00', '#FFDF00', '#FFCF00', '#FFBF00', '#FFAF00', '#FF9F00', '#FF8F00', '#FF8000', '#FF7000', '#FF6000', '#FF5000', '#FF4000', '#FF3000', '#FF2000', '#FF1000', '#FF0000', '#EF0000', '#DF0000', '#CF0000', '#BF0000', '#AF0000', '#9F0000', '#8F0000', '#800000')
  # make smooth colours; colorRampPalette() lives in grDevices, so the
  # previous loadPackage('RColorBrewer') call was an unused dependency
  myPal <- colorRampPalette(rawCols)
  cols <- myPal(res+1)[floor(x*res)+1]
  return(cols)
}
# -----------------------------------
#' changeNames
#'
#' Change names of selected variables in a data frame: \code{newNames[i]}
#' replaces \code{oldNames[i]}. Entries of \code{oldNames} not present in
#' \code{df} are ignored. Note that for large data frames the raw code of this
#' function should be copied into script rather than using this function, as
#' there will be a memory cost in copying the data frame within the function.
#'
#' @param df data frame whose names should be changed.
#' @param oldNames names to replace (defaults to all names of df).
#' @param newNames replacement names, in the same order as oldNames.
#'
#' @export
changeNames <- function(df, oldNames=names(df), newNames) {
  # match() pairs oldNames[i] with newNames[i] regardless of column order;
  # the previous %in% version silently mis-assigned names whenever oldNames
  # was not supplied in column order
  idx <- match(oldNames, names(df))
  found <- !is.na(idx)
  names(df)[idx[found]] <- newNames[found]
  return(df)
}
# -----------------------------------
#' reportConsole
#'
#' Use within a loop to print the current iteration (and percentage progress)
#' to the console. Only reports on iterations that are multiples of istep.
#'
#' @param i current iteration.
#' @param imax total number of iterations.
#' @param istep report only every istep-th iteration.
#'
#' @export
reportConsole <- function(i, imax, istep=1) {
  # skip iterations that are not multiples of istep
  if (i %% istep != 0)
    return(invisible(NULL))
  pct <- round(i/imax*100)
  cat(paste0("iteration ", i, " of ", imax, ", (", pct, "%)\n"))
  flush.console()
}
# -----------------------------------
#' dot
#'
#' Prints one or more dots to screen (no newline), useful for tracking
#' progress within nested loops. Follow by newLine() to add a carriage return.
#'
#' @param n number of dots to print.
#'
#' @export
dot <- function(n=1) {
  cat(strrep('.', n))
  flush.console()
}
# -----------------------------------
#' newLine
#'
#' Prints one or more carriage returns to the console.
#'
#' @param n number of newlines to print.
#'
#' @export
newLine <- function(n=1) {
  cat(strrep('\n', n))
  flush.console()
}
# -----------------------------------
#' monthDays
#'
#' Return the number of days in each month of a chosen year (handles leap
#' years via Date arithmetic).
#'
#' @param year calendar year.
#'
#' @export
monthDays <- function(year) {
  # first day of this year and of the next year
  start <- as.Date(sprintf("%d-01-01", year))
  end <- as.Date(sprintf("%d-01-01", year + 1))
  # differences between consecutive month starts = days per month
  as.numeric(diff(seq(start, end, by = "month")))
}
# -----------------------------------
#' is.int
#'
#' Check whether each element of a character vector can be converted to an
#' integer without loss (for example "3.4" returns FALSE, "3" returns TRUE).
#' Elements that cannot be parsed as numbers at all return FALSE.
#'
#' @param x vector to check (coerced to numeric element-wise).
#'
#' @export
is.int <- function(x) {
  vals <- suppressWarnings(as.numeric(x))
  # FALSE & NA is FALSE in R, so unparseable elements come out FALSE
  !is.na(vals) & vals %% 1 == 0
}
# -----------------------------------
#' errorBars
#'
#' Produce error bars at given positions. When \code{se=FALSE}, input should be
#' \code{y1}=lower limit and \code{y2}=upper limit. When \code{se=TRUE}, input
#' should be \code{y1}=mean and \code{y2}=standard error, in which case error
#' bars span two standard errors either side of the mean. Handles vector
#' inputs as well as scalars.
#'
#' @param y1 lower limit of the error bar (\code{se=FALSE}) or the mean
#'   (\code{se=TRUE}).
#' @param y2 upper limit of the error bar (\code{se=FALSE}) or the standard
#'   error (\code{se=TRUE}).
#' @param x horizontal position of error bars.
#' @param se whether to interpret \code{y1} and \code{y2} as mean and standard
#'   error (bars drawn at mean +- 2 standard errors).
#' @param width length of error bar handles.
#' @param col colour of error bars.
#' @param lty line type of error bars.
#' @param lwd line width of error bars.
#'
#' @export
errorBars <- function(y1, y2, x=1:length(y1), se=FALSE, width=1, col=1, lty=1, lwd=1) {
  # convert inputs to lower/upper limits
  if (se) {
    lower <- y1 - 2*y2
    upper <- y1 + 2*y2
  } else {
    lower <- y1
    upper <- y2
  }
  half <- width/2
  # vertical bar plus top and bottom handles
  segments(x, lower, x, upper, lty=lty, lwd=lwd, col=col)
  segments(x - half, lower, x + half, lower, lty=lty, lwd=lwd, col=col)
  segments(x - half, upper, x + half, upper, lty=lty, lwd=lwd, col=col)
}
# -----------------------------------
#' incrementBins
#'
#' Increment vector x like a mixed-radix odometer under the per-position
#' ceilings in binMax (rightmost position varies fastest; values run from 1
#' to binMax). Returns NULL once x has reached binMax (the final state).
#'
#' @param x current state vector.
#' @param binMax maximum value allowed at each position.
#'
#' @export
incrementBins <- function(x, binMax=c(1,4,3,2)) {
  if (length(x) != length(binMax))
    stop("x and binMax of different lengths")
  if (any(x > binMax))
    stop("some x greater than binMax")
  # final state reached: signal completion with NULL
  if (all(x == binMax))
    return(NULL)
  # carry-propagating increment from the rightmost position; guaranteed to
  # terminate because x != binMax here
  pos <- length(x)
  repeat {
    x[pos] <- x[pos] + 1
    if (x[pos] <= binMax[pos])
      break
    x[pos] <- 1
    pos <- pos - 1
  }
  x
}
# -----------------------------------
#' removeCols
#'
#' Remove named columns from a data frame; remaining columns keep their order.
#' Names in nameVec that are absent from df are silently ignored.
#'
#' @param df data frame.
#' @param nameVec character vector of column names to drop.
#'
#' @export
removeCols <- function(df, nameVec) {
  keep <- setdiff(names(df), nameVec)
  # drop=FALSE so a single remaining column stays a data frame
  df[, keep, drop = FALSE]
}
# -----------------------------------
#' matrixSmooth
#'
#' Interpolate the rows and then the columns of a matrix using spline(),
#' doubling the matrix size in each dimension. reps is the number of times
#' the doubling is applied.
#'
#' @param M numeric matrix to smooth.
#' @param reps number of times to double the matrix size.
#'
#' @export
matrixSmooth <- function(M, reps=1) {
  for (pass in seq_len(reps)) {
    # allocate a doubled-width matrix; every entry is overwritten below
    wide <- cbind(M, M)
    for (r in seq_len(nrow(M))) {
      wide[r, ] <- spline(M[r, ], n = 2*ncol(M))$y
    }
    # now double the height and spline down each column
    M <- rbind(wide, wide)
    for (cc in seq_len(ncol(wide))) {
      M[, cc] <- spline(wide[, cc], n = 2*nrow(wide))$y
    }
  }
  M
}
# -----------------------------------
#' minSpanTree
#'
#' Use Prim's algorithm to calculate minimum spanning tree. The 'data' argument must be a matrix or data frame with observations in rows and dimensions in columns. Distances are calculated as Euclidean distance over all the dimensions of the data. Returns multiple objects; 1) a data frame of nodePairs giving all pairwise links that make up the tree in the order that they were added, 2) an edgeList giving the nodes attached to each node, 3) edgeNum giving the number of edges associated with each node.
#' \cr\cr
#' Although this function may not be as fast as some alternatives (not tested), it has the advantage that distances are only calculated as needed, meaning there is no need to read in a huge matrix of pairwise distances.
#'
#' @param data matrix or data frame; observations in rows, dimensions in columns.
#' @return list with elements \code{nodePairs} (data frame of linked node
#'   pairs in order of addition), \code{edgeList} (per-node vector of
#'   neighbours) and \code{edgeNum} (per-node edge count).
#'
#' @export
minSpanTree <- function(data) {
# extract basic properties of data
data <- as.matrix(data)
n <- nrow(data)
dims <- ncol(data)
# initialise objects for storing results
nodePairs <- data.frame(node1=rep(0,n-1),node2=0)
edgeList <- replicate(n,NULL)
edgeNum <- rep(0,n)
# initialise algorithm by measuring all distances relative to first point
# (squared Euclidean distances: only comparisons are needed, and squaring
# preserves ordering, so sqrt is never taken)
point1 <- data[1,]
data <- data[-1,]
d_running <- 0
for (j in 1:dims) {
d_running <- d_running + (data[,j]-point1[j])^2
}
# given a point A and a cluster B, the distance between A and B is equal to the *minimum* distance between A and any point in B. The vector bestNode stores *which* node in B the point A links to
bestNode <- rep(1,n-1)
# during the algorithm the data object is trimmed, and so the row index no longer stores the unique ID of the data point. The vector pos fixes this by maintaining a record of unique IDs.
pos <- 2:n
# run Prims algorithm
for (i in 1:(n-1)) {
# node2 is the node with the minimum distance to the current tree. node1 is the node within the tree that node2 links to
nodeIndex <- which.min(d_running)
node1 <- bestNode[nodeIndex]
node2 <- pos[nodeIndex]
# store nodes and edges
nodePairs$node1[i] <- node1
nodePairs$node2[i] <- node2
edgeList[[node1]] <- c(edgeList[[node1]], node2)
edgeList[[node2]] <- c(edgeList[[node2]], node1)
edgeNum[node1] <- edgeNum[node1]+1
edgeNum[node2] <- edgeNum[node2]+1
# calculate new distance of all points from node2
d_new <- 0
for (j in 1:dims) {
d_new <- d_new + (data[,j]-data[nodeIndex,j])^2
}
# recalculate the *minimum* distance between the current tree and all nodes, and if the new distance is shorter then reflect that in bestNode
bestNode[d_running>d_new] <- node2
# element-wise minimum of the two distance vectors (arithmetic form of pmin)
d_running <- (d_running<=d_new)*d_running + (d_running>d_new)*d_new
# drop new node from data set and all other objects
# (drop=FALSE keeps data a matrix even when one row remains)
data <- data[-nodeIndex,,drop=FALSE]
d_running <- d_running[-nodeIndex]
pos <- pos[-nodeIndex]
bestNode <- bestNode[-nodeIndex]
}
# return multiple objects
return(list('nodePairs'=nodePairs, 'edgeList'=edgeList, 'edgeNum'=edgeNum))
}
# -----------------------------------
#' rDPM
#'
#' Draw from a two-dimensional Dirichlet process mixture model (no correlation between dimensions). Return multiple outputs, including true group allocation. For a more complex model including correlations between dimensions see the \code{rMultiMix()} function.
#'
#' @param n number of observations to draw.
#' @param sigma within-group standard deviation of the data.
#' @param priorMean_x prior mean of the group means in x.
#' @param priorMean_y prior mean of the group means in y.
#' @param priorSD prior standard deviation of the group means.
#' @param alpha concentration parameter passed to \code{rCRP()}.
#'
#' @export
rDPM <- function(n, sigma=1, priorMean_x=0, priorMean_y=0, priorSD=10, alpha=1) {
  # draw grouping from a Chinese restaurant process
  c <- rCRP(n, alpha)
  group <- sort(c$group)
  # one mean per group. The original code used length(freqs), but `freqs`
  # was undefined in this scope and errored at run time.
  # (assumes rCRP() labels groups 1..max(group) - TODO confirm)
  n_groups <- max(group)
  mu_x <- rnorm(n_groups, priorMean_x, priorSD)
  mu_y <- rnorm(n_groups, priorMean_y, priorSD)
  # draw data around each observation's group mean
  x <- rnorm(n, mu_x[group], sigma)
  y <- rnorm(n, mu_y[group], sigma)
  # return results
  return(list(x=x, y=y, group=group, mu_x=mu_x, mu_y=mu_y))
}
# -----------------------------------
#' pieCharts
#'
#' Adds pie charts to an existing plot. Proportions should be a list of vectors, that do not need to sum to one (they are normalised internally).
#'
#' @param x,y centre coordinates, one per pie chart.
#' @param proportions list (one element per pie) of segment weights.
#' @param radius radius of each pie in y-axis units.
#' @param lwd line width of segment borders.
#' @param border_col colour of segment borders.
#' @param seg_col vector of segment fill colours.
#' @param smoothness number of points used to approximate the circle.
#' @param x_stretch horizontal stretch factor (compensates for unequal axis scales).
#'
#' @export
pieCharts <- function(x, y, proportions, radius=0.2, lwd=1, border_col=1, seg_col=bobRainbow(), smoothness=50, x_stretch=1) {
theta <- seq(0,2*pi,l=smoothness)
for (i in 1:length(x)) {
segs <- length(proportions[[i]])
# cumulative normalised proportions give the segment boundaries in [0,1]
z <- c(0, cumsum(proportions[[i]]/sum(proportions[[i]])))
if (segs==1) {
# single segment: draw the full circle as one polygon
polygon(x[i]+x_stretch*radius*cos(theta), y[i]+radius*sin(theta), lwd=lwd, border=border_col, col=seg_col[1])
} else {
# multiple segments: wedge j spans angles 2*pi*z[j-1] .. 2*pi*z[j]
# NOTE(review): this branch uses sin() for x and cos() for y (segments
# start at 12 o'clock, clockwise), whereas the single-segment branch uses
# cos()/sin(); orientation differs between branches but the drawn shape
# is a full circle either way - presumably intentional, confirm
for (j in 2:(segs+1)) {
theta_sub <- c(2*pi*z[j-1], theta[theta>(2*pi*z[j-1]) & theta<(2*pi*z[j])], 2*pi*z[j])
polygon(c(x[i],x[i]+x_stretch*radius*sin(theta_sub),x[i]), c(y[i],y[i]+radius*cos(theta_sub),y[i]), lwd=lwd, border=border_col, col=seg_col[j-1])
}
}
}
}
# -----------------------------------
#' win
#'
#' Shortcut for running \code{par(mfrow=c(rows,cols))}, which takes slightly
#' longer to type!
#'
#' @param rows number of plot rows.
#' @param cols number of plot columns.
#'
#' @export
win <- function(rows=1, cols=1) {
  par(mfrow = c(rows, cols))
}
# -----------------------------------
#' animateOn
#'
#' First of two functions needed to create mpg from static images. Run this function, then run the code needed to create a sequence of plots, then run \code{animateOff()}. Individual plots will be saved as jpg with a four-character number on the end (for example myFile0012.jpg), and can either be saved in the local directory or in a temporary directory. The \code{animateOff()} function should have the same shared arguments as \code{animateOn()} (for example the same fileName).
#' \cr\cr
#' Note that this function requires ImageMagick to be installed, and has only been tested on Windows. If running for the first time you will likely need to add ImageMagick to the path using something like the following:
#' \code{Sys.setenv(PATH=paste(Sys.getenv("PATH"),"C:\\Program Files\\ImageMagick-6.9.3-Q16",sep=";"))}
#'
#' @param active allows function to be selectively de-activated using a variable.
#' @param fileName the filename of input files (without extension or number). The same name is used for the final video file.
#' @param saveFrames if FALSE then images are saved to a temporary location before being deleted in \code{animateOff()}.
#' @param width width of video.
#' @param height height of video.
#' @param units units of jpg files that make up video (see ?jpeg).
#' @param pointsize pointsize of jpg files that make up video (see ?jpeg).
#' @param quality quality of jpg files that make up video (see ?jpeg).
#' @param bg background of jpg files that make up video (see ?jpeg).
#' @param res resolution of jpg files that make up video (see ?jpeg).
#'
#' @export
animateOn <- function(active=FALSE, fileName, saveFrames=FALSE, width=480, height=480, units="px", pointsize=12, quality=75, bg="white", res=NA) {
  # do nothing when de-activated
  if (!active)
    return(NULL)
  # frames go next to fileName, or into a temp dir that animateOff() uses
  filePath <- paste0(fileName, "%04d.jpeg")
  if (!saveFrames)
    filePath <- paste0(tempdir(), "/", filePath)
  # open the jpeg device; subsequent plots become numbered frames
  jpeg(filePath, width=width, height=height, units=units, pointsize=pointsize, quality=quality, bg=bg, res=res)
}
# -----------------------------------
#' animateOff
#'
#' Second of two functions needed to create mpg from static images. This function should be run after running \code{animateOn()} and creating a series of plots. The \code{animateOff()} function should have the same shared arguments as \code{animateOn()}.
#'
#' @param active allows function to be selectively de-activated using a variable.
#' @param fileName base filename; must match the value given to \code{animateOn()}.
#' @param saveFrames must match the value given to \code{animateOn()}.
#' @param frameRate frames per second of the output video.
#'
#' @export
animateOff <- function(active=FALSE, fileName, saveFrames=FALSE, frameRate=25) {
# exit if not active
if (!active)
return()
# close file stream
dev.off()
# convert images to mpg
# NOTE(review): despite the ImageMagick note in animateOn()'s help, the
# command below invokes ffmpeg - confirm which tool is actually required
filePath <- paste(fileName,"%04d.jpeg",sep="")
if (!saveFrames)
filePath <- paste(tempdir(),"/",filePath,sep="")
myCommand <- paste("ffmpeg -y -r ",frameRate," -i ",filePath," ",fileName,".mp4",sep="")
# NOTE(review): shell() exists only on Windows; system() would be needed on
# other platforms - confirm intended platform support
shell(myCommand,intern=TRUE)
}
# -----------------------------------
#' coordText
#'
#' Defines a square region from 0-1 in both x and y and inserts text at the
#' designated location. Useful for e.g. adding panel labels to figures.
#'
#' @param x,y position of the text in [0,1] figure coordinates.
#' @param text the text to add.
#' @param cex text size.
#'
#' @export
coordText <- function(x=0.05, y=0.95, text='A)', cex=1.5) {
  # overlay a unit-square coordinate system on the current figure,
  # remembering the old par settings so they can be restored
  old_pars <- par(new=TRUE, xpd=TRUE, mar=c(0,0,0,0))
  plot(0, type='n', axes=FALSE, xlab=NA, ylab=NA, xlim=c(0,1), ylim=c(0,1))
  text(x, y, text, cex=cex)
  par(old_pars)
}
# -----------------------------------
#' fastRead
#'
#' Shortcut function for reading in data quickly using the data.table
#' package, returning an ordinary data frame.
#'
#' @param fileName path of the file to read.
#' @param header passed through to \code{data.table::fread()}.
#'
#' @export
fastRead <- function(fileName, header='auto') {
  # fread is much faster than read.table for large files
  loadPackage('data.table')
  as.data.frame(fread(fileName, header = header))
}
# -----------------------------------
#' multiPanel
#'
#' This function is not really a proper function (it is not intended to return anything). Rather, it is a place to store this useful bit of code that can adapted as needed. Copy this code over, and then drop whatever individual plots are needed into the inner loop.
#'
#' @export
multiPanel <- function() {
# setup multi-panel plotting parameters
plot_rows <- 2
plot_cols <- 2
outer_margin <- c(4,4,2,1)
tickLength <- -0.5
x_range <- c(-5,5)
y_range <- c(0,300)
x_axis_at <- seq(-4,4,2)
y_axis_at <- seq(0,250,50)
sub_main <- paste('submain',1:(plot_rows*plot_cols))
x_lab <- 'x axis'
y_lab <- 'y axis'
x_cex <- 0.8
y_cex <- 0.8
# create multi-panel plot: panel margins collapsed to zero, shared outer margin
par_store <- par(mfrow=c(plot_rows, plot_cols), mar=c(0,0,0,0), oma=outer_margin, tcl=tickLength)
index <- 0
for (i in 1:plot_rows) {
for (j in 1:plot_cols) {
index <- index+1
# put individual plots here
hist(rnorm(1e3), xlim=x_range, ylim=y_range, ann=FALSE, axes=FALSE)
title(sub_main[index],line=-1)
# add axes and box (x axis only on bottom row, y axis only on left column)
if (i==plot_rows)
axis(1, at=x_axis_at)
if (j==1)
axis(2, at=y_axis_at)
box()
}
}
# shared axis labels in the outer margin
mtext(x_lab, side=1, outer=TRUE, cex=x_cex, line=2.2)
mtext(y_lab, side=2, outer=TRUE, cex=y_cex, line=2.2)
# restore the plotting parameters saved above
par(par_store)
}
# -----------------------------------
#' imageFix
#'
#' Produces an image plot, but has an option for changing orientation.
#' Values of \code{orientation} from 1 to 4 rotate the image clockwise.
#'
#' @param z matrix of values to plot.
#' @param x,y coordinates of the matrix rows/columns.
#' @param orientation integer 1-4; each step rotates the image clockwise.
#' @param ... further arguments passed to \code{image()}.
#'
#' @export
imageFix <- function(z, x=1:nrow(z), y=1:ncol(z), orientation=1, ...) {
  rr <- rev(seq_len(nrow(z)))
  rc <- rev(seq_len(ncol(z)))
  # switch() evaluates only the selected branch; orientations outside 1-4
  # draw nothing (matching the original if/else chain)
  switch(orientation,
         image(x, y, z, ...),
         image(y, x, t(z[rr, ]), ...),
         image(x, y, z[rr, rc], ...),
         image(y, x, t(z[, rc]), ...))
}
# -----------------------------------
#' filledContour2
#'
#' Produces pretty alternative to ordinary filled contour.
#'
#' @param z matrix of values to plot.
#' @param x,y coordinates of the matrix rows/columns (default to row/column indices).
#' @param l number of colour levels.
#' @param col raw colours making up the palette (interpolated via smoothCols).
#' @param orientation integer 1-4 rotating the image as in \code{imageFix()}.
#' @param zmin,zmax value range used for both colours and contour levels.
#' @param main plot title.
#' @param xlab,ylab axis labels.
#' @param xlab_line,ylab_line margin line on which to place each axis label.
#'
#' @export
filledContour2 <- function(z, x=NULL, y=NULL, l=11, col=bobMapCols(), orientation=1, zmin=min(z,na.rm=TRUE), zmax=max(z,na.rm=TRUE), main=NA, xlab="x", ylab="y", xlab_line=3, ylab_line=3) {
# rotate z as needed (same transformations as imageFix)
if (orientation==2) {
z <- t(z[nrow(z):1,])
} else if (orientation==3) {
z <- z[nrow(z):1,ncol(z):1]
} else if (orientation==4) {
z <- t(z[,ncol(z):1])
}
# set x and y based on matrix dimensions
if (is.null(x))
x <- 1:nrow(z)
if (is.null(y))
y <- 1:ncol(z)
# produce plot: l+1 level boundaries give l colour bands
myLevels <- seq(zmin, zmax, l=l+1)
myCols <- smoothCols(1:l,rawCols=col)
# margins sized to fit the axis-label lines; restored at the end
parStore <- par(mar=c(1+xlab_line, 1+ylab_line, 3, 1))
image(x, y, z, zlim=c(zmin, zmax), col=myCols, xlab=NA, ylab=NA, main=main)
title(xlab=xlab, line=xlab_line)
title(ylab=ylab, line=ylab_line)
# overlay unlabelled contour lines at the colour boundaries
contour(x, y, z, levels=myLevels, drawlabels=FALSE, add=TRUE)
par(parStore)
}
# -----------------------------------
#' vec2mat
#'
#' Produce a matrix from two vectors: a length(y) x length(x) grid where
#' dim=1 returns the x-matrix (x repeated down columns) and dim=2 the
#' y-matrix (y repeated across columns).
#'
#' @param x,y vectors defining the grid.
#' @param dim which coordinate matrix to return (1 = x, 2 = y).
#'
#' @export
vec2mat <- function(x, y, dim) {
  if (dim == 1) {
    matrix(rep(x, each = length(y)), nrow = length(y))
  } else {
    matrix(rep(y, times = length(x)), nrow = length(y))
  }
}
# -----------------------------------
#' bin2D
#'
#' Read in x and y data, along with vectors of breaks. Output 2D bin counts
#' (a matrix indexed [y-bin, x-bin]) and vectors of bin midpoints. Points
#' outside the break ranges are dropped.
#'
#' @param x,y data vectors of equal length.
#' @param x_breaks,y_breaks bin edges in each dimension.
#'
#' @export
bin2D <- function(x, y, x_breaks, y_breaks) {
  # check that inputs are the same length
  stopifnot(length(x) == length(y))
  # bin index of every point in each dimension
  ix <- findInterval(x, x_breaks)
  iy <- findInterval(y, y_breaks)
  counts <- as.data.frame(table(ix, iy))
  # table() stores the indices as factors; recover numeric values
  counts[, 1] <- as.numeric(as.character(counts[, 1]))
  counts[, 2] <- as.numeric(as.character(counts[, 2]))
  # drop bins outside the break range (index 0 = below, nx/ny+1 = above)
  nx <- length(x_breaks) - 1
  ny <- length(y_breaks) - 1
  keep <- counts[, 1] >= 1 & counts[, 1] <= nx & counts[, 2] >= 1 & counts[, 2] <= ny
  counts <- counts[keep, ]
  # scatter the counts into a (ny x nx) matrix indexed [y, x]
  freq2D <- matrix(0, ny, nx)
  freq2D[cbind(counts[, 2], counts[, 1])] <- counts[, 3]
  # bin midpoints
  x_mids <- (x_breaks[-1] + x_breaks[-length(x_breaks)])/2
  y_mids <- (y_breaks[-1] + y_breaks[-length(y_breaks)])/2
  # output all as list
  return(list(x_mids = x_mids, y_mids = y_mids, z = freq2D))
}
# -----------------------------------
#' safeDivide
#'
#' Divide two numbers element-wise, but return 0 rather than NaN wherever
#' both numerator and denominator are zero.
#'
#' @param a numerator (vectorised).
#' @param b denominator (vectorised).
#'
#' @export
safeDivide <- function(a, b) {
  out <- a / b
  # 0/0 produces NaN; replace those positions with 0
  zero_both <- (a == 0) & (b == 0)
  out[zero_both] <- 0
  out
}
# -----------------------------------
#' exit
#'
#' Force-exits R.
#'
#' @export
exit <- function() {
# terminates the R session via the package's compiled helper
# (exit_cpp is defined in the package's C++ code - not visible here)
exit_cpp()
}
# -----------------------------------
#' ribbon
#'
#' Adds ribbon to plot. Set upper and lower limits of ribbon, or middle
#' values and thickness.
#'
#' @param y1 lower limit of ribbon, or alternatively middle value if upperLower=FALSE
#' @param y2 upper limit of ribbon, or alternatively thickness if upperLower=FALSE
#' @param x x-values
#' @param upperLower whether y-values represent upper and lower values of the ribbon
#' @param density density of shading (leave NA for no shading)
#' @param border colour of border (leave NA for no border)
#' @param col colour of ribbon, or colour of shading lines if density>0
#'
#' @export
ribbon <- function(y1, y2, x=1:length(y1), upperLower=TRUE, density=NA, border=NA, col='#FF000020') {
  # choose limits based on method choice
  if (upperLower) {
    y_lower <- y1
    y_upper <- y2
  } else {
    y_lower <- y1 - y2/2
    y_upper <- y1 + y2/2
  }
  # build polygon coordinates: along the lower edge, back along the upper edge
  poly_x <- c(x, rev(x))
  poly_y <- c(y_lower, rev(y_upper))
  # add polygon to plot. The density and border arguments are now passed
  # through; previously they were hard-coded to NA, silently ignoring the
  # user's settings.
  polygon(poly_x, poly_y, density=density, border=border, col=col)
}
# -----------------------------------
#' bobSpectrum
#'
#' 20 colours going from pink to blue through green. Previously called
#' bobPinkBlue.
#'
#' @param n number of colours to return (the 20 base colours are interpolated).
#'
#' @export
bobSpectrum <- function(n=20) {
  # base palette, pink through yellow and green to blue
  base_cols <- c("#C87FDB", "#CE7FC8", "#D37FB4", "#D87FA0", "#DE7F8D",
                 "#E4857F", "#E8A080", "#EDBA81", "#F3D583", "#FAF085",
                 "#F3FA86", "#DFF485", "#CAED85", "#B7E684", "#A5E084",
                 "#9ACF95", "#93BBAD", "#8BA6C7", "#8492E0", "#7F7FFB")
  # interpolate the base palette to exactly n colours
  smoothCols(1:n, rawCols = base_cols)
}
# -----------------------------------
#' breakCoverage
#'
#' Feed in a vector of breaks, and min and max values of a range that falls
#' within the breaks. Output the proportion of each slice that is covered by
#' the range. Useful for calculating things like prevalence in a given age
#' range.
#'
#' @param breaks vector of breaks
#' @param range_min minimum value of range of interest
#' @param range_max maximum value of range of interest
#' @return numeric vector of length \code{length(breaks)-1}, each element in [0,1].
#'
#' @export
breakCoverage <- function(breaks, range_min, range_max) {
  # get lower and upper breaks of each slice
  breaks0 <- breaks[-length(breaks)]
  breaks1 <- breaks[-1]
  # proportion of each uniform slice above range_min minus the proportion
  # above range_max = proportion inside [range_min, range_max].
  # (lower.tail spelled out: T/F abbreviations are reassignable in R)
  ret <- punif(range_min, breaks0, breaks1, lower.tail=FALSE) - punif(range_max, breaks0, breaks1, lower.tail=FALSE)
  return(ret)
}
# -----------------------------------
#' get list depth
#'
#' Get list depth up to 5 levels of nesting (deeper structures still report
#' 5). Non-lists and empty lists return 0. Depth is assessed by walking down
#' the first element of each level only.
#'
#' @param x list or nested list to assess depth
#'
#' @export
getListDepth <- function(x) {
  depth <- 0
  node <- x
  # descend through first elements, capped at 5 levels
  while (depth < 5 && is.list(node) && length(node) > 0) {
    depth <- depth + 1
    node <- node[[1]]
  }
  return(depth)
}
# -----------------------------------
#' sample always from vector
#'
#' Carries out sample(), but always treats the input as a vector. Avoids the
#' base sample() surprise where a length-one numeric x is treated as 1:x.
#'
#' @param x vector from which to sample
#' @param ... further arguments passed to \code{sample.int()} (e.g. size, replace)
#'
#' @export
sampleVec <- function(x, ...) {
  idx <- sample.int(length(x), ...)
  x[idx]
}
# -----------------------------------
#' great circle distance
#'
#' Calculates great circle distance (km) between an origin and one or more
#' destination points, assuming a spherical Earth of radius 6371 km.
#'
#' @param origin_lat scalar latitude of origin in degrees
#' @param origin_lon scalar longitude of origin in degrees
#' @param dest_lat vector latitude of destination in degrees
#' @param dest_lon vector longitude of destination in degrees
#'
#' @export
gcDist <- function(origin_lat, origin_lon, dest_lat, dest_lon) {
  # convert input arguments to radians
  origin_lat <- origin_lat*2*pi/360
  dest_lat <- dest_lat*2*pi/360
  origin_lon <- origin_lon*2*pi/360
  dest_lon <- dest_lon*2*pi/360
  # spherical law of cosines gives cos of the great-circle angle
  delta_lon <- dest_lon-origin_lon
  tmp <- sin(origin_lat)*sin(dest_lat) + cos(origin_lat)*cos(dest_lat)*cos(delta_lon)
  # clamp both ends of the acos domain: rounding error can push tmp just
  # past +1 (coincident points) or -1 (antipodal points, previously NaN)
  tmp[tmp > 1] <- 1
  tmp[tmp < -1] <- -1
  gc_angle <- acos(tmp)
  # mean Earth radius in km
  earthRad <- 6371
  gc_dist <- earthRad*gc_angle
  return(gc_dist)
}
# -----------------------------------
#' change margins
#'
#' Changes margins to one of a number of defaults.
#'
#' @param level choose from a number of settings: 1) default margins c(5.1,4.1,4.1,2.1), 2) narrow margins c(1,3,3,1).
#'
#' @export
setMargin <- function(level) {
  margins <- switch(level,
                    c(5.1, 4.1, 4.1, 2.1),  # 1: R defaults
                    c(1, 3, 3, 1))          # 2: narrow
  par(mar = margins)
}
# -----------------------------------
#' bobFireIce
#'
#' 7 colours going from red to blue.
#'
#' @param n number of colours to return (the 7 base colours are interpolated).
#'
#' @export
bobFireIce <- function(n=7) {
  # Base palette, "fire" (reds/yellows) into "ice" (pale to deep blues).
  # (rgb(r/255, g/255, b/255) converts an RGB triplet to hex.)
  base_cols <- c('#CD0000',   # 205,0,0
                 '#FF8C00',   # 255,140,0
                 '#FFD700',   # 255,215,0
                 '#FFEC8B',   # 255,236,139
                 '#BFEFFF',   # 191,239,255
                 '#5CACEE',   # 92,172,238
                 '#436EEE')   # 67,110,238
  # interpolate the base palette to exactly n colours
  smoothCols(1:n, rawCols = base_cols)
}
# -----------------------------------
#' dummy1
#'
#' Example call Rcpp function
#'
#' @export
dummy1 <- function() {
# print from R, then call the compiled helper to confirm the Rcpp bridge
# works (dummy1_cpp is defined in the package's C++ code - not visible here)
cat("R function working\n")
dummy1_cpp()
}
# -----------------------------------
#' faster
#'
#' Custom version of raster plot, that is lightweight and doesn't suffer from problems relating to layout. Ordinary raster produces strange results when used as part of more complex layouts due to its method of setting and re-setting par(). This is avoided here by passing in an optional layout matrix that corresponds to the current layout.
#'
#' @param ... arguments passed through to \code{image()} and \code{fields::imagePlotInfo()} (e.g. x, y, z).
#' @param col vector of colours; if NULL, \code{bobFireIce(nlevel)} is used.
#' @param breaks break points between colour levels; if given, nlevel is derived from them.
#' @param nlevel number of colour levels when col and breaks are not supplied.
#' @param layout_mat optional layout matrix corresponding to the current layout.
#' @param horizontal if TRUE the legend strip is drawn horizontally below the plot.
#' @param legend.shrink,legend.width,legend.mar legend sizing controls (as in fields::image.plot).
#' @param legend.lab,legend.line optional legend label and its margin line.
#' @param bigplot,smallplot explicit plotting regions for the image and legend.
#' @param lab.breaks optional labels drawn at the break points.
#' @param axis.args list of extra arguments passed to \code{axis()} for the legend.
#' @param legend.args list of extra arguments passed to \code{mtext()} for the legend label.
#' @param legend.cex size of the legend label text.
#'
#' @export
faster <- function (..., col=NULL, breaks=NULL, nlevel=64, layout_mat=NULL, horizontal=FALSE, legend.shrink=0.9, legend.width=1.2, legend.mar=ifelse(horizontal, 3.1, 5.1), legend.lab=NULL, legend.line=2, bigplot=NULL, smallplot=NULL, lab.breaks=NULL, axis.args=NULL, legend.args=NULL, legend.cex=1) {
# set defaults
if (!is.null(breaks)) {
nlevel <- length(breaks)-1
}
if (is.null(col)) {
col <- bobFireIce(nlevel)
} else {
nlevel <- length(col)
}
if (is.null(legend.mar)) {
legend.mar <- ifelse(horizontal, 3.1, 5.1)
}
old.par <- NULL
# get layout matrix assuming simple increasing grid of values
mfrow <- par()$mfrow
layout_simple <- matrix(1:(mfrow[1]*mfrow[2]), mfrow[1], mfrow[2], byrow=TRUE)
# set layout to simple grid if not specified
if (is.null(layout_mat)) {
layout_mat <- layout_simple
}
# set breaks evenly over data range if not specified
# NOTE(review): this re-derives nlevel from col, clobbering the value
# computed from breaks above when both breaks and col are NULL-adjacent
# combinations - confirm intended precedence
nlevel <- length(col)
# requires the 'fields' package to be installed (not loaded here)
info <- fields::imagePlotInfo(..., breaks = breaks, nlevel = nlevel)
breaks <- info$breaks
# get plotting limits
temp <- fields::imageplot.setup(add = FALSE, legend.shrink = legend.shrink,
legend.width = legend.width, legend.mar = legend.mar,
horizontal = horizontal, bigplot = bigplot, smallplot = smallplot)
smallplot <- temp$smallplot
bigplot <- temp$bigplot
# check legend will fit
if ((smallplot[2] < smallplot[1]) | (smallplot[4] < smallplot[3])) {
par(old.par)
stop("plot region too small to add legend\n")
}
# make colour scale: a 1-pixel-wide image whose cells are the break midpoints
ix <- 1:2
iy <- breaks
nBreaks <- length(breaks)
midpoints <- (breaks[1:(nBreaks - 1)] + breaks[2:nBreaks])/2
iz <- matrix(midpoints, nrow = 1, ncol = length(midpoints))
# add colour scale
par(old.par)
old.par <- par(pty = "m", plt = smallplot, err = -1)
if (!horizontal) {
image(ix, iy, iz, xaxt = "n", yaxt = "n", xlab = "",
ylab = "", col = col, breaks = breaks)
}
else {
image(iy, ix, t(iz), xaxt = "n", yaxt = "n", xlab = "",
ylab = "", col = col, breaks = breaks)
}
# add numbers and box to scale
if (!is.null(lab.breaks)) {
axis.args <- c(list(side = ifelse(horizontal, 1, 4),
mgp = c(3, 1, 0), las = ifelse(horizontal, 0, 2),
at = breaks, labels = lab.breaks), axis.args)
}
else {
axis.args <- c(list(side = ifelse(horizontal, 1, 4),
mgp = c(3, 1, 0), las = ifelse(horizontal, 0, 2)),
axis.args)
}
do.call("axis", axis.args)
box()
# add legend to scale
if (!is.null(legend.lab)) {
legend.args <- list(text = legend.lab, side = ifelse(horizontal,
1, 4), line = legend.line, cex = legend.cex)
}
if (!is.null(legend.args)) {
do.call(mtext, legend.args)
}
# main image plot
old.par <- par(new = TRUE, plt = bigplot)
image(..., breaks = breaks, add = FALSE, col = col)
# get position of current and next plot
mfg <- par()$mfg
thisPlot <- layout_mat[mfg[1], mfg[2]]
if (max(layout_simple)>1) {
nextPlot <- which(layout_simple==(thisPlot+1), arr.ind=TRUE)[1,]
}
# switch back to old pars
par(old.par)
# set position of next plot
if (max(layout_simple)>1) {
par(mfg=c(nextPlot[1], nextPlot[2], nrow(layout_mat), ncol(layout_mat)))
}
par(new=FALSE)
}
# Add the following code to your website.
# For more information on customizing the embed code, read Embedding Snippets.