# R/kdemode.R In trnnick/TStools: Time Series Analysis Tools and Functions

#### Documented in kdemode

```kdemode <- function(data,type=c("diffusion","SJ","nrd0"),
weights=NULL,from=NULL,to=NULL,outplot=c(FALSE,TRUE),...){
# Return mode of a vector as calculated using KDE
#
# Inputs:
#   data      One-dimensional vector of data
#   type      Bandwidth selection:
#               - diffusion: Kernel Density Estimation Via Diffusion, Botev el al. 2010
#               - SJ: Sheater and Jones method
#               - nrd0: Silverman heuristic
#   weights   Numeric vector of non-negative observation weights, of the same length as data.
#   from      Lower bound of values for KDE estimation. By default this is min(data)-0.1*diff(range(data)).
#   to        Upper bound of values for KDE estimation. By default this is max(data)+0.1*diff(range(data)).
#   outplot   If TRUE provides plot of the KDE and the mean, median and mode
#   ...       Additional arguments can be passed to the plot.
#
# Outputs:
#   mode      Estimated mode value.
#
# Example:
#   data <- rnorm(200,mean=0,sd=1)
#   kdemode(data,outplot=TRUE)
#
# Notes:
#   For a discussion of the selection between mean, median and mode
#   for the combination of forecasts see:
#   Kourentzes, N., Barrow, D. K., & Crone, S. F. (2014).
#   Neural network ensemble operators for time series forecasting.
#   Expert Systems with Applications, Volume 41, Issue 9, Pages 4235-4244
#
#   Diffusion method by Z. I. Botev, J. F. Grotowski and D. P. Kroese
#   "Kernel Density Estimation Via Diffusion"
#   Annals of Statistics, 2010, Volume 38, Number 5, Pages 2916-2957
#   Code by Z. I. Botev: http://web.maths.unsw.edu.au/~zdravkobotev/
#
# Nikolaos Kourentzes, 2014 <[email protected]>
# Updated 2016

# Defaults
type <- type[1]
outplot <- outplot[1]

# Fix from/to
if (is.null(from)){
from <- min(data)-0.1*diff(range(data))
}
if (is.null(to)){
to <- max(data)+0.1*diff(range(data))
}

# Calculate KDE
if (type != "diffusion"){
ks <- density(data,bw="SJ",n=512,from=from,to=to,weights=weights)
x <- ks\$x
f <- ks\$y
h <- ks\$bw
} else {
if (!is.null(weights)){stop("Weighted KDE not implemented with diffusion estimation for bandwidth.")}
ks <- kde(data, n=512,MIN=from,MAX=to)
x <- ks\$out[1,]
f <- ks\$out[2,]
h <- ks\$bandwidth
}

# Find mode
mo <- x[which(f==max(f))] # mode

# Produce plot
if (outplot == TRUE){

# Allow user to override plot defaults
args <- list(...)
if (!("xlim" %in% names(args))){
args\$xlim <- c(min(x),max(x))
}
if (!("ylim" %in% names(args))){
args\$ylim <- c(0,max(f))
}
# Remaining defaults
args\$x <- args\$y <- NA

# Use do.call to use manipulated ellipsis (...)
do.call(plot,args)

md <- median(data)
mn <- mean(data)
plot(x,f,type="l")
lines(x,f)
lines(data,rep(0,length(data)),type="p",pch="*")
lines(mn,f[which(abs(x - mn) == min(abs(x - mn)))],col="black",bg="red",type="p",pch=21,cex=1.5)
lines(md,f[which(abs(x - md) == min(abs(x - md)))],col="black",bg="yellow",type="p",pch=21,cex=1.5)
lines(mo,f[which(f==max(f))],col="black",bg="green",type="p",pch=21,cex=1.5)
legend("topleft",c("Mean","Median","Mode"),cex=1,
col=c("red","yellow","green"), pch=19, bty="n")
}

return(list(mode=mo,xd=x,fd=f,h=h))

}

# -------------------------------------
# Code by Z. I. Botev: http://web.maths.unsw.edu.au/~zdravkobotev/
kde <- function(data,n,MIN,MAX){
#
#       State-of-the-art gaussian kernel density estimator for one-dimensional data;
#       The estimator does not use the commonly employed 'gaussian rule of thumb'.
#       As a result it outperforms many plug-in methods on multimodal densities
#       with widely separated modes (see example).
# INPUTS:
#     data    - a vector of data from which the density estimate is constructed;
#          n  - the number of mesh points used in the uniform discretization of the
#               interval [MIN, MAX]; n has to be a power of two; if n is not a power of two, then
#               n is rounded up to the next power of two; the default value of n is n=2^12;
#   MIN, MAX  - defines the interval [MIN,MAX] on which the density estimate is constructed;
#               the default values of MIN and MAX are:
#               MIN=min(data)-Range/10 and MAX=max(data)+Range/10, where Range=max(data)-min(data);
# OUTPUT:
#       matrix 'out' of with two rows of length 'n', where out[2,]
#       are the density values on the mesh out[1,];
# EXAMPLE:
##Save this file in your directory as kde.R and copy and paste the commands:
# rm(list=ls())
# source(file='kde.r')
# data=c(rnorm(10^3),rnorm(10^3)*2+30);
# d=kde(data)
# plot(d[1,],d[2,],type='l',xlab='x',ylab='density f(x)')

# REFERENCE:
# Z. I. Botev, J. F. Grotowski and D. P. Kroese
# "Kernel Density Estimation Via Diffusion"
# Annals of Statistics, 2010, Volume 38, Number 5, Pages 2916-2957
# for questions email: [email protected]

nargin=length(as.list(match.call()))-1;
if (nargin<2) n=2^14
n=2^ceiling(log2(n)); # round up n to the next power of 2;
if (nargin<4)
{# define the default  interval [MIN,MAX]
minimum=min(data); maximum=max(data);
Range=maximum-minimum;
MIN=minimum-Range/10; MAX=maximum+Range/10;
}
# set up the grid over which the density estimate is computed;
R=MAX-MIN; dx=R/n; xmesh=MIN+seq(0,R,dx); N=length(data);
# if data has repeated observations use the N below
# N=length(as.numeric(names(table(data))));
# bin the data uniformly using the grid defined above;
w=hist(data,xmesh,plot=FALSE);initial_data=(w\$counts)/N;
initial_data=initial_data/sum(initial_data);

a=dct1d(initial_data); # discrete cosine transform of initial data
# now compute the optimal bandwidth^2 using the referenced method
I=(1:(n-1))^2; a2=(a[2:n]/2)^2;
# use  fzero to solve the equation t=zeta*gamma^[5](t)

t_star=tryCatch(uniroot(fixed_point,c(0,.1),N=N,I=I,a2=a2,tol=10^(-14))\$root,error=function(e) .28*N^(-2/5));
# smooth the discrete cosine transform of initial data using t_star
a_t=a*exp(-(0:(n-1))^2*pi^2*t_star/2);
# now apply the inverse discrete cosine transform

density=idct1d(a_t)/R;
# take the rescaling of the data into account
bandwidth=sqrt(t_star)*R;
xmesh=seq(MIN,MAX,R/(n-1));
out=matrix(c(xmesh,density),nrow=2,byrow=TRUE);

return(list(out=out,bandwidth=bandwidth))

}

dct1d <- function(data){
# computes the discrete cosine transform of the column vector data
n= length(data);
# Compute weights to multiply DFT coefficients
weight = c(1,2*exp(-1i*(1:(n-1))*pi/(2*n)));
# Re-order the elements of the columns of x
data = c(data[seq(1,n-1,2)], data[seq(n,2,-2)]);
# Multiply FFT by weights:
data= Re(weight* fft(data));
data}

fixed_point <-  function(t,N,I,a2){
# this implements the function t-zeta*gamma^[l](t)
l=7;
f=2*(pi^(2*l))*sum((I^l)*a2*exp(-I*(pi^2)*t));
for (s in (l-1):2){

K0=prod(seq(1,2*s-1,2))/sqrt(2*pi);  const=(1+(1/2)^(s+1/2))/3;
time=(2*const*K0/N/f)^(2/(3+2*s));
f=2*pi^(2*s)*sum(I^s*a2*exp(-I*pi^2*time));
}
out=t-(2*N*sqrt(pi)*f)^(-2/5);
}

idct1d <-  function(data){
# computes the inverse discrete cosine transform
n=length(data);
# Compute weights
weights = n*exp(1i*(0:(n-1))*pi/(2*n));
# Compute x tilde using equation (5.93) in Jain
data = Re(fft(weights*data,inverse=TRUE))/n;
# Re-order elements of each column according to equations (5.93) and
# (5.94) in Jain
out = rep(0,n);
out[seq(1,n,2)] = data[1:(n/2)];
out[seq(2,n,2)] = data[n:(n/2+1)];
out;
}
```
trnnick/TStools documentation built on Aug. 12, 2018, 4:31 a.m.