R/tsal-test.R
In tsallisqexp: Tsallis q-Exp Distribution

Documented in test.tsal.LR.distribution test.tsal.quantile.transform

# R functions for use with the q-exponential distribution of Tsallis

# Copyright (c) 2007, Cosma Shalizi, cshalizi@cmu.edu or cosma.shalizi@gmail.com

#####  This is free software; you can redistribute it and/or modify
#####  it under the terms of the GNU General Public License as published by
#####  the Free Software Foundation; either version 2 of the License, or
#####  (at your option) any later version.
#####
#####  This software is distributed in the hope that it will be useful,
#####  but WITHOUT ANY WARRANTY; without even the implied warranty of
#####  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
#####  GNU General Public License for more details.
#####
#####  You should have received a copy of the GNU General Public License
#####  along with this software; if not, write to the Free Software
#####  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA

# Please contact me at the above e-mail addresses for bug reports, questions,
# etc.  Please also contact me if you use this code in a scientific paper!

# This is a special case of the generalized Pareto distribution (type II), but
# arguably of independent interest.
# The distribution is defined through the upper cumulative or complementary
# distribution function, a.k.a. survival function,
### Pr(X>=x) = (1-(1-q)x/kappa)^(1/(1-q))
# It is convenient to introduce a re-parameterization
### shape = -1/(1-q)
### scale = shape*kappa
# which makes the relationship to the Pareto clearer, and eases estimation.
# Users can employ either parameterization, with shape/scale as the default,
# but q/kappa over-riding them.  (No warning is given if a user provides both
# sets of parameters.)
# For the derivation of the maximum likelihood estimators, please see the
# manuscript "Maximum Likelihood Estimation for q-Exponential (Tsallis)
# Distributions", which should accompany this file; if not it is available from
# http://bactra.org/research/tsallis-MLE/, along with the latest version of
# this code.  The manuscript is also available from
# http://arxiv.org/abs/math.ST/0701854

# Functions are divided into four parts:
# functions for the distribution itself, conforming to the R standards;
# functions to estimate the parameters;
# functions which illustrate the numerical accuracy of the implementation;
# functions to go from one parametrization to the other.


# Functions in this file:
# Distribution-related (per R standards):
# dtsal				Probability density
# ptsal				Cumulative probability
# qtsal				Quantiles
# rtsal				Random variate generation
# Parameter Estimation:
# tsal.loglik			Log-likelihood calculation
# tsal.fit			Estimate parameters; wrapper for detailed
#				methods.  Call this, not its subroutines
# tsal.mle.direct		Direct maximization of likelihood
# tsal.mle.equation		Find MLE by solving estimating equation; default
# tsal.est.shape.from.scale	MLE of shape parameter given scale parameter
# tsal.est.scale.from.shape	MLE of scale parameter given shape parameter
# tsal.curvefit			Find parameters by fitting a curve to the
#				empirical distribution function; avoid
# tsal.bootstrap.errors		Bootstrap standard errors, biases for MLE
# tsal.fisher			Fisher information matrix, for asymptotics
# tsal.mean			Calculate the expectation value
# tsal.total.magnitude		Total magnitude of a population (estimated)
# Implementation Testing:
# plot.tsal.quantile.transform	Illustrates relative numerical inaccuracy in
#				ptsal and qtsal, which should be inverses
# plot.tsal.LR.distribution	Calculates the log likelihood ratio for
#				estimated vs. fixed true parameters, and plots
#				it against the theoretical asymptotic
#				distribution (chi^2 with 2 d.f.).
# Censored Data:
# dtsal.tail			Probability density (tail-conditional)
# ptsal.tail			Cumulative probability (tail-conditional)
# qtsal.tail			Quantiles (tail-conditional)
# rtsal.tail			Random variate generation (from the tail)
# Parameter Conversion:
# tsal.shape.from.q		Get shape parameter from q
# tsal.scale.from.qk		Get scale parameter from q, kappa
# tsal.q.from.shape		Get q from from shape parameter
# tsal.kappa.from.ss		Get kappa from shape, scale
# tsal.ss.from.qk		Get shape, scale from q, kappa (as pairs)
# tsal.qk.from.ss		Get q, kappa from shape, scale (as pairs




#########################################################################
########          IMPLEMENTATION-TESTING FUNCTIONS              #########
#########################################################################


# Visually check the accuracy of quantile/inverse quantile transformations
# For all parameter values and all x, qtsal(ptsal(x)) should = x.
# This function displays the relative error in the transformation, which is due
# to numerical imprecision.  This indicates roughly how far off random variates
# generated by the transformation method will be.
# If everything is going according to plan, the curve plotted should oscillate
# extremely rapidly between positive and negative limits which, while growing,
# stay quite small in absolute terms, e.g., on the order of 1e-5 when
# x is on the order of 1e9.
# Inputs: Lower limit for x, upper limit for x, distributional parameters,
#         number of points at which to evaluate the function over the domain,
#         line width, left-censoring threshold, optional graphics parameters
# Outputs: None
# Side effects: Plot of relative error, [qtsal(ptsal(x)) - x]/x
test.tsal.quantile.transform <- function(from=0, to=1e6, shape=1, scale=1,
                                         q=tsal.q.from.shape(shape),
                                         kappa=tsal.kappa.from.ss(shape,scale),
                                         n=1e5, lwd=0.01, xmin=0, ...) {
  # If we have both shape/scale and q/kappa parameters, the latter over-ride.
  ss <- tsal.ss.from.qk(q,kappa)
  shape <- ss[1]
  scale <- ss[2]
  # Sanity-check from argument
  if (from < xmin) { from <- xmin }
  
  if(xmin == 0 || is.null(xmin))
  {
      f <- function(x) { (qtsal(ptsal(x,shape,scale),shape,scale) - x)/x }
  }else
  {
      f <- function(x) { (qtsal.tail(ptsal.tail(x,shape,scale,xmin=xmin),shape,scale,xmin=xmin) - x)/x }
  }
  curve(f, from=from, to=to, n=n, lwd=lwd, ylab="relative error", ...)
  invisible()
}

# Likelihood estimation accuracy check
# 2*[(Likelihood at estimated parameters) - (likelihood at true parameters)]
# should have a chi^2 distribution with 2 degrees of freedom, at least
# asymptotically for large samples
# Inputs: Number of sample points, number of replicates, distributional
#         parameters, left-censoring threshold
# Outputs: Results of Kolmgorov-Smirnov test against theoretical distribution
# Side effects: Plot of the empirical CDF of likelihood ratios, decorated
#               with the chi^2_2 CDF
test.tsal.LR.distribution <- function(n=100, reps=100, shape=1, scale=1,
                                      q=tsal.q.from.shape(shape),
                                      kappa=tsal.kappa.from.ss(shape,scale),
                                      xmin=0,method="mle.equation",...) {
  # If we have both shape/scale and q/kappa parameters, the latter over-ride.
  ss <- tsal.ss.from.qk(q,kappa)
  shape <- ss[1]
  scale <- ss[2]
  LR <- function(r) {tsal.fit(r,xmin=xmin,method=method)$loglik - tsal.loglik(r,shape=shape, scale=scale,xmin=xmin)}
  if(xmin == 0 || is.null(xmin))
  {
      LRs <- replicate(reps, LR(rtsal(n, shape, scale)))
  }else
  {
      LRs <- replicate(reps, LR(rtsal.tail(n, shape, scale, xmin=xmin)))
  }
  label.main <- "Sample distribution of the log-likelihood ratio"
  label.y <- "Cumulative probability"
  label.x <- "Log likelihood ratio"
  LR2 <- 2*LRs
  xlim <- range(LR2[is.finite(LR2)])
  
  plot(ecdf(LR2), main=label.main, xlab=label.x, ylab=label.y, xlim=xlim, ...)
  pc2 <- function(x) pchisq(x,2)
  curve(pc2, col="red", add=TRUE)
  ks.test(LR2, pchisq, 2)
}