In mpadge/paretoconv: Calculates the N-Fold Convolution of Two Pareto Distributions

(Note that the LaTeX equations are not rendered here but are kept that way for ready copy-paste into other rendering environments.)

devtools::load_all (".")

If the data converge to the underlying power-law relationship, then the expected CDF at some point $x\times \Delta x$ following an observed value of $y(x)$ will be, \begin{equation} y = A \left(\frac{x}{x_0}\right)^{-\alpha+1},\ {\rm and}\, y^\prime = A \left(\frac{x\cdot\Delta x}{x_0}\right)^{-\alpha+1}, \end{equation} so that \begin{equation} \frac{y^\prime}{y} = \Delta x^{-\alpha+1}. \end{equation}

The equivalent PDF will be, \begin{equation} y = \frac{a-1}{x_0} \left(\frac{x}{x_0}\right)^{-\alpha},\ {\rm and}\, y^\prime = \frac{a-1}{x_0} \left(\frac{x\cdot\Delta x}{x_0}\right)^{-\alpha}, \end{equation} so that the ratio of consecutive values in this case is, \begin{equation} \frac{y^\prime}{y} = \Delta x^{-\alpha}. \end{equation}

Raw Values

The raw values look like this:

dx <- 1.1
x <- cumprod (rep (dx, 50))
x <- x [x > 1 & x < 100]
a <- 2.2
x0 <- 2
n <- 2
y0 <- (x / x0) ^ (-a+1)
y1 <- paretoconv (x=x, a=a, x0=x0, n=n, cdf=TRUE)
plot (x, y0, "l", col="blue", log="xy", ylim=range (c (y0, y1)))
lines (x, y1, col="red")
points (x, y1, pch=1, col="red")

This shows that the lines themselves don't necessarily converge, yet the gradients do, and thus convergence tests can only be performed on the gradients.

Gradients

The ratios of consectuve points for dx=1.1 are:

indx <- 2:length (x)
y0r <- y0 [indx] / y0 [indx - 1]
y1r <- y1 [indx] / y1 [indx - 1]
plot (x [indx - 1], y0r, "l", col="blue", ylim=range (c (y0r, y1r)), xlab="x")
lines (x [indx - 1], y1r, col="red")

Both lines converge to the expected ratio of:

dx ^ (-a + 1)
## [1] 0.8919259

tail (y0r, n=1); tail (y1r, n=1)
## [1] 0.8919259
## [1] 0.8905734

Convergence Loop

A convergence loop looks something like this:

convloop <- function (dx=1.1, tol=0.05, xs=10, a=2.2, x0=2, n=2, quiet=TRUE)
{
    yold <- paretoconv (x=xs, a=a, x0=x0, n=n, cdf=TRUE)
    y <- paretoconv (x=xs * dx, a=a, x0=x0, n=n, cdf=TRUE)
    dy0 <- dx ^ (-a + 1) # expected value
    dy <- abs (y / yold - dy0) / (dx - 1)

    count <- 1
    while (dy > tol)
    {
        yold <- y
        xs <- xs * dx
        y <- paretoconv (x=xs, a=a, x0=x0, n=n, cdf=TRUE)
        dy <- abs (y / yold - dy0) / (dx - 1)
        if (!quiet)
            message ("[", count, "] (x,dy)=(", xs, ", ", dy, ")")
        count <- count + 1
    }
    # return x as mean of final two values
    list (count=count, x=mean (c (xs, xs / dx)))
}

Different values of dx give the following results

convloop (dx=1.01)
## $count
## [1] 92
## 
## $x
## [1] 24.60876

convloop (dx=1.05)
## $count
## [1] 20
## 
## $x
## [1] 24.66785

convloop (dx=1.1)
## $count
## [1] 11
## 
## $x
## [1] 24.75845

convloop (dx=1.5)
## $count
## [1] 3
## 
## $x
## [1] 18.75

convloop (dx=2)
## $count
## [1] 1
## 
## $x
## [1] 7.5

This suggests in turn that one could start with large values of dx and decrease them until count exceeds, say, 5 or so.

Convergence of dx

A second try:

convloop2 <- function (tol=0.05, xs=10, a=2.2, x0=2, n=2, quiet=TRUE)
{
    count <- 1
    dx <- 2
    while (count < 5)
    {
        res <- convloop (dx=dx, tol=tol, xs=xs, a=a, x0=x0, n=n, quiet=quiet)
        count <- res$count
        if (!quiet)
            message ("dx=", dx, " (count, x) = (", count, ", ", res$x, ")")
        dx <- sqrt (dx)
    }
    list (dx=dx^2, n=count, x=res$x)
}
convloop2 ()
## $dx
## [1] 1.189207
## 
## $n
## [1] 6
## 
## $x
## [1] 21.89207

The relationship between tol and resultant estimates of x is as expected:

tol <- 5:1 / 100
res <- sapply (tol, function (i) {
                   pt0 <- proc.time ()[3]
                   c (convloop2 (tol=i)$x, proc.time ()[3] - pt0)
                })
res <- data.frame (x=res[1,], t=res[2,])

Then plot the results

cols <- c ("blue", "red")
plot.new ()
par (mar=c(5.1,4.1,2.1,4.1))
plot (tol, res$x, "l", ylab="x", col=cols [1])
points (tol, res$x, pch=1, col=cols [1])
par ("new"=TRUE)
plot (tol, res$t, "l", col=cols [2], xaxt="n", yaxt="n", xlab="", ylab="")
points (tol, res$t, pch=1, col=cols [2])
Axis (res$t, side=4)
legend ("topright", lwd=1, col=cols, bty="n", legend=c("x", "time"))

American civil war example

Values for the American civil war data analysed by Colin Gillespie are:

x0 <- 20
alpha <- 2.21

The point of convergence can be estimated with the above routines like this:

pt0 <- proc.time ()
convloop2 (tol=0.05, xs=50, a=alpha, x0=x0, n=1, quiet=FALSE)
proc.time () - pt0
## $dx
## [1] 1.414214
##
## $n
## [1] 7
##
## $x
## [1] 341.4214
##   user  system elapsed 
## 71.516   0.000  71.516

And that takes quite a long time (over one minute) but gets there in the end.

What is needed, however, is for convergence to measured for each given value of x. This is illustrated here by dividing the values for native American casualties by 10 to enable calculation in a reasonable time (and it is presumed for convenience that x values are sorted):

data ("native_american", package="poweRlaw")
x <- sort (unique (native_american$Cas)) / 10
x0 <- 20 / 10
alpha <- 2.21
fity <- function (x, x0, alpha, tol=0.01, quiet=TRUE)
{
    y <- rep (NA, length (x))
    y [1:2] <- paretoconv (x=x [1:2], a=alpha, x0=x0, n=1, cdf=TRUE)
    dx <- x [2] / x [1]
    dy <- abs (y [2] / y [1] - dx ^ (-a + 1)) / (dx - 1)
    i <- 3
    yold <- y [2]
    while (dy > tol)
    {
        y [i] <- paretoconv (x=x [i], a=alpha, x0=x0, n=1, cdf=TRUE)
        dx <- x [i] / x [i-1]
        dy <- abs (y [i] / y [i-1] - dx ^ (-a + 1)) / (dx - 1)
        if (!quiet)
            message ("[", i, "] (x, dx, dy) = (", x [i], ", ", dx, ", ", dy, ")")
        i <- i + 1
    }
    x [i-1]
}

pt0 <- proc.time ()
fity (x=x, x0=x0, alpha=alpha)
proc.time () - pt0
## [1] 100
##   user  system elapsed
## 16.696   0.000  16.698

Replacing `paretoconv` values with asymptotic equivalents

What is finally needed is to replace the values for x>=100 with asymptotic estimates.

x3 <- tail (x, n=3)
y3 <- paretoconv (x=x3 [1], a=alpha, x0=x0, n=1, cdf=TRUE)
y3 <- c (y3, NA, NA)
y3 [2] <- y3 [1] * (x3 [2] / x3 [1]) ^ (-alpha + 1)
y3 [3] <- y3 [2] * (x3 [3] / x3 [2]) ^ (-alpha + 1)

Calculate the preceding values with paretoconv

x1 <- x [1:(length (x) - 3)]
y1 <- paretoconv (x=x1, a=alpha, x0=x0, n=1, cdf=TRUE)

And finally combine the two on the same plot:

plot (x1, y1, "l", col="blue", log="xy", 
      xlim=range (c (x1, x3)), ylim=range (c (y1, y3)))
lines (x3, y3, col="red", lwd=2, lty=2)

Equivalent values for the PDF look like this:

x3 <- tail (x, n=3)
y3 <- paretoconv (x=x3 [1], a=alpha, x0=x0, n=1, cdf=FALSE)
y3 <- c (y3, NA, NA)
y3 [2] <- y3 [1] * (x3 [2] / x3 [1]) ^ (-alpha)
y3 [3] <- y3 [2] * (x3 [3] / x3 [2]) ^ (-alpha)

Calculate the preceding values with paretoconv

x1 <- x [1:(length (x) - 3)]
y1 <- paretoconv (x=x1, a=alpha, x0=x0, n=1, cdf=FALSE)
y1 [y1 < 0] <- NA

And finally combine the two on the same plot:

plot (x1, y1, "l", col="blue", log="xy", 
      xlim=range (c (x1, x3)), ylim=range (c (y1, y3), na.rm=TRUE))
lines (x3, y3, col="red", lwd=2, lty=2)

mpadge/paretoconv documentation built on March 9, 2020, 9:54 p.m.

rdrr.io home R language documentation Run R code online

CRAN packages Bioconductor packages R-Forge packages GitHub packages

Note that we can't provide technical support on individual packages. You should contact the package authors for that.

mpadge/paretoconv
Calculates the N-Fold Convolution of Two Pareto Distributions

In mpadge/paretoconv: Calculates the N-Fold Convolution of Two Pareto Distributions

Raw Values

Gradients

Convergence Loop

Convergence of dx

American civil war example

Replacing `paretoconv` values with asymptotic equivalents

R Package Documentation

Browse R Packages

We want your feedback!

mpadge/paretoconv Calculates the N-Fold Convolution of Two Pareto Distributions

In mpadge/paretoconv: Calculates the N-Fold Convolution of Two Pareto Distributions

Raw Values

Gradients

Convergence Loop

Convergence of dx

American civil war example

Replacing paretoconv values with asymptotic equivalents

R Package Documentation

Browse R Packages

We want your feedback!

mpadge/paretoconv
Calculates the N-Fold Convolution of Two Pareto Distributions

Replacing `paretoconv` values with asymptotic equivalents