(Note that the LaTeX equations are not rendered here but are kept that way for ready copy-paste into other rendering environments.)
devtools::load_all (".")
If the data converge to the underlying power-law relationship, then the expected CDF at some point $x\times \Delta x$ following an observed value of $y(x)$ will be, \begin{equation} y = A \left(\frac{x}{x_0}\right)^{-\alpha+1},\ {\rm and}\, y^\prime = A \left(\frac{x\cdot\Delta x}{x_0}\right)^{-\alpha+1}, \end{equation} so that \begin{equation} \frac{y^\prime}{y} = \Delta x^{-\alpha+1}. \end{equation}
The equivalent PDF will be, \begin{equation} y = \frac{a-1}{x_0} \left(\frac{x}{x_0}\right)^{-\alpha},\ {\rm and}\, y^\prime = \frac{a-1}{x_0} \left(\frac{x\cdot\Delta x}{x_0}\right)^{-\alpha}, \end{equation} so that the ratio of consecutive values in this case is, \begin{equation} \frac{y^\prime}{y} = \Delta x^{-\alpha}. \end{equation}
The raw values look like this:
dx <- 1.1 x <- cumprod (rep (dx, 50)) x <- x [x > 1 & x < 100] a <- 2.2 x0 <- 2 n <- 2 y0 <- (x / x0) ^ (-a+1) y1 <- paretoconv (x=x, a=a, x0=x0, n=n, cdf=TRUE) plot (x, y0, "l", col="blue", log="xy", ylim=range (c (y0, y1))) lines (x, y1, col="red") points (x, y1, pch=1, col="red")
1 |
This shows that the lines themselves don't necessarily converge, yet the gradients do, and thus convergence tests can only be performed on the gradients.
The ratios of consectuve points for dx=1.1
are:
indx <- 2:length (x) y0r <- y0 [indx] / y0 [indx - 1] y1r <- y1 [indx] / y1 [indx - 1] plot (x [indx - 1], y0r, "l", col="blue", ylim=range (c (y0r, y1r)), xlab="x") lines (x [indx - 1], y1r, col="red")
1 |
Both lines converge to the expected ratio of:
dx ^ (-a + 1) ## [1] 0.8919259
tail (y0r, n=1); tail (y1r, n=1) ## [1] 0.8919259 ## [1] 0.8905734
A convergence loop looks something like this:
convloop <- function (dx=1.1, tol=0.05, xs=10, a=2.2, x0=2, n=2, quiet=TRUE) { yold <- paretoconv (x=xs, a=a, x0=x0, n=n, cdf=TRUE) y <- paretoconv (x=xs * dx, a=a, x0=x0, n=n, cdf=TRUE) dy0 <- dx ^ (-a + 1) # expected value dy <- abs (y / yold - dy0) / (dx - 1) count <- 1 while (dy > tol) { yold <- y xs <- xs * dx y <- paretoconv (x=xs, a=a, x0=x0, n=n, cdf=TRUE) dy <- abs (y / yold - dy0) / (dx - 1) if (!quiet) message ("[", count, "] (x,dy)=(", xs, ", ", dy, ")") count <- count + 1 } # return x as mean of final two values list (count=count, x=mean (c (xs, xs / dx))) }
Different values of dx
give the following results
convloop (dx=1.01) ## $count ## [1] 92 ## ## $x ## [1] 24.60876
convloop (dx=1.05) ## $count ## [1] 20 ## ## $x ## [1] 24.66785
convloop (dx=1.1) ## $count ## [1] 11 ## ## $x ## [1] 24.75845
convloop (dx=1.5) ## $count ## [1] 3 ## ## $x ## [1] 18.75
convloop (dx=2) ## $count ## [1] 1 ## ## $x ## [1] 7.5
This suggests in turn that one could start with large values of dx
and
decrease them until count
exceeds, say, 5 or so.
A second try:
convloop2 <- function (tol=0.05, xs=10, a=2.2, x0=2, n=2, quiet=TRUE) { count <- 1 dx <- 2 while (count < 5) { res <- convloop (dx=dx, tol=tol, xs=xs, a=a, x0=x0, n=n, quiet=quiet) count <- res$count if (!quiet) message ("dx=", dx, " (count, x) = (", count, ", ", res$x, ")") dx <- sqrt (dx) } list (dx=dx^2, n=count, x=res$x) } convloop2 () ## $dx ## [1] 1.189207 ## ## $n ## [1] 6 ## ## $x ## [1] 21.89207
The relationship between tol
and resultant estimates of x
is as expected:
tol <- 5:1 / 100 res <- sapply (tol, function (i) { pt0 <- proc.time ()[3] c (convloop2 (tol=i)$x, proc.time ()[3] - pt0) }) res <- data.frame (x=res[1,], t=res[2,])
Then plot the results
cols <- c ("blue", "red") plot.new () par (mar=c(5.1,4.1,2.1,4.1)) plot (tol, res$x, "l", ylab="x", col=cols [1]) points (tol, res$x, pch=1, col=cols [1]) par ("new"=TRUE) plot (tol, res$t, "l", col=cols [2], xaxt="n", yaxt="n", xlab="", ylab="") points (tol, res$t, pch=1, col=cols [2]) Axis (res$t, side=4) legend ("topright", lwd=1, col=cols, bty="n", legend=c("x", "time"))
1 |
Values for the American civil war data analysed by Colin Gillespie are:
x0 <- 20 alpha <- 2.21
The point of convergence can be estimated with the above routines like this:
pt0 <- proc.time () convloop2 (tol=0.05, xs=50, a=alpha, x0=x0, n=1, quiet=FALSE) proc.time () - pt0 ## $dx ## [1] 1.414214 ## ## $n ## [1] 7 ## ## $x ## [1] 341.4214 ## user system elapsed ## 71.516 0.000 71.516
And that takes quite a long time (over one minute) but gets there in the end.
What is needed, however, is for convergence to measured for each given value of
x
. This is illustrated here by dividing the values for native American
casualties by 10 to enable calculation in a reasonable time (and it is presumed
for convenience that x
values are sorted):
data ("native_american", package="poweRlaw") x <- sort (unique (native_american$Cas)) / 10 x0 <- 20 / 10 alpha <- 2.21 fity <- function (x, x0, alpha, tol=0.01, quiet=TRUE) { y <- rep (NA, length (x)) y [1:2] <- paretoconv (x=x [1:2], a=alpha, x0=x0, n=1, cdf=TRUE) dx <- x [2] / x [1] dy <- abs (y [2] / y [1] - dx ^ (-a + 1)) / (dx - 1) i <- 3 yold <- y [2] while (dy > tol) { y [i] <- paretoconv (x=x [i], a=alpha, x0=x0, n=1, cdf=TRUE) dx <- x [i] / x [i-1] dy <- abs (y [i] / y [i-1] - dx ^ (-a + 1)) / (dx - 1) if (!quiet) message ("[", i, "] (x, dx, dy) = (", x [i], ", ", dx, ", ", dy, ")") i <- i + 1 } x [i-1] }
pt0 <- proc.time () fity (x=x, x0=x0, alpha=alpha) proc.time () - pt0 ## [1] 100 ## user system elapsed ## 16.696 0.000 16.698
paretoconv
values with asymptotic equivalentsWhat is finally needed is to replace the values for x>=100
with asymptotic
estimates.
x3 <- tail (x, n=3) y3 <- paretoconv (x=x3 [1], a=alpha, x0=x0, n=1, cdf=TRUE) y3 <- c (y3, NA, NA) y3 [2] <- y3 [1] * (x3 [2] / x3 [1]) ^ (-alpha + 1) y3 [3] <- y3 [2] * (x3 [3] / x3 [2]) ^ (-alpha + 1)
Calculate the preceding values with paretoconv
x1 <- x [1:(length (x) - 3)] y1 <- paretoconv (x=x1, a=alpha, x0=x0, n=1, cdf=TRUE)
And finally combine the two on the same plot:
plot (x1, y1, "l", col="blue", log="xy", xlim=range (c (x1, x3)), ylim=range (c (y1, y3))) lines (x3, y3, col="red", lwd=2, lty=2)
1 |
Equivalent values for the PDF look like this:
x3 <- tail (x, n=3) y3 <- paretoconv (x=x3 [1], a=alpha, x0=x0, n=1, cdf=FALSE) y3 <- c (y3, NA, NA) y3 [2] <- y3 [1] * (x3 [2] / x3 [1]) ^ (-alpha) y3 [3] <- y3 [2] * (x3 [3] / x3 [2]) ^ (-alpha)
Calculate the preceding values with paretoconv
x1 <- x [1:(length (x) - 3)] y1 <- paretoconv (x=x1, a=alpha, x0=x0, n=1, cdf=FALSE) y1 [y1 < 0] <- NA
And finally combine the two on the same plot:
plot (x1, y1, "l", col="blue", log="xy", xlim=range (c (x1, x3)), ylim=range (c (y1, y3), na.rm=TRUE)) lines (x3, y3, col="red", lwd=2, lty=2)
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.