tests/testthat/test-psdfit.R

# 
# 
# This code tests the psd-fitting functions
# 

library(poweRlaw)
library(plyr)
library(ggplot2)

context('Test the fitting of distributions')

test_that("dtpl handles the log argument correctly", { 
  testpoints <- 10^(seq(0, 5, length.out = 64))
  expect_equal(exp(dtpl(testpoints, 2, 1.0, 1, log = TRUE)), 
               dtpl(testpoints, 2, 1.0, 1, log = FALSE), 
               tol = 1e-8)
})

if ( exists("EXTENDED_TESTS") && EXTENDED_TESTS ) { 
  
  test_that('PL fitting works', { 
    
    # Change dir if running tests manually
    if ( file.exists('./tests/testthat') ) { 
      library(testthat)
      setwd('./tests/testthat') 
    }

    # Setup pli from Clauzet et al
    for ( s in dir('./pli-R-v0.0.3-2007-07-25', 
                  full.names = TRUE, pattern = '*.R') ) { 
      source(s)
    }

    # Compile auxiliary binaries
    system("cd ./pli-R-v0.0.3-2007-07-25/zeta-function/ && make")
    system("cd ./pli-R-v0.0.3-2007-07-25/exponential-integral/ && make")
    system("cd ./pli-R-v0.0.3-2007-07-25/ && \
              gcc discpowerexp.c -lm -o discpowerexp && \
              chmod +x discpowerexp")
      
    expos <- c(1.2, 2.5)
    xmins <- c(1,  10)
    for ( expo in expos ) { 
      for ( xmin in xmins ) { 
        
        dat <- unique(round(seq.int(1, 1000, length.out = 100)))
        pldat <- poweRlaw::rpldis(1000, xmin = xmin, alpha = expo)
        # Squeeze tail a bit for speed
        pldat <- pldat[pldat < 1e5]
        
        # left: pli <-> right: spw
        # dpl <-> dzeta
        expect_equal(dzeta(dat,, exponent = expo, threshold = xmin), 
                      dpl(dat, expo, xmin = xmin))
        
        # ppl <-> pzeta with higher tail
        expect_equal(pzeta(dat, exponent = expo, threshold = xmin, lower.tail = FALSE), 
                      ippl(dat, expo, xmin = xmin))
        
        # ppl_ll <-> zeta.loglike
        expect_equal(zeta.loglike(pldat, exponent = expo, threshold = xmin), 
                     pl_ll(pldat, expo, xmin = xmin))
        
        # pl_fit <-> zeta.fit 
        our_expo <- pl_fit(pldat, xmin = xmin)[['plexpo']]
        clauset_expo <- zeta.fit(pldat, threshold = xmin)[['exponent']]
        powerlaw_expo <- estimate_pars( poweRlaw::displ$new(pldat) )[["pars"]]
        expect_equal(clauset_expo, our_expo, tol = 1e-3)
        
        # Look at fit
        plot(log10(cumpsd(pldat[pldat>=xmin])))
        xs <- c(min(pldat[pldat>=xmin]), max(pldat[pldat>=xmin]))
        lines(log10(xs), log10(ippl(xs, clauset_expo, xmin = xmin)), 
              lwd = 2, col = 'blue')
        lines(log10(xs), log10(ippl(xs, powerlaw_expo, xmin = xmin)), 
              lwd = 2, col = 'green')
        lines(log10(xs), log10(ippl(xs, our_expo, xmin = xmin)), col = 'red')
        title('PLFIT')
      }
    }
    
  })

  test_that('EXP fitting works', { 
    
    rates <- c(1.1, 1.5)  
    xmins <- c(1, 3)
    for (xmin in xmins) { 
      for ( rate in rates ) { 
          dat <- seq.int(1000)
          expdat <- ceiling(rexp(1000, rate = rate))
          
          expect_equal(ddiscexp(dat, rate, threshold = xmin),
                       ddisexp(dat, rate, xmin = xmin))
          
          ipdisexp(dat, rate, xmin = xmin)
          
          fit <- exp_fit(expdat, xmin = xmin)
          expect_equal(fit[['cutoff']],
                       discexp.fit(expdat, threshold = xmin)[["lambda"]], 
                       tol = 1e-3)
          
          # Look at fit
          plot(log10(cumpsd(expdat[expdat >= xmin])))
          xs <- seq(min(expdat), max(expdat), length.out = 100)
          lines(log10(xs), 
                log10(ipdisexp(xs, fit[["cutoff"]], xmin = xmin)), 
                col = 'red')
          title('EXPFIT')
      }
    }
    
  })

  test_that('LNORM fitting works', { 
    
    meanlogs <- c(1, 10)
    sdlogs <- c(1, 3)
    xmins <- c(1, 30)
    for (xmin in xmins) { 
      for ( meanlog in meanlogs ) { 
        for ( sdlog in sdlogs ) { 
          
          xmax <- 1000
          dat <- seq.int(xmax)
          lnormdat <- ceiling(rlnorm(xmax, meanlog, sdlog))
  #         cat('meanlog: ', meanlog, ', sdlog: ', sdlog, ', xmin: ', xmin, "\n")
          
          # Test distr functions
          expect_equal(ddislnorm(dat, meanlog, sdlog, xmin),
                        dlnorm.tail.disc(dat, meanlog, sdlog, threshold = xmin))
          
          # plnorm.tail.disc returns negative values (?!)
  #         plnorm.tail.disc(dat, meanlog, sdlog, threshold = xmin)
  #         ipdislnorm(dat, meanlog, sdlog, xmin)
          
          # Note that dlnorm.disc assumes xmin = 0, so we cannot test it 
          #   with variable xmin. 
          our_dlnorm <- ddislnorm(dat, meanlog, sdlog, xmin = 0)
          clauset_dlnorm <- dlnorm.disc(dat, meanlog, sdlog)
          expect_equal(our_dlnorm, 
                        clauset_dlnorm)
          
          # Test obtained fits
          our_fit <- suppressWarnings( lnorm_fit(lnormdat, xmin = xmin) )
          clauset_fit <- suppressWarnings( fit.lnorm.disc(lnormdat, threshold = xmin) )
  #         expect_equal(our_fit[['meanlog']], clauset_fit[['meanlog']], tol = 1e-2)
  #         expect_equal(our_fit[['sdlog']], clauset_fit[['sdlog']], tol = 1e-2)
          
          # Look at fit
          plot(log10(cumpsd(lnormdat[lnormdat >= xmin])))
          xs <- seq(min(lnormdat), max(lnormdat), length.out = 10000)
          suppressWarnings( 
            lines(log10(xs), 
                  log10(ipdislnorm(xs, 
                                   our_fit[['meanlog']], 
                                   our_fit[['sdlog']], 
                                   xmin = xmin)), col = 'red')
          )
          suppressWarnings(
            lines(log10(xs), 
                  log10(ipdislnorm(xs, 
                                   fit.lnorm.disc(lnormdat, threshold = xmin)[["meanlog"]], 
                                  fit.lnorm.disc(lnormdat, threshold = xmin)[["sdlog"]],
                                  xmin = xmin)), 
                                  col = "blue")
          )
          title('LNORMFIT')
          
        }
      }
    }
    
  })

  # For TPL xmax must not be too low (>3?)
  test_that('TPL fitting works', { 
    
    rates <- c(0.05, 1.1)
    expos <- c(1.1, 1.3)
    xmins <- c(1, 10) 
    for (xmin in xmins) { 
      for ( rate in rates ) { 
        for ( expo in expos ) { 
          
          tpldat <- round(rpowerexp(1000, 1, expo, rate))
          tpldat <- tpldat[tpldat < 1e5]
          dat <- seq(0, 1000)
          
          # Normalizing coeff
          # Here, we check that the binary called by the Clauset code actually 
          # returns something. 
          clauset_result <- suppressWarnings({ 
            discpowerexp.norm(xmin, expo, rate) 
          })
          
          if ( length(clauset_result) > 0 ) { 
            expect_equal(clauset_result, tplnorm(expo, rate, xmin), 
                          tolerance = 1e-4)
          }
          
          # P(X=x)
          # Note: we explicitely convert output from pli to numeric as if 
          # the returned values are all NAs then R thinks it's logical 
          dtpl_result <- dtpl(dat, expo, rate, xmin)
          ddpxp_result <- suppressWarnings ( 
              as.numeric( ddiscpowerexp(dat, expo, rate, threshold = xmin) )
            )
          if ( ! all( is.na(dtpl_result) ) ) { 
            expect_equal(dtpl_result, ddpxp_result, 
                         tolerance = 1e-4)
          }
          
          # Here, we check that the binary called by the Clauset code actually 
          # returns something before performing the test. 
          clauset_result <- suppressWarnings( 
              discpowerexp.loglike(tpldat, expo, rate, threshold = xmin)
            )
          if (length(clauset_result) > 0) { 
            expect_equal(tpl_ll(tpldat, expo, rate, xmin),
                         clauset_result, 
                         tolerance = 1e-4)
          }
          
          our_fit <- tpl_fit(tpldat, xmin = xmin)
          
          # The fit with pli can fail, so we check only the results 
          # when it succeeds. Sometimes the returned fits are on the boundary
          # (expo ~= 1), so we do not test in those cases. 
          clauset_fit <- tryNULL(discpowerexp.fit(tpldat, threshold = xmin))
          
          if ( !is.null(clauset_fit) && 
                abs(clauset_fit$exponent - (-1)) > 1e-4 && 
                !is.na(our_fit$ll) ) { 
            if ( our_fit$ll < clauset_fit$loglike ) { 
              if ( abs(our_fit$ll - clauset_fit$loglike) > 1e-3 ) { 
                message("We found a better fit than the reference code")
              }
            } else { 
              # If the difference is not floating-point error
              if ( abs(our_fit$ll - clauset_fit$loglike) > 1e-3 ) { 
                message("Reference code found a better fit")
                browser()
                fail()
              }
              # These tests will fail
  #               expect_equal(our_fit$expo, clauset_fit$exponent, tol = 5e-2)
  #               expect_equal(our_fit$rate, clauset_fit$rate, tol = 5e-2)
            }
          }
          if ( !is.null(clauset_fit) ) { 
            # Look at fit
            plot(log10(cumpsd(tpldat[tpldat >= xmin])))
            xs <- unique( round( seq(min(tpldat), max(tpldat), 
                                      length.out = 100) )) 
            lines(log10(xs), 
                  log10(iptpl(xs, 
                              clauset_fit[["exponent"]], 
                              clauset_fit[["rate"]], xmin)), col = 'blue', 
                  lwd = 2)
            lines(log10(xs), 
                  log10(iptpl(xs, our_fit[["plexpo"]], 
                              our_fit[["cutoff"]], xmin)), 
                  col = 'red')
            title('TPLFIT')
          }
        }
      }
    }
    
  })


  test_that("PL estimations work with xmins", { 
    
    expos <- c(1.5, 2)
    for (expo in expos) { 
      for (xmin in c(1, 5)) {
        x <- seq.int(1000)
        
        pldat <- poweRlaw::rpldis(1000, xmin, expo)
        pldat <- pldat[pldat < 1e5] # squeeze tail for speed
        
        # Test dpl with xmin != 1
        expect_equal(dzeta(x, xmin, expo), 
                     dpl(x, expo, xmin))
        
        # Test ippl 
        expect_equal(pzeta(x, xmin, expo, lower.tail = FALSE),
                     ippl(x, expo, xmin))
        
        # Test likelihood func
        expect_equal(zeta.loglike(pldat, xmin, expo),
                     pl_ll(pldat, expo, xmin))
        
        # Test equality of fits
        expect_equal(pl_fit(pldat, xmin = xmin)[["plexpo"]], 
                     zeta.fit(pldat, xmin)[["exponent"]], 
                     tol = 1e-3)
        
        # Test the estimation of xmin
        expect_is(xmin_estim(pldat), "numeric")
      }
    }
    
  })
  
  # Remove auxiliary binaries now that tests are done
  system("cd ./pli-R-v0.0.3-2007-07-25/zeta-function/ && rm zeta_func zeta_func.o")
  system("cd ./pli-R-v0.0.3-2007-07-25/exponential-integral/ && rm exp_int exp_int.o")
  system("cd ./pli-R-v0.0.3-2007-07-25/ && rm discpowerexp")

}

Try the spatialwarnings package in your browser

Any scripts or data that you put into this service are public.

spatialwarnings documentation built on March 21, 2022, 5:08 p.m.