In petrelharp/landsim: Simulate Populations on Landscapes

fig.dim <- 3
knitr::opts_chunk$set(fig.width=3*fig.dim,fig.height=fig.dim,fig.align='center')
library(Matrix)
library(raster)
library(microbenchmark)
library(landsim)
set.seed(42)

How well do the simulation methods work with larger rasters?

Here are the demographic parameters:

germination_fun <- vital( 
        function (N,carrying.capacity,...) {
            # note this is 'sum' applied to a RasterBrick object, which acts like rowSums
            r0 / ( 1 + migrate_raster(sum(N),competition)/carrying.capacity )
        },
        r0 = 0.01,
        competition = migration(
                                kern="gaussian",
                                sigma=100,
                                radius=300,
                                normalize=1
                            )
     )

this.demography <- demography(
        prob.seed = 0.2,
        fecundity = 200,
        prob.germination = germination_fun,
        prob.survival = 0.9,
        pollen.migration = migration(
                            kern = function (x) { exp(-sqrt(x)) },
                            sigma = 100,
                            radius = 1000,
                            normalize = NULL
                     ),
        seed.migration = migration(
                            kern = "gaussian",
                            sigma = 20,
                            radius = 400,
                            normalize = 1
                     ),
        genotypes = c("aa","aA","AA"),
        mating = mating_tensor( c("aa","aA","AA") )
    )

Raster extent

We will time a generation on rasters of different sizes. Here is a function to create a $k$-kilometer square random raster. (But note that the base units of the raster are in meters.)

habitat_size <- function (k) {
    habitat <- raster(xmn=-k*1000/2, xmx=k*1000/2, ymn=-k*1000/2, ymx=k*1000/2, 
          resolution=100, #fixed resolution
          crs="+proj=utm +zone=11 +ellps=GRS80 +towgs84=0,0,0,0,0,0,0 +units=m +no_defs")
    values(habitat) <- sample( 100*c(1,2,5,NA), length(habitat), replace=TRUE )
    migrate_raster(habitat,kern="gaussian",sigma=200,radius=1000)
}

Here's timing for one generation on rasters of various sizes:

sizes <- 2^(0:6)
size.results <- lapply( sizes, function (k) {
                  habitat <- habitat_size(k)
                  N <- rpois_raster( do.call( brick, list(habitat)[rep(1,length(this.demography$genotypes))] ) )
                  names(N) <- this.demography$genotypes
                  gen.time <- system.time( NN <- generation_raster(N,this.demography,carrying.capacity=habitat) )
                  return( list( time=gen.time, N=N, NN=NN ) )
          } )

Here's the results:

size.tab <- data.frame( area=sizes^2, ncell=sapply(lapply(size.results,"[[","N"),ncell), t(sapply( size.results, "[[", "time" )) )
size.tab
layout(t(1:2))
with(size.tab, {
     plot(area, user.self, log='xy', xlab='total area', ylab='computation time (sec)') 
     plot(ncell, user.self, log='xy', xlab='number of raster cells', ylab='computation time (sec)') 
     } )

The computation time will depend critically on the number of cells within the radii of the migration operations; but at these settings, adding r round( 1/coef(with( subset(size.tab,ncell>5e3), lm(user.self ~ ncell) ))[2], 0 ) cells increases the run time by a second.

Raster resolution

Changing the resolution at a fixed size also changes the number of cells, but affects the running time quite differently. Here is a function to create a 10-kilometer square random raster, with different resolutions. (But note that the base units of the raster are in meters.)

habitat_res <- function (resolution) {
    habitat <- raster(xmn=-10*1000/2, xmx=10*1000/2, ymn=-10*1000/2, ymx=10*1000/2, 
          resolution=resolution,
          crs="+proj=utm +zone=11 +ellps=GRS80 +towgs84=0,0,0,0,0,0,0 +units=m +no_defs")
    values(habitat) <- sample( 100*c(1,2,5,NA), length(habitat), replace=TRUE )
    migrate_raster(habitat,kern="gaussian",sigma=200,radius=1000)
}

Here's timing for one generation on rasters of various sizes:

resolutions <- 100*2^(-2:2)
res.results <- lapply( resolutions, function (resolution) {
                  habitat <- habitat_res(resolution)
                  N <- rpois_raster( do.call( brick, list(habitat)[rep(1,length(this.demography$genotypes))] ) )
                  names(N) <- this.demography$genotypes
                  gen.time <- system.time( NN <- generation_raster(N,this.demography,carrying.capacity=habitat) )
                  return( list( time=gen.time, N=N, NN=NN ) )
          } )

Here's the results:

res.tab <- data.frame( resolution=resolutions, ncell=sapply(lapply(res.results,"[[","N"),ncell), t(sapply( res.results, "[[", "time" )) )
res.tab
layout(t(1:2))
with(res.tab, {
     plot(resolution, user.self, log='xy', xlab='total area', ylab='computation time (sec)') 
     plot(ncell, user.self, log='xy', xlab='number of raster cells', ylab='computation time (sec)') 
     } )

The computation time will depend critically on the number of cells within the radii of the migration operations; but at these settings, adding r round( 1/coef(with( subset(res.tab,ncell>5e3), lm(user.self ~ ncell) ))[2], 0 ) cells increases the run time by a second.

Two implementations

Based on the above, we have two implementations of the method, one that works with Raster objects (so can take advantage of methods for those that work on layers too big to fit in memory); and one that precomputes a migration matrix and works directly with numeric matrices.

Let's compare the speeds.

habitat <- habitat_size(6)
N <- rpois_raster( do.call( brick, list(habitat)[rep(1,length(this.demography$genotypes))] ) )
names(N) <- this.demography$genotypes
raster.time <- microbenchmark( generation_raster(N,this.demography,carrying.capacity=habitat), times=10 )

pop <- population( 
                  habitat = habitat,
                  genotypes = this.demography$genotypes,
                  N = matrix(values(N)[!is.na(values(habitat))],ncol=3)
             )
matrix.demography <- this.demography
matrix.demography$seed.migration <- migration( matrix.demography$seed.migration, do.M=TRUE, population=pop )
matrix.demography$pollen.migration <- migration( matrix.demography$pollen.migration, do.M=TRUE, population=pop )
matrix.demography$prob.germination <- vital( 
        function (N,carrying.capacity,...) {
            # and this is rowSums, for the matrix
            r0 / ( 1 + migrate(competition,x=rowSums(N))/carrying.capacity )
        },
        r0 = 0.01,
        competition = migration(
                                kern="gaussian",
                                sigma=100,
                                radius=300,
                                normalize=1,
                                do.M=TRUE,
                                population=pop
                            )
     )
matrix.time <- microbenchmark( generation(pop,matrix.demography,carrying.capacity=values(habitat)[!is.na(values(habitat))]), times=10 )

speedup.fac <- round(mean(raster.time$time)/mean(matrix.time$time))

rbind( raster.time, matrix.time )