fig.dim <- 3 knitr::opts_chunk$set(fig.width=3*fig.dim,fig.height=fig.dim,fig.align='center') library(Matrix) library(raster) library(microbenchmark) library(landsim) set.seed(42)
How well do the simulation methods work with larger rasters?
Here are the demographic parameters:
germination_fun <- vital( function (N,carrying.capacity,...) { # note this is 'sum' applied to a RasterBrick object, which acts like rowSums r0 / ( 1 + migrate_raster(sum(N),competition)/carrying.capacity ) }, r0 = 0.01, competition = migration( kern="gaussian", sigma=100, radius=300, normalize=1 ) ) this.demography <- demography( prob.seed = 0.2, fecundity = 200, prob.germination = germination_fun, prob.survival = 0.9, pollen.migration = migration( kern = function (x) { exp(-sqrt(x)) }, sigma = 100, radius = 1000, normalize = NULL ), seed.migration = migration( kern = "gaussian", sigma = 20, radius = 400, normalize = 1 ), genotypes = c("aa","aA","AA"), mating = mating_tensor( c("aa","aA","AA") ) )
We will time a generation on rasters of different sizes. Here is a function to create a $k$-kilometer square random raster. (But note that the base units of the raster are in meters.)
habitat_size <- function (k) { habitat <- raster(xmn=-k*1000/2, xmx=k*1000/2, ymn=-k*1000/2, ymx=k*1000/2, resolution=100, #fixed resolution crs="+proj=utm +zone=11 +ellps=GRS80 +towgs84=0,0,0,0,0,0,0 +units=m +no_defs") values(habitat) <- sample( 100*c(1,2,5,NA), length(habitat), replace=TRUE ) migrate_raster(habitat,kern="gaussian",sigma=200,radius=1000) }
Here's timing for one generation on rasters of various sizes:
sizes <- 2^(0:6) size.results <- lapply( sizes, function (k) { habitat <- habitat_size(k) N <- rpois_raster( do.call( brick, list(habitat)[rep(1,length(this.demography$genotypes))] ) ) names(N) <- this.demography$genotypes gen.time <- system.time( NN <- generation_raster(N,this.demography,carrying.capacity=habitat) ) return( list( time=gen.time, N=N, NN=NN ) ) } )
Here's the results:
size.tab <- data.frame( area=sizes^2, ncell=sapply(lapply(size.results,"[[","N"),ncell), t(sapply( size.results, "[[", "time" )) ) size.tab layout(t(1:2)) with(size.tab, { plot(area, user.self, log='xy', xlab='total area', ylab='computation time (sec)') plot(ncell, user.self, log='xy', xlab='number of raster cells', ylab='computation time (sec)') } )
The computation time will depend critically on the number of cells within the radii
of the migration
operations;
but at these settings, adding
r round( 1/coef(with( subset(size.tab,ncell>5e3), lm(user.self ~ ncell) ))[2], 0 )
cells increases the run time by a second.
Changing the resolution at a fixed size also changes the number of cells, but affects the running time quite differently. Here is a function to create a 10-kilometer square random raster, with different resolutions. (But note that the base units of the raster are in meters.)
habitat_res <- function (resolution) { habitat <- raster(xmn=-10*1000/2, xmx=10*1000/2, ymn=-10*1000/2, ymx=10*1000/2, resolution=resolution, crs="+proj=utm +zone=11 +ellps=GRS80 +towgs84=0,0,0,0,0,0,0 +units=m +no_defs") values(habitat) <- sample( 100*c(1,2,5,NA), length(habitat), replace=TRUE ) migrate_raster(habitat,kern="gaussian",sigma=200,radius=1000) }
Here's timing for one generation on rasters of various sizes:
resolutions <- 100*2^(-2:2) res.results <- lapply( resolutions, function (resolution) { habitat <- habitat_res(resolution) N <- rpois_raster( do.call( brick, list(habitat)[rep(1,length(this.demography$genotypes))] ) ) names(N) <- this.demography$genotypes gen.time <- system.time( NN <- generation_raster(N,this.demography,carrying.capacity=habitat) ) return( list( time=gen.time, N=N, NN=NN ) ) } )
Here's the results:
res.tab <- data.frame( resolution=resolutions, ncell=sapply(lapply(res.results,"[[","N"),ncell), t(sapply( res.results, "[[", "time" )) ) res.tab layout(t(1:2)) with(res.tab, { plot(resolution, user.self, log='xy', xlab='total area', ylab='computation time (sec)') plot(ncell, user.self, log='xy', xlab='number of raster cells', ylab='computation time (sec)') } )
The computation time will depend critically on the number of cells within the radii
of the migration
operations;
but at these settings, adding
r round( 1/coef(with( subset(res.tab,ncell>5e3), lm(user.self ~ ncell) ))[2], 0 )
cells increases the run time by a second.
Based on the above, we have two implementations of the method,
one that works with Raster
objects (so can take advantage of methods for those that work on layers too big to fit in memory);
and one that precomputes a migration matrix and works directly with numeric matrices.
Let's compare the speeds.
habitat <- habitat_size(6) N <- rpois_raster( do.call( brick, list(habitat)[rep(1,length(this.demography$genotypes))] ) ) names(N) <- this.demography$genotypes raster.time <- microbenchmark( generation_raster(N,this.demography,carrying.capacity=habitat), times=10 ) pop <- population( habitat = habitat, genotypes = this.demography$genotypes, N = matrix(values(N)[!is.na(values(habitat))],ncol=3) ) matrix.demography <- this.demography matrix.demography$seed.migration <- migration( matrix.demography$seed.migration, do.M=TRUE, population=pop ) matrix.demography$pollen.migration <- migration( matrix.demography$pollen.migration, do.M=TRUE, population=pop ) matrix.demography$prob.germination <- vital( function (N,carrying.capacity,...) { # and this is rowSums, for the matrix r0 / ( 1 + migrate(competition,x=rowSums(N))/carrying.capacity ) }, r0 = 0.01, competition = migration( kern="gaussian", sigma=100, radius=300, normalize=1, do.M=TRUE, population=pop ) ) matrix.time <- microbenchmark( generation(pop,matrix.demography,carrying.capacity=values(habitat)[!is.na(values(habitat))]), times=10 ) speedup.fac <- round(mean(raster.time$time)/mean(matrix.time$time)) rbind( raster.time, matrix.time )
That's a speedup of r speedup.fac
times, pretty good.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.