#######################################
# Methods to simulated large scale flowering observations
#######################################
#' @description Generate x,y points in a clumped fashion.
#' The number of clump centers is a poisson dist. with lambda=5.
#' Optionally set the the percentage of points n which
#' will be drawn from a spatially uniform distribution. This results
#' in clumped points that are not "too" clumped.
#'
#' @param n numeric The number of points to return
#' @param xlimits vector Lower and upper bounds of the x axis
#' @param ylimits vector Lower and upper bounds of the y axis
#' @param uniform_random_percent numeric Percent of samples to be drawn from a spatially uniform, as
#' opposed to clustered, distribution.
#'
#' @return data.frame Two column data.frame of x,y locations
generate_clustered_points = function(n,
xlimits,
ylimits,
uniform_random_percent=0.2){
uniform_n = floor(n * uniform_random_percent)
clumped_n = n - uniform_n
point_pattern = spatstat::rThomas(5, 0.05, clumped_n*1.5)
# Spatstat will generate a random number of points, so from
# those choose the n points requested.
number_of_random_points = length(point_pattern$x)
if(number_of_random_points < clumped_n){
# If not enough random points were generated by spatstat try again
return(generate_clustered_points(n=n,
xlimits=xlimits,
ylimits=ylimits,
uniform_random_percent = uniform_random_percent))
} else {
uniform_x = runif(n=uniform_n, min=0, max=1)
uniform_y = runif(n=uniform_n, min=0, max=1)
random_selection = sample(1:number_of_random_points, size = clumped_n,replace = FALSE)
x = c(point_pattern$x[random_selection], uniform_x)
y = c(point_pattern$y[random_selection], uniform_y)
return(dplyr::tibble(x=x, y=y))
}
}
#' Generate a non-linear spatial gradient
#' Random draws from a spatial model.
#' x and y are coordinates and thus should be equal length.
#' returns a vector of the same length with values between 0-1 representing
#' a spatially correlated random field
#'
#' @param x vector Coordinates of sample locations on the x axis
#' @param y vector Coordinates of sample locatiosn on the y axis
#' @param seed numeric random seed to create reproducable results
#'
#' @return vector A vector of length(x/y) representing the underlying random surface (from 0-1)
#' of the respective point.
generate_random_spatial_gradient = function(x,y,seed=NULL){
# The random surface generated depends on the seed as well as the unique set
# of points used in predict(variogram_model,...). This functions needs to be
# able to reproduce the same random surfrance regardless of the points used.
# Thus it uses the fixed_grid, which combined with the same seed will produce
# the same random surface. The points of interest (x,y in the arguments) are
# then calculated from the nearest neighbor of the fixed grid.
if(!is.null(seed)){
set.seed(seed)
}
fixed_grid = expand.grid(x=seq(0,1,0.01),
y=seq(0,1,0.01))
varigram_model = gstat::gstat(formula=z~1, locations=~x+y, dummy=T, beta=0.5, nmax=20,
model=gstat::vgm(psill=0.025,model="Gau",range=0.6, nugget = 0))
p = predict(varigram_model, newdata=fixed_grid, nsim=1)$sim1
p = (p-min(p)) / (max(p) - min(p)) # scale to 0-1
# in spatstat::nnwhich() all the coordinates, both from the fixed_grid and the sample
# coordinates x,y need to be input all at once. coordinate_sources differentiates them.
coordinate_sources = factor(c(rep('grid_point',nrow(fixed_grid)),rep('sample_point',length(x))))
nearest_point = spatstat::nnwhich(c(fixed_grid$x,x),c(fixed_grid$y,y), by=coordinate_sources)
# 2 columns returned. One for the nearest grid point, one for the nearest
# sample point.
if(colnames(nearest_point)[1]!='grid_point'){stop('grid point vs data point alignment not working')}
is_sample_point = coordinate_sources=='sample_point'
return(p[nearest_point[is_sample_point,1]])
}
#' @description Simulate large scale phenology observations.
#'
#' @param n integer The number of samples to return.
#' @param x
#' @param y
#' @param sample_type string Either 'po' (default) for presence only, or 'pa' for presence/absence
#' @param fraction_present numeric Between 0-1, the fraction of presence observations. Ignored if sample_type is 'po'.
#' @param distribution_type string The underlying flowering distribution to simulate. 'a' for a normal derived curve, 'b' for
#' a beta derived curve, 'c' for a uniform derived curve.
#' @param xlimits vector Lower and upper bounds of the x axis spatial domain
#' @param ylimits vector Lower and upper bounds of the y axis spatial domin
#' @param start_doy integer The start day of year (doy) of flowering in the simulation
#' @param flowering_length integer The length of the flowering season
#' @param flowering_gradient numeric The strenght of the flowering gradient
#' @param spatial_gradient_type string Either 'linear', where the spatial gradient is a function of the y axis, or
#' 'non-linear' where it is a function of a random surface. In both cases the day
#' of flowering is shifted by flowering_gradient * beta. Where beta is the y axis
#' for 'linear', or the underlying random surface (scaled 0-1) for 'non-linear'
#' @param clustering bool Whether random samples should be clustered around 1-several points. See generate_clustered_points()
#' @param clustering_uniform_random_percent numeric Percent of samples to be drawn from a spatially uniform, as
#' opposed to clustered, distribution.
#' @param seed numeric Seed to allow reproducable results
#'
#' @return data.frame A 5 column data.frame with columns x,y (the coordinates), doy (day of year of the observation),
#' flower_present (either 0 or 1. Only 1 if sample_type='po'), and true_start_doy (the true simulated start day of the observation)
#'
spatial_flowering_sampler = function(n,
x=NULL,
y=NULL,
sample_type='po',
# po: presence only, pa: presence and absence
fraction_present=0.8,
distribution_type='a',
xlimits = c(0,1),
ylimits = c(0,1),
start_doy = 90,
flowering_length = 30,
flowering_gradient = 10/0.1,
spatial_gradient_type = 'linear',
# 'linear': gradient is a functions of the y axis, 'non-linear': gradient is a
# spatially random.
clustering=FALSE,
clustering_uniform_random_percent=0.2,
seed=NULL){
if(!is.null(seed)){
set.seed(seed)
}
if(start_doy + flowering_length>365){
stop('start_doy plus flowering_length cannot exceed 365')
}
if(clustering) {
clustered_points = generate_clustered_points(n=n,
xlimits=xlimits,ylimits=ylimits,
uniform_random_percent=clustering_uniform_random_percent)
x = clustered_points$x
y = clustered_points$y
} else if(is.null(x) && is.null(y)) {
# Generate spatially uniform points
x=runif(n=n, min=xlimits[1], max=xlimits[2])
y=runif(n=n, min=ylimits[1], max=ylimits[2])
} else {
# User user supplied points
if(length(x)!=length(y)){stop('x and y must have the same length')}
if(length(x)!=n){stop('x and y must be the same as n')}
}
# make weights for each julian day corresponding to the chance of observing an open flower
# corresponds to flower abundance
doy_probabilites = rep(0, 365)
if(distribution_type=='a'){
doy_probabilites[start_doy:(start_doy+flowering_length-1)] = dnorm(seq(from=-3,to=3, length.out = flowering_length))
} else if(distribution_type=='b'){
doy_probabilites[start_doy:(start_doy+flowering_length-1)] = dbeta(seq(from=0,to=1, length.out = flowering_length), shape1=2, shape2=5)
} else if(distribution_type=='c'){
stop('type c not implemented yet')
}
# presences and absences included
if(sample_type=='pa'){
num_presence = ceiling(n*fraction_present)
num_absence = n - num_presence
sampled_doy = sample(1:365, size=num_presence, replace=T, prob = doy_probabilites)
sampled_doy = c(sampled_doy,sample(1:365, size=num_absence, replace=T, prob = max(doy_probabilites) - doy_probabilites))
flower_present = c(rep.int(1,num_presence),rep.int(0,num_absence))
# only presences
} else if(sample_type=='po'){
sampled_doy = sample(1:365, size=n, replace=T, prob = doy_probabilites)
flower_present = rep.int(1,n)
} else {
stop(paste0('unknown sample type: ',sample_type))
}
true_start_doy = rep.int(start_doy, n)
if(spatial_gradient_type == 'linear'){
sampled_doy = sampled_doy + round(y*flowering_gradient, 0)
true_start_doy = true_start_doy + round(y * flowering_gradient, 0)
} else if(spatial_gradient_type == 'non-linear'){
non_linear_gradient = generate_random_spatial_gradient(x=x,y=y, seed=seed)
sampled_doy = sampled_doy + round(non_linear_gradient*flowering_gradient, 0)
true_start_doy = true_start_doy + round(non_linear_gradient * flowering_gradient, 0)
} else {
stop(paste0('unknown spatial gradient: ',spatial_gradient_type))
}
# Sanity check
if(any(sampled_doy < 1)){
stop('negative sample doy generated')
}
if(any(sampled_doy>365)){
warning('sampled doy > 365 generated, consider setting more realistic parameters')
}
if(any(true_start_doy < 1)){
stop('negative true_start_doy generated')
}
if(any(true_start_doy>365)){
warning('true_start_doy > 365 generated, consider setting more realistic parameters')
}
return(dplyr::tibble(x=x,y=y,doy=sampled_doy,flower_present=flower_present, true_start_doy=true_start_doy))
}
#' @description Generate an evenly spaced x,y grid of a flowering gradient. All arguments match those used in
#' spatial_flowering_sampler(). This is usefull for visualising the underlying phenology of a simulated
#' phenology run from spatial_flowering_sampler().
#'
#' @param xlimits vector Lower and upper bounds of the x axis spatial domain
#' @param ylimits vector Lower and upper bounds of the y axis spatial domin
#' @param cell_size numeric The spacing between points within the x,y domain
#' @param start_doy integer The start day of year (doy) of flowering in the simulation
#' @param flowering_length integer The length of the flowering season
#' @param flowering_gradient numeric The strenght of the flowering gradient
#' @param spatial_gradient_type string Either 'linear', where the spatial gradient is a function of the y axis, or
#' 'non-linear' where it is a function of a random surface. In both cases the day
#' of flowering is shifted by flowering_gradient * beta. Where beta is the y axis
#' for 'linear', or the underlying random surface (scaled 0-1) for 'non-linear'
#' @param seed numeric Seed to allow reproducable results
#'
#' @return data.frame A 3 column data.frame consisting of x,y (coordinates), and onset (the true simulated start day of all points)
#TODO: Example here showing usage with ggplot::geom_raster()
spatial_flowering_grid = function(xlimits = c(0,1),
ylimits = c(0,1),
cell_size = 0.05,
start_doy = 180,
flowering_length = 30,
flowering_gradient = 10/0.1,
spatial_gradient_type = 'linear',
seed=NULL){
full_grid = expand.grid(x=seq(xlimits[1], xlimits[2], by=cell_size),
y=seq(ylimits[1], ylimits[2], by=cell_size))
if(spatial_gradient_type == 'linear'){
full_grid$onset = start_doy + round(full_grid$y * flowering_gradient, 0)
} else if(spatial_gradient_type == 'non-linear'){
if(is.null(seed)){stop('Seed must be set if generating non-linear spatial gradient')}
non_linear_gradient = generate_random_spatial_gradient(x=full_grid$x,y=full_grid$y, seed=seed)
full_grid$onset = start_doy + round(non_linear_gradient * flowering_gradient, 0)
} else {
stop(paste0('unknown spatial gradient: ',spatial_gradient_type))
}
return(full_grid)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.