#------------------------------------------------
# Get alpha and beta parameters of inverse-gamma prior on sigma^2 from expectation and variance.
# (not exported)
get_alpha_beta <- function(sigma_mean,sigma_var) {
# define a function that has minimum at correct value of alpha
f_alpha <- function(alpha) {
(sqrt((sigma_var+sigma_mean^2)*(alpha-1))*exp(lgamma(alpha-0.5)-lgamma(alpha))-sigma_mean)^2
}
# search for alpha
alpha <- optim(2,f_alpha,method='Brent',lower=1,upper=1e3)$par
# solve for beta
beta <- (sigma_var+sigma_mean^2)*(alpha-1)
# check that chosen alpha is not at limit of range
if (alpha>(1e3-1))
stop('unable to define prior on sigma for chosen values of sigma_mean and sigma_var. Try increasing the value of sigma_var, or alternatively setting sigma_var=0 (i.e. using fixed-sigma model)')
output <- list(alpha=alpha, beta=beta)
return(output)
}
#------------------------------------------------
#' Convert lat-lon to bearing
#'
#' Calculate bearing and great circle distance between an origin and one or more destination points
#'
#' @param origin_lat latitude of origin point
#' @param origin_lon longitude of origin point
#' @param dest_lat latitude of destination point
#' @param dest_lon longitude of destination point
latlon_to_bearing <- function(origin_lat, origin_lon, dest_lat, dest_lon) {
# convert input arguments to radians
origin_lat <- origin_lat*2*pi/360
dest_lat <- dest_lat*2*pi/360
origin_lon <- origin_lon*2*pi/360
dest_lon <- dest_lon*2*pi/360
delta_lon <- dest_lon-origin_lon
# calculate bearing and great circle distance
bearing <- atan2(sin(delta_lon)*cos(dest_lat), cos(origin_lat)*sin(dest_lat)-sin(origin_lat)*cos(dest_lat)*cos(delta_lon))
gc_angle <- acos(sin(origin_lat)*sin(dest_lat) + cos(origin_lat)*cos(dest_lat)*cos(delta_lon))
# convert bearing from radians to degrees measured clockwise from due north, and convert gc_angle to great circle distance via radius of earth (km)
bearing <- bearing*360/(2*pi)
bearing <- (bearing+360)%%360
earthRad <- 6371
gc_dist <- earthRad*gc_angle
return(list(bearing=bearing, gc_dist=gc_dist))
}
#------------------------------------------------
# Convert lat/lon coordinates to cartesian coordinates by first calculating great circle distance and bearing and then mapping these coordinates into cartesian space. This mapping is relative to the point {centre_lat, centre_lon}, which should be roughly at the midpoint of the observed data.
# (not exported)
latlon_to_cartesian <- function(centre_lat, centre_lon, data_lat, data_lon) {
# calculate bearing and great circle distance of data relative to centre
data_trans <- latlon_to_bearing(centre_lat, centre_lon, data_lat, data_lon)
# use bearing and distance to calculate cartesian coordinates
theta <- data_trans$bearing*2*pi/360
d <- data_trans$gc_dist
data_x <- d*sin(theta)
data_y <- d*cos(theta)
return(list(x=data_x, y=data_y))
}
#------------------------------------------------
# Calculate destination lat/lon given an origin, a bearing and a great circle distance of travel
# Note that bearing should be in degrees relative to due north, and gc_dist should be in units of kilometres
# (not exported)
bearing_to_latlon <- function(origin_lat, origin_lon, bearing, gc_dist) {
# convert origin_lat, origin_lon and bearing from degrees to radians
origin_lat <- origin_lat*2*pi/360
origin_lon <- origin_lon*2*pi/360
bearing <- bearing*2*pi/360
# calculate new lat/lon using great circle distance
earthRad <- 6371
new_lat <- asin(sin(origin_lat)*cos(gc_dist/earthRad) + cos(origin_lat)*sin(gc_dist/earthRad)*cos(bearing))
new_lon <- origin_lon + atan2(sin(bearing)*sin(gc_dist/earthRad)*cos(origin_lat), cos(gc_dist/earthRad)-sin(origin_lat)*sin(new_lat))
# convert new_lat and new_lon from radians to degrees
new_lat <- new_lat*360/(2*pi)
new_lon <- new_lon*360/(2*pi)
return(list(longitude=new_lon, latitude=new_lat))
}
#------------------------------------------------
# Convert cartesian coordinates to lat/lon by using angle and euclidian distance from origin to define a bearing and great-circle distance relative to some centre point.
# (not exported)
cartesian_to_latlon <- function(centre_lat, centre_lon, data_x, data_y) {
# calculate angle and euclidian distance of all points relative to origin
d <- sqrt(data_x^2+data_y^2)
theta <- atan2(data_y,data_x)
# convert theta to bearing relative to due north
theta <- theta*360/(2*pi)
theta <- (90-theta)%%360
# use bearing and great circle distance to calculate lat/lon relative to an origin point
data_trans <- bearing_to_latlon(centre_lat, centre_lon, theta, d)
return(list(longitude=data_trans$longitude, latitude=data_trans$latitude))
}
#------------------------------------------------
# Square-root-inverse-gamma distribution
# If an inverse gamma distribution has shape alpha and rate beta, and hence mean beta/(alpha-1) and variance beta^2/((alpha-1)^2*(alpha-2)), then the square root of this random variable has mean epsilon=sqrt(beta)*gamma(alpha-0.5)/gamma(alpha) and variance v=beta/(alpha-1)-epsilon^2. The variance can also be written purely in terms of alpha and epsilon as follows: v=epsilon^2*(gamma(alpha-1)*gamma(alpha)/gamma(alpha-0.5)^2 - 1).
# (not exported)
dRIG <- function(x,alpha,beta,log=FALSE) {
output <- log(2)+alpha*log(beta)-lgamma(alpha)-(2*alpha+1)*log(x)-beta/x^2
if (!log)
output <- exp(output)
return(output)
}
#------------------------------------------------
# Scaled Student's t distribution. Used in kernel density smoothing.
# (not exported)
dts <- function(x, df, scale=1, log=FALSE) {
ret <- lgamma((df+1)/2)-lgamma(df/2)-0.5*log(pi*df*scale^2) - ((df+1)/2)*log(1 + x^2/(df*scale^2))
if (!log) { ret <- exp(ret) }
return(ret)
}
#------------------------------------------------
#' Draw from Dirichlet process mixture model
#'
#' Provides random draws from a 2D spatial Dirichlet process mixture model. Both sigma and tau are defined in units of kilometres, representing the average distance that an observations lies from a source, and the average distance that a source lies from the centre point respectively. In contrast, the location of the centre point and the locations of the final output crime sites are defined in units of degrees lat/long to facilitate spatial analysis.
#'
#' Output includes the lat/long locations of the points drawn from the DPM model, along with the underlying group allocation (i.e. which points belong to which sources) and the lat/long locations of the sources.
#'
#' @param n number of draws.
#' @param sigma standard deviation of dispersal distribution, in units of kilometres.
#' @param tau standard deviation of prior on source locations (i.e. average distances of sources from centre point), in units of kilometres.
#' @param priorMean_longitude location of prior mean on source locations in degrees longitude.
#' @param priorMean_latitude location of prior mean on source locations in degrees latitude.
#' @param alpha concentration parameter of Dirichlet process model. Large alpha implies many distinct sources, while small alpha implies only a few sources.
#'
#' @export
#' @examples
#' # produces clusters of points from sources centred on QMUL
#' rDPM(50, priorMean_longitude = -0.04217491, priorMean_latitude = 51.5235505,
#' alpha=1, sigma=1, tau=3)
#' # same, but increasing alpha to generate more clusters
#' rDPM(50, priorMean_longitude = -0.04217491, priorMean_latitude = 51.5235505,
#' alpha=5, sigma=1, tau=3)
rDPM <- function(n, sigma=1, tau=10, priorMean_longitude=-0.1277, priorMean_latitude=51.5074, alpha=1) {
# force n to be a scalar integer
n <- floor(n[1])
# draw grouping
group <- rep(1,n)
freqs <- 1
if (n>1) {
for (i in 2:n) {
group[i] <- sample(length(freqs)+1,1,prob=c(freqs,alpha))
if (group[i]>length(freqs))
freqs <- c(freqs,0)
freqs[group[i]] <- freqs[group[i]] + 1
}
}
group <- sort(group)
# draw source locations
source <- rnorm_sphere(length(freqs), priorMean_latitude, priorMean_longitude, tau)
# draw crime locations from sources
crime <- rnorm_sphere(n, source$latitude[group], source$longitude[group], sigma)
# return results
return(list(longitude=crime$longitude, latitude=crime$latitude, group=group, source_lon=source$longitude, source_lat=source$latitude))
}
#------------------------------------------------
# Draw from normal distribution converted to spherical coordinate system. Points are first drawn from an ordinary cartesian 2D normal distribution. The distances to points are then assumed to be great circle distances, and are combined with a random bearing from the point {centre_lat, centre_lon} to produce a final set of lat/lon points. Note that this is not a truly spherical normal distribution, as the domain of the distribution is not the sphere - rather it is a transformation from one coordinate system to another that is satisfactory when the curvature of the sphere is not severe.
# (not exported)
rnorm_sphere <- function(n, centre_lat, centre_lon, sigma) {
x <- rnorm(n,sd=sigma)
y <- rnorm(n,sd=sigma)
output <- cartesian_to_latlon(centre_lat, centre_lon, x, y)
return(output)
}
#------------------------------------------------
# Bin values in two dimensions
# (not exported)
bin2D <- function(x, y, x_breaks, y_breaks) {
# get number of breaks in each dimension
nx <- length(x_breaks)
ny <- length(y_breaks)
# create table of binned values
tab1 <- table(findInterval(x, x_breaks), findInterval(y, y_breaks))
# convert to dataframe and force numeric
df1 <- as.data.frame(tab1, stringsAsFactors=FALSE)
names(df1) <- c("x", "y", "count")
df1$x <- as.numeric(df1$x)
df1$y <- as.numeric(df1$y)
# subset to within breaks range
df2 <- subset(df1, x>0 & x<nx & y>0 & y<ny)
# fill in matrix
mat1 <- matrix(0,ny-1,nx-1)
mat1[cbind(df2$y, df2$x)] <- df2$count
# calculate cell midpoints
x_mids <- (x_breaks[-1]+x_breaks[-nx])/2
y_mids <- (y_breaks[-1]+y_breaks[-ny])/2
# return output as list
output <- list(x_mids=x_mids, y_mids=y_mids, z=mat1)
return(output)
}
#------------------------------------------------
#' Produce a smooth surface using 2D kernel density smoothing
#'
#' Takes lon/lat coordinates, bins in two dimensions, and smooths using kernel density smoothing. Kernel densities are computed using the fast Fourier transform method, which is many times faster than simple summation when using a large number of points. Each Kernel is student's-t distributed with 3 degrees of freedom, and scaled by the bandwidth lambda. If lambda is set to \code{NULL} then the optimal value of lambda is chosen automatically using the leave-one-out maximum likelihood method.
#'
#' @param longitude longitude of input points
#' @param latitude latitude of input points
#' @param breaks_lon positions of longitude breaks
#' @param breaks_lat positions of latitude breaks
#' @param lambda bandwidth to use in posterior smoothing. If NULL then optimal bandwidth is chosen automatically by maximum-likelihood.
#'
#' @references Barnard, Etienne. "Maximum leave-one-out likelihood for kernel density estimation." Proceedings of the Twenty-First Annual Symposium of the Pattern Recognition Association of South Africa. 2010.
#' @export
#' @examples
#' # create smooth surface based on raw LondonExample_crimes
#' breaks_lon <- seq(-0.25,0.05,l=101)
#' breaks_lat <- seq(51.45,51.6,l=101)
#' m <- geoSmooth(LondonExample_crimes$longitude, LondonExample_crimes$latitude,
#' breaks_lon, breaks_lat)
#'
#' # produce image plot of surface and overlay points
#' image(breaks_lon, breaks_lat, t(m), xlab="longitude", ylab="latitude")
#' points(LondonExample_crimes$longitude, LondonExample_crimes$latitude)
geoSmooth <- function(longitude, latitude, breaks_lon, breaks_lat, lambda=NULL) {
# get properties of cells in each dimension
cells_lon <- length(breaks_lon) - 1
cells_lat <- length(breaks_lat) - 1
centre_lon <- mean(breaks_lon)
centre_lat <- mean(breaks_lat)
cellSize_lon <- diff(breaks_lon[1:2])
cellSize_lat <- diff(breaks_lat[1:2])
# bin lon/lat values in two dimensions and check that at least one value in chosen region
surface_raw <- bin2D(longitude, latitude, breaks_lon, breaks_lat)$z
if (all(surface_raw==0)) { stop('chosen lat/long window contains no posterior draws') }
# temporarily add guard rail to surface to avoid Fourier series bleeding round edges
railSize_lon <- cells_lon
railSize_lat <- cells_lat
railMat_lon <- matrix(0, cells_lat, railSize_lon)
railMat_lat <- matrix(0, railSize_lat, cells_lon + 2*railSize_lon)
surface_normalised <- surface_raw/sum(surface_raw)
surface_normalised <- cbind(railMat_lon, surface_normalised, railMat_lon)
surface_normalised <- rbind(railMat_lat, surface_normalised, railMat_lat)
# calculate Fourier transform of posterior surface
f1 = fftw2d(surface_normalised)
# calculate x and y size of one cell in cartesian space. Because of transformation, this size will technically be different for each cell, but use centre of space to get a middling value
cellSize_trans <- latlon_to_cartesian(centre_lat, centre_lon, centre_lat + cellSize_lat, centre_lon + cellSize_lon)
cellSize_trans_lon <- cellSize_trans$x
cellSize_trans_lat <- cellSize_trans$y
# produce surface over which kernel will be calculated. This surface wraps around in both x and y (i.e. the kernel is actually defined over a torus).
kernel_lon <- cellSize_trans_lon * c(0:floor(ncol(surface_normalised)/2), floor((ncol(surface_normalised) - 1)/2):1)
kernel_lat <- cellSize_trans_lat * c(0:floor(nrow(surface_normalised)/2), floor((nrow(surface_normalised) - 1)/2):1)
kernel_lon_mat <- outer(rep(1,length(kernel_lat)), kernel_lon)
kernel_lat_mat <- outer(kernel_lat, rep(1,length(kernel_lon)))
kernel_s_mat <- sqrt(kernel_lon_mat^2 + kernel_lat_mat^2)
# set lambda (bandwidth) range to be explored
if (is.null(lambda)) {
lambda_step <- min(cellSize_trans_lon, cellSize_trans_lat)/5
lambda_vec <- lambda_step*(1:100)
} else {
lambda_vec <- lambda
}
# loop through range of values of lambda
cat('Smoothing posterior surface')
flush.console()
logLike <- -Inf
for (i in 1:length(lambda_vec)) {
# print dots to screen
if (i>1) {
cat(".")
flush.console()
}
# calculate Fourier transform of kernel
lambda_this <- lambda_vec[i]
kernel <- dts(kernel_s_mat, df=3, scale=lambda_this)
f2 = fftw2d(kernel)
# combine Fourier transformed surfaces and take inverse. f4 will ultimately become the main surface of interest.
f3 = f1*f2
f4 = Re(fftw2d(f3,inverse=T))/length(surface_normalised)
# subtract from f4 the probability density of each point measured from itself. In other words, move towards a leave-one-out kernel density method
f5 <- f4 - surface_normalised*dts(0, df=3, scale=lambda_this)
f5[f5<0] <- 0
f5 <- f5/sum(f4)
# calculate leave-one-out log-likelihood at each point on surface
f6 <- surface_normalised*log(f5)
# break if total log-likelihood is at a local maximum
if (sum(f6,na.rm=T)<logLike) { break() }
# otherwise update logLike
logLike <- sum(f6,na.rm=T)
}
# report chosen value of lambda
if (is.null(lambda)) {
cat(paste('\nmaximum likelihood lambda = ', round(lambda_this,3), sep=''))
}
# remove guard rail
f4 <- f4[,(railSize_lon+1):(ncol(f4)-railSize_lon)]
f4 <- f4[(railSize_lat+1):(nrow(f4)-railSize_lat),]
# return surface
return(f4)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.