#' Example
#'
library(rgeos)
library(sp)
library(ggplot2)
set.seed(123)
survey_xlim = c(0,100) # survey range
survey_ylim = c(0,100) # survey range
quad_width = 1 # Sampling unit width
quad_height = 1 # Sampling unit height
quad_x_spacing = 10 # distance between samples in x-axis
quad_y_spacing = 10 # distance between samples in ys-axis
N = 10000 # True population number
# spatial distribution of population true
beta1 = c(4,1)
beta2 = c(1,4)
## create non-overlapping sampling frame
n_col = max(survey_xlim) / quad_x_spacing # number of sampling units on x-axis
n_row = max(survey_ylim) / quad_y_spacing # number of sampling units on y-axis
n = n_row * n_col # n-sampling units
# Define PSU's, random sample these to define survey
n_l = quad_y_spacing / quad_height # l
n_k = quad_x_spacing / quad_width # k
total_sampling_units = n_l * n_row * n_col * n_k
within_rows = rep(1:n_l, n_k)
within_cols = sort(rep(1:n_k, n_l))
# Randomly draw PSU
first_quad = sample(1:length(within_cols), size = 1)
PSU_x = within_cols[first_quad] * quad_width
PSU_y = within_rows[first_quad] * quad_height
upper_x1 = seq(from = PSU_x, to = survey_xlim[2], by = quad_x_spacing)
upper_y1 = seq(from = PSU_y, to = survey_ylim[2], by = quad_y_spacing)
upper_x = rep(upper_x1, length(upper_y1))
upper_y = sort(rep(upper_y1, length(upper_x1)))
y_coord = upper_y - quad_height / 2
x_coord = upper_x - quad_width / 2
plot(x = x_coord, y = y_coord, xlim = survey_xlim, ylim = survey_ylim, xlab = "", ylab = "", pch = 16)
## generate True population
pop_x_coord = rbeta(N, beta1[1], beta1[2]) * max(survey_xlim)
pop_y_coord = rbeta(N, beta2[1], beta2[2]) * max(survey_ylim)
points(pop_x_coord, pop_y_coord, pch = 16, cex = 0.7, col = adjustcolor(col = "blue", alpha.f = 0.1))
## sample pop based on systematic survey
Data = data.frame(height = rep(quad_height, n), width = rep(quad_width, n))
Data$count = NA
counter = 1;
#plot(xpp)
for(i in 1:length(upper_x)) {
#polygon(x = c(upper_x[i] - quad_width, upper_x[i] - quad_width,upper_x[i] ,upper_x[i] ), y = c(upper_y[i] - quad_height,upper_y[i], upper_y[i],upper_y[i] - quad_height), col = "blue")
Data$count[i] = sum(pop_x_coord >= (upper_x[i] - quad_width) & pop_x_coord <= upper_x[i] & pop_y_coord >= (upper_y[i] - quad_height) & pop_y_coord <= upper_y[i])
}
Data$area = Data$height * Data$width
head(Data)
## convert to spatial data frame
sp_data = SpatialPointsDataFrame(coords = cbind(x_coord, y_coord), data = Data) ## can add coordinate reference system here as well
## Create survey polgon
s_poly = Polygon(cbind(x = c(survey_xlim[1],survey_xlim[1],survey_xlim[2],survey_xlim[2]), y = c(survey_ylim[1],survey_ylim[2],survey_ylim[2],survey_ylim[1])))
s_polys = Polygons(list(s_poly),1)
sp_survey_poly = SpatialPolygons(list(s_polys)) ## add coordinater reference here
survey_area = rgeos::gArea(sp_survey_poly)
survey_polygon = sp_survey_poly
## debug Boxlet function
y = sp_data
xy_coords = coordinates(y)
survey_polygon = sp_survey_poly
dimensions = 2
## Striplet sampling data frame
quad_width = 1 # Sampling unit width
quad_height = 1 # Sampling unit height
h_w = quad_width / 2 # half width
h_h = quad_height / 2 # half height
quad_x_spacing = 10 # distance between samples in x-axis
quad_y_spacing = 10 # distance between samples in ys-axis
##
boxlet_per_sample_width = 2
boxlet_per_sample_height = 2
strip_width = quad_width / boxlet_per_sample_width
strip_height = quad_height / boxlet_per_sample_height
## striplet midpoints
strip_x = seq(from = min(survey_xlim) + h_w, to = max(survey_xlim) - h_w, by = strip_width)
strip_y = seq(from = min(survey_ylim) + h_h, to = max(survey_ylim) - h_h, by = strip_height)
strip_df = expand.grid(strip_x, strip_y)
colnames(strip_df) = c("x", "y")
J = nrow(strip_df)
strip_df$area = strip_width * strip_height
## Now link each striplet to a b in 1:B
b_x = seq(from = min(survey_xlim) + h_w, to = quad_x_spacing - h_w, by = strip_width)
b_y = seq(from = min(survey_ylim) + h_h, to = quad_y_spacing - h_h, by = strip_height)
bx1 = sort(rep(b_x, length(b_y)))
by1 = rep(b_y, length(b_x))
B = length(b_x) * length(b_y)
B
## create an indicator matrix that is J x B
## create an effcient function for this
indicator = matrix(0, nrow = J, ncol = B)
for(b in 1:B) {
if(b %% 10 == 0)
cat("b = ", b, "\n")
## midpoints for sampling units with this B
this_b_x = seq(from = bx1[b], to = survey_xlim[2], by = quad_x_spacing)
this_b_y = seq(from = by1[b], to = survey_ylim[2], by = quad_y_spacing)
this_b_x1 = rep(this_b_x, length(this_b_y))
this_b_y1 = sort(rep(this_b_y, length(this_b_x)))
lb_x = this_b_x1 - quad_width / 2
ub_x = this_b_x1 + quad_width / 2
lb_y = this_b_y1 - quad_height / 2
ub_y = this_b_y1 + quad_height / 2
test_ind = rowSums(sapply(1:length(lb_x), function(i) {findInterval(strip_df$x, vec = c(lb_x[i],ub_x[i])) == 1 & findInterval(strip_df$y, vec = c(lb_y[i],ub_y[i])) == 1}))
#plot(this_b_x1, this_b_y1, pch = 16)
#arrow(x0 = this_b_x1, x1 = this_b_x1, y0 = lb_y, y1 = ub_y, col = "red", lwd = 3)
indicator[which(test_ind == 1), b] = 1
}
table(colSums(indicator))
## check this worked
#plot(this_b_x1, this_b_y1, pch = 16, type = "n")
#for(b in 1:B)
# points(strip_df$x[indicator[,b] == 1], strip_df$y[indicator[,b] == 1], col = adjustcolor(col = "blue", alpha.f = 0.2), cex = 0.3, pch = 16)
if(all(c("x","y") %in% colnames(y@data))) {
y@data$x = xy_coords[,1]
y@data$y = xy_coords[,2]
}
boxlet_frame = BoxletEstimatorSamplingFrame(survey_polygon, quad_width, quad_height,
boxlet_per_sample_width = 2, boxlet_per_sample_height = 2, trace = F)
names(boxlet_frame)
nrow(boxlet_frame$boxlet_df)
dim(boxlet_frame$boxlet_indicator_matrix)
table(colSums(boxlet_frame$boxlet_indicator_matrix))
table(colSums(indicator))
## GAM method used in Boxlet
gam_fit = mgcv::gam(formula = count ~ offset(log(area)) + s(x, y, bs = "tp"), data = y, family = poisson(link = log))
#gam_fit = gam::gam(formula = count ~ s(x_mid,y_mid, df = 20), data = Data, family = poisson(link = log), offset = log(area))
## Predict over all boxlets
strip_df$rate = predict(gam_fit, newdata = strip_df, type = "response")
musum <- sum(strip_df$rate)
## predict the striplet distribution X
ggplot(data = strip_df, aes(x = x, y = y, fill = rate)) +
geom_tile()
## Variance estimator
Abvec = as.vector(strip_df$rate %*% indicator)
pj = strip_df$rate / sum(strip_df$rate)
Q_hat = as.vector(pj %*% indicator) ## doesn't need to sum = 1
strip_area = as.vector(strip_df$area %*% indicator)
Lbvec = rep(100, B)
boxplot(cbind(ABvec, Q_hat * musum))
abline(h = sum(Data$count), col = "red", lwd = 2, lty = 2)
## var(n) is variance in observed count, given constant sampling area of units
var_boxlet = (1/B) * sum(musum * Q_hat * (1 - Q_hat) + musum^2 * Q_hat^2) - (1/B * sum(musum * Q_hat))^2
## covnerte var(n) -> survey_area * var(n) / sample_area
sqrt(survey_area^2 * var_boxlet / (n * quad_width * quad_height)^2)
## alternative, if to account for varying sampling areas var(n/a) - here a is a variable
var_boxlet_alt = 1/B * sum((musum * Q_hat * (1 - Q_hat) + musum^2 * Q_hat^2) / strip_area^2) - ((1/B) * sum((musum * Q_hat) / strip_area))^2
sqrt(survey_area^2 * var_boxlet_alt)
## Code from Fewster
var.striplet <- 1/B * sum((Abvec + Abvec^2*(1-1/musum) )/Lbvec^2)- (1/B * sum(Abvec/Lbvec))^2
sqrt(survey_area^2 * var.striplet / quad_width^2)
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.