# Author: Kyle Taylor <kyle.taylor@pljv.org>
# Year : 2017
# Description : various tasks related to attributing and joining spatial datasets
# for modeling with IMBCR datasets
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
#' calculate transect effort for a given IMBCR spatial data.frame
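#' @examples
#' \dontrun{
#' # minimal usage sketch; 'imbcr_df' is a hypothetical IMBCR Spatial*DataFrame
#' # (or data.frame) with a transect field and a 'point' (station) field
#' effort <- calc_transect_effort(imbcr_df)
#' head(effort) # named vector of stations sampled, by transect
#' }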
#' @export
calc_transect_effort <- function(df = NULL) {
if(inherits(df,"Spatial")){
df <- df@data
}
  effort <- sapply(
    unique(as.character(df[,transect_fieldname(df)])),
    FUN = function(n){
      # effort = number of unique stations (points) sampled along this transect
      length(unique(df[ df[,transect_fieldname(df)] == n, ]$point))
    }
  )
return(effort)
}
#' calculate time-of-day for a given IMBCR spatial data.frame
#' @export
calc_time_of_day <- function(df=NULL){
  # coerce through character first so that factor-encoded start times are
  # not silently converted to factor level codes
  df$tod <- as.numeric(as.character(df$starttime))
  return(df)
}
#' calculate day-of-year for a given IMBCR spatial data.frame
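#' @examples
#' \dontrun{
#' # sketch assuming a 'date' field formatted as mm/dd/yyyy (e.g., "05/21/2016")
#' imbcr_df <- calc_day_of_year(imbcr_df)
#' summary(imbcr_df$doy) # Julian day-of-year (e.g., 142)
#' }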
#' @export
calc_day_of_year <- function(df=NULL){
if(inherits(df,"Spatial")){
s <- df
df <- df@data
}
df$doy <- as.numeric(strftime(as.POSIXct(as.Date(as.character(
df$date), "%m/%d/%Y")),format="%j"))
if(exists("s")){
s@data <- df
return(s)
} else {
return(df)
}
}
#' accepts a Spatial*DataFrame object with a distance observation
#' field and bins the raw distances into a number of distance
#' intervals (specified by breaks=), reclassifying each raw distance
#' to a bin identifier (e.g., distance class 3).
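#' @examples
#' \dontrun{
#' # sketch: bin a hypothetical IMBCR data.frame's distances into 10 classes
#' bins <- calc_dist_bins(imbcr_df, p=0.95, breaks=10)
#' bins$breaks # vector of bin interval endpoints
#' dim(bins$y) # one row of per-bin detection counts for each transect
#' }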
#' @export
calc_dist_bins <- function(df=NULL, p=0.95, breaks=10, force_shoulder=F){
if(inherits(df,"Spatial")){
df <- df@data
}
# Distance at which detections for all birds are considered suspect in IMBCR
EMPIRICAL_DIST_CUTOFF <- 300
  # determine our cut-off as the p-th quantile of the observed distances;
  # drop observations beyond the cut-off only when it exceeds the
  # "difficult to detect" threshold above
cutoff <- quantile(df[,distance_fieldname(df)], p=p, na.rm=T)
if (cutoff > EMPIRICAL_DIST_CUTOFF){
df <- df[ is.na(df[,distance_fieldname(df)]) | (df[,distance_fieldname(df)] <= cutoff) , ]
# if our cut-off isn't greater than our "difficult to detect" threshold then don't
# drop any records
} else {
p <- 1
}
# define our bin intervals for a user-specified number of breaks
if (length(breaks) == 1){
if (force_shoulder){
bin_intervals <- c(
0,
as.vector(quantile(
df[,distance_fieldname(df)],
probs = seq(0.5, 1, length.out = breaks-1), na.rm = T)
)
)
} else {
bin_intervals <- seq(
from=0,
to=quantile(df[,distance_fieldname(df)], p=p, na.rm=T),
length.out=breaks
)
}
# or did the user specify explicit breaks for us to use?
} else {
bin_intervals <- breaks
}
# calculate a detections matrix (by transect)
df <- do.call(rbind, lapply(
X = unique(as.character(df[,transect_fieldname(df)])),
FUN = function(x){
matrix(table(
cut(
df[ df[ , transect_fieldname(df)] == x, distance_fieldname(df) ],
breaks = bin_intervals,
labels = paste("dst_class_",1:(length(bin_intervals)-1),sep = "")
)
),
nrow=1
)
}
))
  # return the detections matrix and our bin breaks to the user
return(list(y=df, breaks=bin_intervals))
}
#' hidden function that calculates route centroids and attributes each
#' centroid with the number of detections for a bird of interest
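#' @examples
#' \dontrun{
#' # centroid of each transect, attributed with detections of Lark Bunting
#' # ("LARB"); 's' is a hypothetical IMBCR SpatialPointsDataFrame
#' centroids <- calc_route_centroids(s, four_letter_code="LARB")
#' }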
#' @export
calc_route_centroids <- function(s=NULL, four_letter_code=NULL, use='detections'){
transects <- as.vector(unique(
s$transectnum
))
pts <- lapply(
X=transects,
FUN=function(transect){
transect <- s[s$transectnum == transect, ]
detections <- transect$birdcode == four_letter_code
# use sum of distances > 0 by default
if(grepl(tolower(use), pattern="detections")){
detections <- sum(transect@data[detections, distance_fieldname(s)] > 0, na.rm=T)
# if anything else is specified, try to sum a data.frame by that variable
} else {
detections <- sum(transect@data[detections, use], na.rm=T)
}
pt <- rgeos::gCentroid(transect)
pt$transect <- unique(transect$transectnum)
pt@data[, use] <- detections
return(pt)
})
return(do.call(rbind, pts))
}
#' calculate the latitude and longitude of feature centroids in an input dataset
#' and return an appended Spatial*DataFrame Object with lat/lon entries
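#' @examples
#' \dontrun{
#' s <- calc_lat_lon(s) # appends 'lon' and 'lat' (EPSG:4326) fields to s@data
#' head(s@data[, c("lon","lat")])
#' }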
calc_lat_lon <- function(s=NULL, proj_str="+init=epsg:4326"){
  coords <- as.data.frame(rgeos::gCentroid(sp::spTransform(s, sp::CRS(proj_str)), byid=T)@coords)
colnames(coords) <- c("lon","lat")
s@data <- cbind(s@data, coords)
return(s)
}
#' hidden function that summarizes IMBCR transect covariate data and metadata
#' by year (apply it across transects, e.g., with lapply). This allows you to
#' calculate covariates at the IMBCR station level and then pool (summarize)
#' the observations by transect and year
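#' @examples
#' \dontrun{
#' # pool station-level covariates by transect and year; the 'covs' column
#' # names here are hypothetical fields in df
#' pooled <- do.call(rbind, lapply(
#'   X=unique(as.character(df[, transect_fieldname(df)])),
#'   FUN=pool_by_transect_year,
#'   df=df, breaks=bins$breaks, covs=c("grass_ar","shrub_ar")
#' ))
#' }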
pool_by_transect_year <- function(x=NULL, df=NULL, breaks=NULL, covs=NULL,
summary_fun=median){
breaks <- length(breaks)
transect_year_summaries <- data.frame()
# summarize focal_transect_year by breaking into counts within
# distance classes and binding effort, year, and covs calculated
# at the transect scale
years <- sort(unique(df[df[,transect_fieldname(df)] == x, "year"]))
for(year in years){
focal_transect_year <- df[
df[,transect_fieldname(df)] == x & df$year == year, ]
# pre-allocate zeros for all bins
distances <- rep(0,(breaks-1))
names(distances) <- 1:(breaks-1)
# build a pivot table of observed bins
dist_classes <- sort(focal_transect_year$dist_class) # drop NA's
dist_classes <- table(as.numeric(dist_classes))
    # merge the pivot table with the pre-allocated table and add an NA bin
distances[names(distances) %in% names(dist_classes)] <- dist_classes
distances <- append(distances,
sum(is.na(focal_transect_year$dist_class)))
distances <- as.data.frame(matrix(distances,nrow=1))
names(distances) <- paste("distance_",c(1:(breaks-1),"NA"),sep="")
# summarize each of the covs across the transect-year
summary_covs <- matrix(rep(NA,length(covs)),nrow=1)
colnames(summary_covs) <- covs
for(cov in covs){
# some covs are year-specific; filter accordingly
cov_year <- names(focal_transect_year)[
grepl(names(focal_transect_year),pattern=cov)
]
if(length(cov_year)>1){
cov_year <- cov_year[grepl(cov_year,pattern=as.character(year))]
}
summary_covs[,cov_year] <- summary_fun(
focal_transect_year[,cov_year],
na.rm=T
)
}
# post-process pooled transect-year
# keep most of our vars intact, but drop those that lack meaning at
# the transect scale or that we have summarized above
meta_vars <- colnames(df)[!colnames(df) %in%
c(transect_fieldname(df), "year", "dist_class",
distance_fieldname(df), "timeperiod", "point", "how",
"FID", "visual", "migrant", "cl_count", "cl_id",
"ptvisitzone", "ptvisiteasting", "ptvisitnorthing",
"rank", covs)]
# build our summary transect-year data.frame
focal_transect_year <- cbind(
focal_transect_year[1, meta_vars],
data.frame(transectnum=x, year=year),
distances,
summary_covs
)
# merge into our annual summary table
transect_year_summaries <-
rbind(transect_year_summaries,focal_transect_year)
}
return(transect_year_summaries)
}
#' shorthand vector extraction function that performs a spatial join,
#' attributing vector features in x with overlapping features in y.
#' Will automatically reproject to a consistent CRS.
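#' @examples
#' \dontrun{
#' # attribute hypothetical IMBCR points with overlapping county attributes
#' pts <- spatial_join(x=imbcr_pts, y=counties, drop=TRUE)
#' }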
spatial_join <- function(x=NULL, y=NULL, drop=T){
over <- sp::over(
x = sp::spTransform(
x,
sp::CRS(raster::projection(y))
),
y = y
)
x@data <- cbind(x@data, over)
# drop non-overlapping features using the NA values
# attributed to the first column of y@data
if (drop) {
x <- x[ !is.na(x@data[,colnames(y@data)[1]]) , ]
}
return(x)
}
#' generate a square buffer around a single input feature
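#' @examples
#' \dontrun{
#' # 3 x 3 km square centered on the first feature of a hypothetical 'units'
#' # SpatialPolygonsDataFrame; assumes a projected CRS with meter units
#' sq <- buffer_grid_unit(x=units, row=1, radius=1500)
#' }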
buffer_grid_unit <- function(x=NULL, row=NULL, radius=1500){
  if (!is.null(row)){
    x <- x[row, ]
  } else {
    # no row index given -- default to the first feature
    x <- x[1, ]
  }
# bogart a centroid for the enclosing grid square
centroid <- rgeos::gCentroid(
x,
byid = F
)@coords
# build a single square polygon
square <- sp::Polygon(
coords = matrix(
data=
c(centroid[1]+radius, # x1
centroid[1]-radius, # x2
centroid[1]-radius, # x3
centroid[1]+radius, # x4
centroid[2]+radius, # y1
centroid[2]+radius, # y2
centroid[2]-radius, # y3
centroid[2]-radius),# y4
ncol=2
),
hole=F
)
# return as a SpatialPolygons object that Raster can comprehend
return(sp::SpatialPolygons(
Srl=list(
sp::Polygons(list(square),
ID=ifelse(is.null(row), 1, row)
)),
proj4string=sp::CRS(raster::projection(x))
))
}
#' testing: generalization of the buffer_grid_unit function that uses
#' lapply to buffer across all grid features in an input units
#' Spatial*DataFrame. Parallel implementations of this function are
#' difficult, because you have to push a copy of units onto each node
l_buffer_grid_units <- function(units=NULL, radius=1500){
return(lapply(
X=1:length(units@polygons),
FUN=function(x) buffer_grid_unit(row=x, x=units, radius=radius)
))
}
#' testing: generalization of buffer_grid_unit with parallel comprehension
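#' @examples
#' \dontrun{
#' # buffer every grid feature in a hypothetical 'units' object in parallel
#' buffers <- par_buffer_grid_units(units, radius=1500)
#' }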
#' @export
par_buffer_grid_units <- function(units=NULL, radius=1500){
if (!inherits(units, 'list')){
units <- lapply(
X=sp::split(units, 1:length(units@polygons)),
FUN=methods::as,
Class='SpatialPolygons'
)
}
e_cl <- parallel::makeCluster(parallel::detectCores()-1)
parallel::clusterExport(
cl=e_cl,
varlist=c("buffer_grid_unit"),
envir=environment()
)
parallel::clusterCall(
e_cl,
function(x) {
library("rgeos");
library("sp");
library("raster");
}
)
ret <- parallel::parLapply(
cl=e_cl,
X=units,
fun=buffer_grid_unit,
radius=radius
)
parallel::stopCluster(e_cl); rm(e_cl); gc()
return(ret)
}
#' extract an input raster dataset using polygon feature(s). Bulk processes
#' list objects using the parallel package. Avoid passing rasters stored on a
#' network filesystem; prefer locally cached data, or the parallel workers
#' may fail unexpectedly.
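#' @examples
#' \dontrun{
#' # mask a hypothetical locally-cached raster with each buffer in a list
#' habitat <- extract_by(polygon=buffers, r=raster::raster("landcover.tif"))
#' }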
#' @export
extract_by <- function(polygon=NULL, r=NULL, updatevalue=NA){
if (!inherits(polygon, 'list')){
    r <- raster::crop(r, sp::spTransform(polygon, sp::CRS(raster::projection(r))))
    return(raster::mask(
      x=r,
      mask=sp::spTransform(polygon, sp::CRS(raster::projection(r))),
      updatevalue=updatevalue
    ))
}
# simplify our input polygons list to save RAM
if(inherits(polygon[[1]], 'SpatialPolygonsDataFrame')){
polygon <- lapply(polygon, FUN = as, 'SpatialPolygons')
}
# default list comprehension: reproject to a consistent CRS
# and then crop our input raster using a local cluster instance
if(!raster::compareCRS(polygon[[1]],r)){
    warning(paste("input polygon= object(s) needed to be reprojected to",
                  "the CRS of r=; this may lead to RAM issues", sep = " "))
polygon <- lapply(
polygon,
FUN=sp::spTransform,
CRSobj=sp::CRS(raster::projection(r))
)
}
e_cl <- parallel::makeCluster(parallel::detectCores()-1)
parallel::clusterExport(cl=e_cl, varlist=c("r","updatevalue"), envir=environment())
ret <- parallel::parLapply(
e_cl,
X=polygon,
fun=function(x){
# save us some ram by cropping our raster
r.local <- try(raster::crop(x=r, y=x));
      if(inherits(r.local, "try-error")){
return(NA)
}
# now mask out any lurking cell values around our shape
ret <- try(raster::mask(x=r.local, mask=x, updatevalue=updatevalue))
      if(inherits(ret, "try-error")){
return(NA)
} else {
return(ret)
}
}
)
parallel::stopCluster(cl=e_cl); rm(e_cl); gc();
return(ret)
}
#' testing: chunk out the task of extracting buffers from a raster surface
#' using parallel in the most memory-efficient way possible. Useful
#' for very large (>100,000 features) sets of buffer polygons.
extract_by_large_df <- function(polygon=NULL, r=NULL){
  usng_extractions <- list();
  steps <- round(seq(0, nrow(polygon), length.out=30));
  for(i in 1:(length(steps)-1)){
    # split this chunk into a list of single features so that extract_by()
    # takes its parallel (per-polygon) branch
    chunk <- lapply(
      X=(steps[i]+1):(steps[i+1]),
      FUN=function(j) polygon[j, ]
    )
    usng_extractions <- append(usng_extractions, extract_by(chunk, r=r))
  }
rm(r, polygon, steps); gc();
return(usng_extractions)
}
#' perform a binary reclassification on an input raster: cells matching the
#' from= values are substituted with 1's, and non-matching cells are set to
#' NA. The output rasters should be thematically consistent with the patch
#' metric calculations done with the SDMTools package.
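#' @examples
#' \dontrun{
#' # reclassify hypothetical grassland classes (71, 81) in a 'landcover'
#' # raster to 1, all other cells to NA
#' grass <- binary_reclassify(landcover, from=c(71, 81))
#' }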
binary_reclassify <- function(x=NULL, from=NULL, nomatch=NA){
if (!inherits(x, 'list')){
return(raster::match(x, table=from, nomatch=nomatch) >= 1)
}
return(lapply(lapply(
x,
FUN=raster::match,
table=from,
nomatch=nomatch
),
FUN=raster::calc,
fun=function(x,na.rm=F){x>=1}
))
}
#' shorthand mean calculation function
#' @export
calc_mean <- function(x=NULL, na.rm=T){
# sanity check : is x even valid?
if(is.null(x) || !inherits(x, 'Raster')){
return(NA)
}
# sanity check : are all of our values NA?
  if(all(is.na(raster::values(x)))){
return(NA)
}
return(raster::cellStats(x, stat=mean, na.rm=na.rm))
}
#' shorthand total area calculation function
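#' @examples
#' \dontrun{
#' # sketch: total habitat area in km^2 for a hypothetical binary raster with
#' # 30-m cells; cellStats(sum) counts habitat cells, prod(res(x)) is the
#' # m^2 area of a cell, and area_of_cell=1e-6 converts m^2 to km^2
#' calc_total_area(habitat_raster, area_of_cell=1e-6)
#' }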
#' @export
calc_total_area <- function(x=NULL, area_of_cell = NULL){
# sanity check : is x even valid?
if(is.null(x) || !inherits(x, 'Raster')){
return(NA)
}
# sanity check : are all of our values NA?
  if(all(is.na(raster::values(x)))){
return(NA)
}
if(is.null(area_of_cell)){
warning("no area units specified; will not adjust sum operation")
area_of_cell <- 1
}
ret <- raster::cellStats(x, stat=sum, na.rm=T) *
prod(raster::res(x)) * area_of_cell
  if (is.null(ret) || is.na(ret)){
return(0)
} else {
return(ret)
}
}
#' hidden function that will calculate interpatch distance using the raster::distance
#' function
#' @export
calc_interpatch_distance <- function(x=NULL, stat=mean){
# sanity check : is x even valid?
if(is.null(x) || !inherits(x, 'Raster')){
return(NA)
}
  # if there are no habitat patches (isolation would theoretically be
  # very high), don't try to calc inter-patch distance because distance()
  # will throw an error
  if (sum(!is.na(raster::values(x))) == 0){
    return(9999)
  # if the whole raster is pure habitat (i.e., no inter-patches)
  } else if (sum(is.na(raster::values(x))) == 0) {
    return(0)
  } else {
    ret <- try(stat(raster::values(raster::distance(x)), na.rm = T))
    if(inherits(ret, "try-error")){
return(NULL)
} else {
return(ret)
}
}
}
#' hidden function that will use SDMTools to calculate the mean patch area
#' on a given raster object
#' @export
calc_mean_patch_area <- function(x=NULL, area_of_cell = NULL){
# sanity check : is x even valid?
if(is.null(x) || !inherits(x, 'Raster')){
return(NA)
}
if(is.null(area_of_cell)){
warning("no area_of_cell argument supplied -- area will be calulated as square-kilometers")
area_of_cell <- 10^-6
}
# if there are no habitat patches don't try to calc
if (sum(!is.na(raster::values(x))) == 0){
return(0)
  # if the whole raster is a single giant patch
  } else if (sum(is.na(raster::values(x))) == 0) {
    return(raster::ncell(x) * prod(raster::res(x)) * area_of_cell)
} else {
# default units from SDMTools are m^2
return(SDMTools::ClassStat(x)$mean.patch.area * area_of_cell)
}
}
#' hidden function that will use SDMTools to calculate the number of unique patches
#' on a given raster object
#' @export
calc_patch_count <- function(x=NULL){
# sanity check : is x even valid?
if(is.null(x) || !inherits(x, 'Raster')){
return(NA)
}
# if there are no habitat patches don't try to calc
if (sum(!is.na(raster::values(x))) == 0){
return(0)
  # if the whole raster is a single giant patch
  } else if (sum(is.na(raster::values(x))) == 0) {
return(1)
} else {
    # let SDMTools identify and count the discrete patches for us
return(SDMTools::ClassStat(x)$n.patches)
}
}
#' shorthand function that applies an arbitrary, user-supplied summary
#' statistic function across an input list of raster objects, returning a
#' scalar value attributable to each grid unit. Will optionally
#' reclassify each input raster to a binary using binary_reclassify() if a
#' non-null value is passed to from=
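#' @examples
#' \dontrun{
#' # mean cover across a hypothetical list of extracted rasters,
#' # reclassifying (hypothetical) classes 71 and 81 to a binary surface first
#' cover <- par_calc_stat(X=habitat_rasters, fun=calc_mean, from=c(71, 81))
#' }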
#' @export
par_calc_stat <- function(X=NULL, fun=NULL, from=NULL, backfill_missing_w=0, ...){
stopifnot(inherits(fun, 'function'))
e_cl <- parallel::makeCluster(parallel::detectCores()-1)
# assume our nodes will always need 'raster' and kick-in our
# copy of the binary_reclassify shorthand while we are at it
parallel::clusterExport(
e_cl,
varlist=c('binary_reclassify', ...),
envir=environment()
)
parallel::clusterCall(
e_cl,
function(x) library("raster")
)
# if we have a non-null value for from, assume
# that we want to do a re-classification
ret <- try(unlist(parallel::parLapply(
e_cl,
# assume x is already a binary if from is NULL
X = if(is.null(from)){
X
} else {
# nested parallel call to reclassify our input list, if needed
parallel::parLapply(
cl=e_cl,
X=X,
fun=binary_reclassify,
from=from,
nomatch=0
)
},
fun = function(i, ...) { fun(i, ...) }
)))
  if (inherits(ret, "try-error")) {
    stop(paste("encountered an error trying to process X=;",
               "is the function you are specifying compatible with raster",
               "objects?", sep=" "))
}
# account for any null/na return values from our FUN statistic
  if (!is.null(backfill_missing_w) && length(ret) < length(X)){
null_ret_values <- seq(1, length(X))[
!seq(1, length(X)) %in% as.numeric(names(ret))
]
null_rets <- rep(backfill_missing_w, length(null_ret_values))
names(null_rets) <- null_ret_values
ret <- c(ret, null_rets)
ret <- ret[order(as.numeric(names(ret)))]
}
# clean-up cluster and return FUN result to user as a vector
parallel::stopCluster(cl=e_cl); rm(e_cl); gc();
return(ret)
}
#' testing: std lapply implementation to benchmark against parLapply
l_calc_stat <- function(x, fun=NULL, from=NULL){
return(lapply(
X=if(!is.null(from)){
lapply(x, FUN=binary_reclassify, from=from)
} else {
x
},
FUN=fun
))
}