R/pointsInPolygons.R
In GmAMisc: 'Gianmarco Alberti' Miscellaneous

Documented in pointsInPolygons

#'R function to test points-in-polygons relationship
#'
#'The function allows to test:\cr -scenario a:\cr if there is a significant spatial association
#'between a set of points and a set of polygons, in terms of points falling within the polygons.
#'In other
#'words, it aims at testing whether a set of points falls inside a set of polygons more often than
#'would be expected by chance. The basic assumption is that the polygons are completely contained
#'within the study plot. If the shapefile (of polygon type) representing the study plot is not
#'provided, the calculations use the bounding polygon based on the union the convex hulls of the
#'point and of the polygon feature.\cr -scenario b:\cr if the distribution of points within a set of
#'polygons totally covering the study area can be considered random, or if the observed points count
#'for each polygon is larger or smaller than expected. P values are also reported.\cr
#'
#'The computations relative to scenario "a" are based on the 'dbinom() and 'pbinom()' functions. The
#'probability of observed count within polygons is \eqn{dbinom(x, size=n.of.points, prob=p)}, where
#''x'
#'is the observed number of points within polygons, 'n.of.points' is the total number of points, and
#''p' is the probability that a single point will be found within a polygon, which is equal to the
#'ratio between the area of the polygons and the total area of the study plot. The probability that
#'x or fewer points will be found within the polygons is \eqn{pbinom(x, size=n.of.points, prob=p)}.
#'
#'The calculations relative to the scenario "b" are again based on the binomial distribution: the
#'probability of the observed counts is \eqn{dbinom(x, size=n.of.points, prob=p)}, where 'x' is the
#'observed number of points within a given polygon, 'n.of.points' is the total number of points, and
#''p' is equal to the size of each polygon relative to sum of the polygons' area. The probability
#'that x or fewer points will be found within a given polygon is \eqn{pbinom(x, size=n.of.points,
#'prob=p)}.\cr
#'
#'@param point.feat Feature (of point type; SpatialPointsDataFrame class) whose spatial association
#'  with the polygons has to be assessed.
#'@param polyg.feat Feature (polygon type; SpatialPolygonsDataFrame) in relation to which the
#'  spatial association of the points has to be assessed.
#'@param studyplot Shapefile (of polygon type; SpatialPolygonsDataFrame) representing the study
#'  area; if not provided, the study area is internally worked out as the bounding polygon based on
#'  the union the convex hulls of the point and of the polygon feature.
#'@param scenario Select one of the two types of analysis available ("a" or "b").
#'@param buffer Add a buffer to the convex hull of the study area (0 by default); the unit depends
#'  upon the units of the input data.
#'@param cex.text Modify the size of the labels in the plot produced by the 'scenario b' option.
#'
#' @return For scenario "a" the function produces a plot of the points and polygons (plus the study
#' area), and relevant information are reported at the bottom of the chart itself.\cr
#'
#' A list is also
#'returned, containing what follows:\cr -$Polygons' area;\cr -$Study area's area;\cr -$Total # of
#'points;\cr -$Observed # of points in polygons;\cr -$Expected # of points in polygons;\cr -$Exact
#'probability of observed count within polygons;\cr -$Probability of <= observed count within
#'polygons;\cr -$Probability of >= observed count within polygons.
#'
#'For scenario "b" the function returns a plot showing the polygons plus the dots; in each polygon
#'the observed and expected counts are reported, and the p-value of the observed count is
#'indicated.\cr
#'
#'A matrix is also returned, containing what follows:\cr -polygons' area;\cr
#'-percentage area (size of each polygon relative to sum of the polygons' area; it corresponds to
#'the probability (p) fed into the binomial distribution function);\cr -observed number of
#'points;\cr -expected number of points;\cr -probability of observed counts;\cr -probability of
#'observed counts <= than expected;\cr -probability of observed counts >= than expected.\cr
#'
#'@keywords pointsInPolygons
#'
#'@export
#'
#'@importFrom rgeos gContains
#'
#' @examples
#' data(points)
#' data(polygons)
#'
#' result <- pointsInPolygons(points, polygons, scenario="a")
#'
#' data(events)
#' data(thiessenpolyg)
#'
#' result <- pointsInPolygons(events, thiessenpolyg, scenario="b")
#'
#'@seealso \code{\link{pointsToPointsTess}} ,
#'  \code{\link[stats]{dbinom}} , \code{\link[stats]{pbinom}}
#'
pointsInPolygons <- function(point.feat, polyg.feat, studyplot=NULL, scenario, buffer=0, cex.text=0.7){
  options(scipen=999)
  if(scenario=="a"){
    if(is.null(studyplot)==TRUE){
      region <- gConvexHull(union(gConvexHull(point.feat), gConvexHull(polyg.feat)))  #'union()' requires 'raster'; build the convex hull of the union of the convex hulls of the two features
    } else {
      region <- studyplot
    }
    ch <- gBuffer(region, width=buffer)                                                              #add a buffer to the convex hull, with width is set by the 'buffer' parameter; the unit depends upon the units of the input data
    ch.area <- gArea(ch)
    bypoly.area <- gArea(polyg.feat, byid=TRUE)                                                      #area of each single polygon
    n.of.points <- length(point.feat)                                                                       #total number of points
    p <- sum(bypoly.area)/ch.area                                                                           #probability that a single point will be found within a polygon
    q <- 1-p                                                                                                #probability that a point will be found outside of the polygons
    exp.n.points <- round(n.of.points * p, 2)                                                               #for n total points, expected number of points within the polygons
    x <- sum(colSums(gContains(polyg.feat, point.feat, byid = TRUE)))                                #observed number of points within polygons
    p.x <- round(dbinom(x, size=n.of.points, prob=p),5)                                                     #probability of observed count within polygons
    p.lessORequal <- round(pbinom(x, size=n.of.points, prob=p),5)                                           #cumulative probability from 1 to x
    p.largerORequal <- round(1-p.lessORequal,5)
    raster::plot(ch, col=NA, border="red", lty=2, main="Points-in-Polygons relationship", cex.main=0.9, sub=paste0("Polygons' area: ", round(sum(bypoly.area),3),"; Study area's area: ", round(ch.area,3), "\nTotal number of points: ", n.of.points, "; Obs. number of points in polygons: ", x, "; Expect. number of points within polygons: ", exp.n.points, "\nExact prob. of obs. count within polygons: ", p.x, "; Prob. of <= obs. count within polygons: ", p.lessORequal, "; Prob. of >= obs. count within polygons: ", p.largerORequal ), cex.sub=0.80)
    raster::plot(polyg.feat, add=TRUE)
    raster::plot(point.feat, pch=20, col="#00000088", add=TRUE)
    results <- list("Polygons' area"=round(sum(bypoly.area),3), "Study area's area"=round(ch.area,3), "Total # of points"=n.of.points, "Observed # of points in polygons"=x , "Expected # of points in polygons" = exp.n.points, "Exact probability of observed count within polygons"=p.x, "Probability of <= observed count within polygons"=p.lessORequal, "Probability of >= observed count within polygons"=p.largerORequal)
    return(results) } else {
      bypoly.area <- gArea(polyg.feat, byid = TRUE)
      perc.area <- bypoly.area / sum(bypoly.area)
      tot.n.points <- length(point.feat)
      bypoly.points <- colSums(gContains(polyg.feat, point.feat, byid = TRUE))
      exp.n.points <- tot.n.points * perc.area
      p.obs <- dbinom(bypoly.points, tot.n.points, perc.area)
      pLessOrEqual <- pbinom(bypoly.points, tot.n.points, perc.area)
      pLargerOrEqual <- 1 - pLessOrEqual
      df <- matrix(nrow=length(polyg.feat), ncol=7)
      colnames(df)<- c("polygon.area", "%area", "obs.n.points", "exp.n.points", "p.obs", "p.<=exp", "p.>=exp")
      df[,1] <- bypoly.area
      df[,2] <- round(perc.area,2)
      df[,3] <- bypoly.points
      df[,4] <- round(exp.n.points,2)
      df[,5] <- round(p.obs,5)
      df[,6] <- round(pLessOrEqual,5)
      df[,7] <- round(pLargerOrEqual,5)
      raster::plot(polyg.feat, main="Points-Polygons relationship", sub="By polygon observed/expected counts, and p-value of obs. counts", cex.main=0.9, cex.sub=0.8)
      raster::plot(point.feat, pch=20, col="#ff000088", add=TRUE)
      raster::plot(polyg.feat, add=TRUE)
      p.to.report <- ifelse(p.obs < 0.001, "< 0.001", ifelse(p.obs < 0.01, "< 0.01", ifelse(p.obs < 0.05, "< 0.05", round(p.obs, 3))))
      labls <- paste0(p.to.report, "\n(obs: ", df[,3],"; exp: ", round(df[,4],2), ")")
      text(x=coordinates(polyg.feat)[,1], y=coordinates(polyg.feat)[,2], labels=labls, cex=cex.text)
      return(df)
    }
}