R/osh_filter.R

# add filter on nd ref and member and vice versa.
# for geo filter - ask if to apply to all object (or to just apply to nodes)

#' A class used to store filtering conditions for oshex objects
#'
#' An \code{oshExpression} is an S4 class used in \code{\link{oshExList-class}} to store oshex filtering
#' conditions. For more details see \code{\link{oshExList-class}}.
#'
#' @slot condition A named character vector with filtering codntions information.
#' @slot type A character vector specifying the condition type. See \code{\link{oshExList-class}} for more details.
#'

oshExpression <- setClass("oshExpression",
         slots = c("condition" = "character",
                   "type" = "character"))

#' @template oshExList
setClass("oshExList")

#' @template oshExList
oshExListTemp <- setClass("oshExListTemp",
                          slots = c("conditions" = "list"),
                          contains = "oshExList")

#' @template oshExList
oshExListTag <- setClass("oshExListTag",
                         slots = c("conditions" = "list"),
                         contains = "oshExList")

#' @template oshExList
oshExListAttrs <- setClass("oshExListAttrs",
                           slots = c("conditions" = "list"),
                           contains = "oshExList")

#' @template oshExList
oshExListGeo <- setClass("oshExListGeo",
                         slots = c("conditions" = "list"),
                         contains = "oshExList")


# No Documentation
checkExprs <- function(object, exprs){
  unlist(lapply(object, function(z) {
    if(!is.null(z)) {
      if(class(exprs) != "list") exprs <- list(exprs)
      boolean <- unlist(lapply(exprs, function(x) {

        if (x@type %in% c("txt")) {
          col <- grep(x@condition["k"], names(z))
          if(length(col) > 0) {
            if(length(x@condition) > 1) {
              return(length(grep(x@condition["v"], z[col])) > 0)
            } else {
              return(TRUE)
            }
          }
          return(FALSE)

        } else if (x@type %in% "number") {
          col <- grep(x@condition["k"], names(z))
          if(length(col) > 0) {
            if(length(x@condition) > 1) {
              return(eval(parse(text = sprintf("%s%s", z[col], x@condition["v"]))))
            } else {
              return(TRUE)
            }
          } else {
            return(FALSE)
          }
        } # else if ..temp...geog
      }))
      all(boolean)
    } else {
      FALSE
    }
  }))}


# No Documentation
inPoint <- function(coords, query) {
  latCon <- coords[["lat"]] >= query["minlat"] & coords[["lat"]] <= query["maxlat"]
  lonCon <- coords[["lon"]] >= query["minlon"] & coords[["lon"]] <= query["maxlon"]

  return(all(c(latCon, lonCon)))
}

#' @export
#' @template bin_op

`%eq%`   <- function(k, v) {
  args <- as.list(match.call())[-1]
  args <- lapply(args, function(x) {
    if(class(x) %in% c("name", "call")) return(deparse(x))
    return(x)
  })

  if(!args$v %in% ".") args$v <- sprintf("^%s$", args$v)

  args <- unlist(args)
  return(args)
} # for equality

#' @export
#' @template bin_op

`%inc%`  <- function(k, v) {
  args <- as.list(match.call())[-1]
  args <- lapply(args, function(x) {
    if(class(x) %in% c("name", "call")) return(deparse(x))
    return(x)
  })

  args <- unlist(args)
  return(args)
} # for inclusion

#' @export
#' @template bin_op

`%l%`    <- function(k, v) {
  args <- as.list(match.call())[-1]
  args <- lapply(args, function(x) {
    if(class(x) %in% c("name", "call")) return(deparse(x))
    return(x)
  })

  args$v <- sprintf("<%s", args$v)

  args <- unlist(args)
  return(args)
}

#' @export
#' @template bin_op

`%m%`    <- function(k, v) {
  args <- as.list(match.call())[-1]
  args <- lapply(args, function(x) {
    if(class(x) %in% c("name", "call")) return(deparse(x))
    return(x)
  })

  args$v <- sprintf(">%s", args$v)

  args <- unlist(args)
  return(args)
}

#' @export
#' @template bin_op

`%meq%`   <- function(k, v) {
  args <- as.list(match.call())[-1]
  args <- lapply(args, function(x) {
    if(class(x) %in% c("name", "call")) return(deparse(x))
    return(x)
  })

  args$v <- sprintf(">=%s", args$v)

  args <- unlist(args)
  return(args)
}

#' @export
#' @template bin_op

`%leq%`   <- function(k, v) {
  args <- as.list(match.call())[-1]
  args <- lapply(args, function(x) {
    if(class(x) %in% c("name", "call")) return(deparse(x))
    return(x)
  })

  args$v <- sprintf("<=%s", args$v)

  args <- unlist(args)
  return(args)
}

# types: tag.n, tag.t
#' Build an \code{oshExListTag} condition
#'
#' The function is used to create an \code{\link{oshExListTag}} condition that can be used by the \code{\link{osh_filter}} function.
#'
#' @param ... One or more key-value pairs binary operators. See 'Details' below for a description of each operator.
#'
#' @section Details:
#' The \code{tag} function can be used to create one or multiple conditions bundeled together within an \code{\link{oshExList-class}} object.
#' \code{tag} can be also used to filter data based on numeric relations (i.e. inequalities)
#' Use concatenate to create a filter for a key-value pair (see examples).
#'
#' @export
#' @examples
#' \dontrun{
#'   # Filter highway of type primary only
#'   cond <- tag(highway %eq% primary)
#'
#'   # Filter only buildings with house number equal or higher than 30
#'   # Use k %eq% ".", to get all objects with the k tag.
#'   cond <- tag(building %eq% ".", addr:housenumber %meq% 30)
#'   }

tag <- function(...) {
  conds <- list(...)
  oexpl <- oshExListTag()
  oexpl@conditions <- lapply(conds, function(x) {
    tag.type <- "txt"
    if(length(grep(">|<|<=|>=", x)) > 0) tag.type <- "number"
    oexp <- oshExpression()
    oexp@condition <- x
    oexp@type <- tag.type
    return(oexp)
  })
  return(oexpl)
}

#types: attrs
#' Build an \code{oshExListAttrs} condition
#'
#' The function is used to create an \code{\link{oshExListAttrs}} condition that can be used by the \code{\link{osh_filter}} function.
#'
#' @param ... One or more key-value pairs binary operators. See 'Details' below for a description of each operator.
#'
#' @section Details:
#' The \code{attrs} function can be used to create one or multiple conditions bundeled together within an \code{\link{oshExList-class}} object.
#' Currently \code{attrs} can't filter by numeric relations.
#' Use concatenate to create a filter for a key-value pair (see examples).
#' @export
#' @examples
#' \dontrun{
#'   # Filter edits by osm user name
#'   cond <- attrs(k %eq% wew84)
#'
#'   # Use quotation marks for multiple words quarries
#'   cond <- attrs(k %eq% "JumpStart International")
#'   }

attrs <- function(...) {
  conds <- list(...)
  oexpl <- oshExListAttrs()
  oexpl@conditions <- lapply(conds, function(x) {
    tag.type <- "txt"
    oexp <- oshExpression()
    oexp@condition <- x
    oexp@type <- tag.type
    return(oexp)
  })
  return(oexpl)
}

#types: temp
#' Build an \code{oshExListTemp} condition
#'
#' The function is used to create an \code{\link{oshExListTemp}} condition that can be used by the \code{\link{osh_filter}} function.
#' Extracts a temporal subset from an oshex object.
#'
#' @param from A character vector represents the subset's starting date.
#' @param to A character vector represents the subset's ending date.
#' @param format A character vector represents the date format,
#' defaults to: \code{\%d-\%m-\%Y}.
#' @export
#' @examples
#' \dontrun{
#'   # Subset only object that have been editted after the from date.
#'   cond <- temp(from = "1-10-2017", format = "%d-%m-%Y")
#'   }
#'

temp <- function(from = NA, to = NA, format = "%d-%m-%Y") {
  oexpl <- oshExListTemp()
  oexpl@conditions <- mapply(function(x, f) {
    if(!is.na(x)) {
      tag.type <- "time"
      oexp <- oshExpression()
      oexp@condition <- c("operator" = f, "date" = sprintf("as.Date('%s', '%s')", x, format))
      oexp@type <- tag.type
      return(oexp)
    }}, c(from, to), c(">=", "<="))

  names(oexpl@conditions) <- c("from", "to")
  return(oexpl)
}

#types:  geobox
#' Build an \code{oshExListGeo} condition
#'
#' The function is used to create an \code{\link{oshExListGeo}} condition that can be used by the \code{\link{osh_filter}} function.
#' Extracts a geographic subset from an oshex object.
#'
#' @param minlat A numeric minimum latitude.
#' @param minlon A numeric minimum longtitude.
#' @param maxlat A numeric maximum latitude.
#' @param maxlon A numeric maximum longtitude.
#' @export
#' @examples
#' \dontrun{
#'   # Subset only object that have been editted after the from date.
#'   cond <- geobox(minlat = 31.2614, minlon = 34.7880, maxlat = 31.2635, maxlon = 34.8250)
#'   }
#'

geobox <- function(minlat, minlon, maxlat, maxlon) {
  oexp <- oshExpression()

  if(maxlat <= minlat || maxlon <= minlon) stop("max lat/lon must be bigger then min lat/lon")

  oexp@condition <- setNames(as.character(c(minlat, minlon, maxlat, maxlon)),
                             nm = c("minlat", "minlon", "maxlat", "maxlon"))
  oexp@type <- "geo"

  oexpl <- oshExListGeo()
  oexpl@conditions <- list(oexp)

  return(oexpl)
}


# checkExprs as generic

setGeneric("checkExprs", function(object, exprs) {
  standardGeneric("checkExprs")
})


setClassUnion("listORmissing", c("list", "NULL", "missing", "oshExListAttrs", "oshExListTag", "oshExListTemp", "oshExListGeo"))
setClassUnion("logicalORmissing", c("logical", "missing"))

## OSH filter

#' Apply filters to an \code{oshex-class}
#'
#' Filter an \code{oshex-class} based on temporal, tags and attributes expression Lists.
#'
#' @param object An \code{oshex} object as given by \code{\link[OSHEX]{as_oshex}}.
#' @param conditions One condition given as an \code{\link{oshExList-class}} object, or
#' a list with several conditions.
#' @param union A logical to determine if the result is a union of all conditions (if TRUE) or
#' their intersection (if FALSE). Defaults to FALSE.
#' @param full.parse A logical to determine if filter should keep interrelated objects (See 'Details' below).
#' Defaults to TRUE.
#' @section Details:
#' Information in an \code{oshex} object is split by type between \code{nodes}, \code{ways}, and \code{relations}.
#' Nodes function both as independent objects (e.g. bus-stops) or as building blocks of a way (line or polygons).
#' In addition, individual objects play a role as members of a relation object. For example, bus-stops (nodes),
#' and bus-routes (ways) form together a public transport relation.
#' \code{osh_filter} can  account for these interrelated objects by setting \code{full.parse = TRUE}.
#' @export
#' @examples
#' \dontrun{
#' highway <- osh_filter(object = osh, conditions = tag(c(k %eq% highway, v %eq% primary), union = FALSE)
#' }

setGeneric("osh_filter", function(object,
                                  conditions = NULL,
                                  union = FALSE,
                                  full.parse = TRUE){
  standardGeneric("osh_filter")
})




###
#' @describeIn osh_filter Filter an oshex object.
setMethod("osh_filter",
          signature(object = "oshex", conditions = "listORmissing", union = "logicalORmissing",
                    full.parse = "logicalORmissing"), # should get a list of conditions
          function(object, conditions = NULL, union = FALSE, full.parse = TRUE) {
            if(class(conditions) != "list") conditions <- list(conditions)
            slts <- slotNames(object)[!slotNames(object) %in% "meta"]

            filterTable <- setNames(lapply(slts, function(slt) {rep(0, length(slot(object, slt)))}), slts)

            updateMetaTemp <- FALSE
            updateMetaGeo  <- FALSE

            for (cond. in conditions) {
              if (class(cond.) %in% "oshExListTemp") updateMetaTemp <- TRUE
              if (class(cond.) %in% "oshExListGeo")  updateMetaGeo  <- TRUE

              for (slt in slts) {
                if((class(cond.) %in% "oshExListTag" | full.parse) & is.null(slot(object, slt)@tags)) {  # check if tags  null

                    slotsData <- lapply(slot(object, slt)@data, function(x) {
                      tmpData <- XML::xmlApply(x, function(x) {
                        iconv(XML::xmlAttrs(x) ,"UTF-8", "UTF-8")
                      })

                      outData <- list()

                      if(!is.null(tmpData) & "tag" %in% names(tmpData)) {
                        objTags <- t(do.call("rbind", tmpData[names(tmpData) %in% "tag"]))
                        outData <- c(outData, list("objTags" = objTags))

                      }  # tags

                      if(!is.null(tmpData) & "nd" %in% names(tmpData)) {
                        objRefs <- as.character(t(do.call("rbind", tmpData[names(tmpData) %in% "nd"])))
                        outData <- c(outData, list("objRefs" = objRefs))
                      }  # nodes references

                      if(!is.null(tmpData) & "member" %in% names(tmpData)) {
                        objMembers <- do.call("rbind",tmpData[names(tmpData) %in% "member"])
                        outData <- c(outData, list("objMembers" = objMembers))
                      } # members objects

                      return(outData)
                    })

                    slot(object, slt)@tags <- lapply(slotsData, function(l) {
                      if(!is.null(l) && class(l$objTags) %in% "matrix") {
                        setNames(l[names(l) %in% "objTags"]$objTags["v", ],
                                 nm = l[names(l) %in% "objTags"]$objTags["k", ])
                      } else {
                        l$objTags
                      }
                    })

                    if(class(slot(object, slt)) %in% "wayList") {
                      slot(object, slt)@ndrefs <- lapply(slotsData, function(l) {
                        unlist(unname(l[names(l) %in% "objRefs"]))
                        })
                    }

                    if(class(slot(object, slt)) %in% "relationList") {
                      slot(object, slt)@members <- lapply(slotsData, function(l) {
                        unname(l[names(l) %in% "objMembers"])
                        })
                    }
                }

                tmpBoolVect <- osh_filter(slot(object, slt),
                                  conditions = cond.,
                                  full.parse = full.parse)
                tmpBoolVect[is.na(tmpBoolVect)] <- 0

                filterTable[[slt]] <-  filterTable[[slt]] + tmpBoolVect


                if(full.parse) {

                  if(class(cond.) %in% "oshExListGeo" & slt %in% "node") {
                    ndids  <- unique(do.call("rbind", slot(object, slt)@attrs[filterTable[[slt]] == 1])[ , "id"])
                    wayBool <- unlist(lapply(object@way@ndrefs, function(refs) {
                      any(refs %in% ndids)
                    }))
                    wayids <- unique(unlist(do.call("rbind", object@way@attrs[wayBool]))[ , "id"])
                    relBool <- unlist(lapply(object@relation@members, function(refs) {
                      any(refs$ref %in% c(ndids, wayids))
                    }))
                    relids <- unique(unlist(do.call("rbind", object@relation@attrs[relBool]))[ , "id"])

                    filterTable$way <-  filterTable$way + wayBool
                    filterTable$relation <-  filterTable$relation + relBool
                  }


                  if("ndrefs" %in% slotNames(slot(object, slt))) { # find noderefs
                    ndids <- unique(unlist(slot(object, slt)@ndrefs[which(filterTable[[slt]] > 0)]))
                    if(!is.null(ndids)) {
                      ndattrs <- do.call("rbind", object@node@attrs)[ , "id"]
                      ndids <- ndids[!ndids %in% ndattrs[filterTable[["node"]] > 0]]

                      filterTable[["node"]] <- filterTable[["node"]] + (ndattrs %in% ndids)
                    }
                  }

                  if("members" %in% slotNames(slot(object, slt))) { # find memberrefs
                    memids <- unlist(slot(object, slt)@members[which(filterTable[[slt]] > 0)], recursive = FALSE)
                    if(!is.null(memids)) {
                      memids <- do.call("rbind", memids)
                      for (type. in unique(memids[ ,"type"])) {

                        typeattrs <- do.call("rbind", slot(object, type.)@attrs)[ , "id"]
                        memids_ref <- memids[!memids[memids[ ,"type"] %in% type. ,"ref"] %in% typeattrs[filterTable[[type.]] > 0], "ref"]

                        filterTable[[type.]] <- filterTable[[type.]] + typeattrs %in% unique(memids_ref)
                      }

                      if("way" %in% unique(memids[ ,"type"])) { # finde nodes from ref ways found in members
                        addndids <- unique(unlist(object@way@ndrefs[filterTable$way]))
                        addndids <- addndids[!addndids %in% do.call("rbind", object@node@attrs)[ ,"id"][filterTable[["node"]]]]
                        filterTable[["node"]] <- filterTable[["node"]] + do.call("rbind", object@node@attrs)[ , "id"] %in% addndids
                      }
                    }
                }
              }
              }
            }

            for(slt in slts) {

              thresh <- 1
              if (!union) thresh <- length(conditions)

              indsVector <- which(filterTable[[slt]] >= thresh) # if intersect than exact >= length(conditons)

              slot(object, slt)@data <- slot(object, slt)@data[indsVector]

              if(!is.null(slot(object, slt)@tags))  slot(object, slt)@tags <- slot(object, slt)@tags[indsVector]
              if(!is.null(slot(object, slt)@attrs)) slot(object, slt)@attrs <- slot(object, slt)@attrs[indsVector]

              if(slt %in% "way" && !is.null(slot(object, slt)@ndrefs))        slot(object, slt)@ndrefs  <- slot(object, slt)@ndrefs[indsVector]
              if(slt %in% "relation"  && !is.null(slot(object, slt)@members)) slot(object, slt)@members <- slot(object, slt)@members [indsVector]
            }

            if(updateMetaTemp) {
              # update Meta Temp
              if(length(object@node) == 0 & length(object@way) == 0 & length(object@relation) == 0)  {
                object@meta@temporal <- NA
              } else {
                timestamp <- do.call("rbind", lapply(c("node", "way", "relation"), function(slt) {
                  tmp <- as.Date(do.call("rbind", slot(object, slt)@attrs)[ , "timestamp"], format = "%Y-%m-%d")
                  return(c("from" = min(tmp), "to" = max(tmp)))
                }))

                object@meta@temporal <- c("from" = as.Date(min(timestamp[ , "from"]), origin = "1970-01-01"),
                                          "to" = as.Date(max(timestamp[ ,"to"]), origin = "1970-01-01"))
              }
            }

            if(updateMetaGeo) {
              # update Meta Geo
              if(length(object@node) == 0)  {
                object@meta@extent <- NA
              } else {
                tmp <- apply(do.call("rbind", object@node@attrs)[ , c("lat", "lon")], 2, as.numeric)
                object@meta@extent <- setNames(c(min(tmp[ ,"lat"]),
                                                 min(tmp[ ,"lon"]),
                                                 max(tmp[ ,"lat"]),
                                                 max(tmp[ ,"lon"])) ,nm = names(object@meta@extent))
              }
            }

            object@meta@version <- object@meta@version + 1

            return(object)
          })

#"oshExListTag"
#' @describeIn osh_filter Filter the tags slot of an oshElement object.
setMethod("osh_filter",
          signature(object = "oshElement", conditions = "oshExListTag", union = "logicalORmissing",
                    full.parse = "logicalORmissing"),
          function(object, conditions = NULL, union = FALSE, full.parse = TRUE) {

            indsVector <- rep(FALSE, length(object@data))
            threshold  <- 0

            for(condition in conditions@conditions) {
              indsVector <- indsVector + checkExprs(object@tags, condition)
              threshold <- threshold + 1
            }


            indsVector <- indsVector == threshold

            return(indsVector)
          })

#"oshExListAttrs"
#' @describeIn osh_filter Filter the attrs slot of an oshElement object
setMethod("osh_filter", # should also differentiate between text and numeric; may be also added to the class
          signature(object = "oshElement", conditions = "oshExListAttrs", union = "logicalORmissing",
                    full.parse = "logicalORmissing"),
          function(object, conditions = NULL, union = FALSE, full.parse = TRUE) {

            # check if tags  exist
            if(is.null(object@attrs)) {
              object@attrs <- lapply(object@data, function(x) {
                iconv(xmlAttrs(x), "UTF-8", "UTF-8")
              })
            }

            indsVector <- rep(FALSE, length(object@data))
            threshold  <- 0

            for(condition in conditions@conditions) {
              indsVector <- indsVector + checkExprs(object@attrs, condition)
              threshold <- threshold + 1
            }


            indsVector <- indsVector == threshold

            return(indsVector)

          })

#"oshExListTemp"
#' @describeIn osh_filter Filter the timestamp of an oshElement object
setMethod("osh_filter",
          signature(object = "oshElement", conditions = "oshExListTemp", union = "logicalORmissing",
                    full.parse = "logicalORmissing"),
          function(object, conditions = NULL, union = FALSE, full.parse = TRUE) {

            # check if attrs  exist
            if(is.null(object@attrs)) {
              object@attrs <- lapply(object@data, function(x) {
                iconv(xmlAttrs(x), "UTF-8", "UTF-8")
              })
            }

            indsVector <- rep(FALSE, length(object@data))
            threshold  <- 0
            dates <-  unlist(lapply(object@attrs, function(x) {as.Date(x["timestamp"], format = "%Y-%m-%d")}))

            for(condition in conditions@conditions) {
              if(!is.null(condition)) {

                indsVector <- indsVector + do.call(condition@condition["operator"], list(dates, as.numeric(eval(parse(text = condition@condition["date"])))))
                threshold <- threshold + 1

              }

            }


            indsVector <- indsVector == threshold

            return(indsVector)

          })


#"oshExListGeo"
#' @describeIn osh_filter Filters an osh nodeList based on a geographic query.
setMethod("osh_filter",
          signature(object = "nodeList", conditions = "oshExListGeo", union = "logicalORmissing",
                    full.parse = "logicalORmissing"),
          function(object, conditions = NULL, union = FALSE, full.parse = TRUE) {

            indsVector <- rep(FALSE, length(object@data))
            threshold  <- 0

            for(condition in conditions@conditions) {

              coords <- do.call("rbind", lapply(object@attrs, function(row) {
                setNames(as.numeric(row[(c("lat", "lon"))]), c("lat", "lon"))
              }))


              boolList <- unlist(lapply(1:nrow(coords), function(row) {
                inPoint(coords[row, ], query = condition@condition)
              }))


              indsVector <- indsVector + boolList
              threshold <- threshold + 1
            }


            indsVector <- indsVector == threshold

            return(indsVector)
          })


#"oshExListGeo"
#' @describeIn osh_filter Filters an oshElement object based on a geographic query.
setMethod("osh_filter",
          signature(object = "oshElement", conditions = "oshExListGeo", union = "logicalORmissing",
                    full.parse = "logicalORmissing"),
          function(object, conditions = NULL, union = FALSE, full.parse = TRUE) {

            indsVector <- rep(FALSE, length(object@data))
            threshold  <- 0

            for(condition in conditions@conditions) {

              coords <- do.call("rbind", lapply(object@attrs, function(row) {
                setNames(as.numeric(row[(c("lat", "lon"))]), c("lat", "lon"))
              }))


              boolList <- unlist(lapply(1:nrow(coords), function(row) {
                inPoint(coords[row, ], query = condition@condition)
              }))


              indsVector <- indsVector + boolList
              threshold <- threshold + 1
            }


            indsVector <- indsVector == threshold


            return(indsVector)
          })
dof1985/OSHEX documentation built on May 28, 2019, 3:36 p.m.