R/generateSites.R

Defines functions generateSites

Documented in generateSites

#' Simulate observed and/or predicted points on an empty SpatialStreamNetwork.
#'
#' @description
#'   
#' This function takes an empty SpatialStreamNetwork and creates a set of proposed design points on the stream network. 
#' 
#' @param ssn an object of class SpatialStreamNetwork
#' @param obsDesign a design function from the package SSN.
#' @param predDesign a design function from the package SSN. By default, no prediction points are generated using the function \code{noPoints}. 
#' @param o.write a logical indicating whether any existing observed and/or predicted sites in \code{ssn@path} should be overwritten. Defaults to \code{FALSE}. 
#' @return An object of class SpatialStreamNetwork
#' 
#' @details
#'
#' This function works in a similar fashion to \code{createSSN} from the package SSN. This function uses one of the design functions provided in SSN to generate a set of sites on an empty SpatialStreamNetwork. The differences with this function are that (1) this function assumes the stream network already exists and concentrates only on generating the site locations and associated data, and (2) locates the sites in physical space with physical coordinates. The function \code{createSSN} only locates sites on an abstract network which is not associated with a coordinate reference system, hence limiting its applications to real data.
#' 
#' The new observed and/or predicted sites will appear as shapefiles in \code{ssn@path}.  
#' 
#' @examples 
#' 
#' # SSN with small random design
#' s <- createSSN(100, binomialDesign(20), 
#' path = tempPath("example02.ssn"), importToR = TRUE)
#' 
#' # Overwrite design with a systematic design
#' s2 <- generateSites(s, systematicDesign(0.5), o.write = TRUE) 
#' 
#' @export
generateSites <- function(ssn, obsDesign, predDesign = SSN:::noPoints, o.write = FALSE){
  
  # Check inputs
  if(class(ssn)[1] != "SpatialStreamNetwork"){
    stop("The argument ssn must be an object of class SpatialStreamNetwork.")
  }
  path <- ssn@path
  exst <- length(dir(path, "sites")) > 0
  if(!o.write & exst){
    stop("The argument o.write is false but a set of observed sites (possibly also prediction sites) already exists in the ssn path.")
  }
  if(o.write & exst){
    message("Please note that the existing observed sites (possibly also prediction sites) will be overwritten by the sites generated by this function.")
  }
  
  # Grab current working directory
  old.wd <- getwd()
  on.exit(setwd(old.wd)) # Make sure this happens even if process dies.
  
  # Set new working directory to ssn path for convenience
  setwd(path)
  
  # Now define some anonymous functions to use throughout:
  pmf <- function(binary_id1, binary_id2) {
    min_len <- min(nchar(binary_id1), nchar(binary_id2))
    for (j in 1:min_len) {
      if (substr(binary_id1, j, j) != substr(binary_id2,j, j)) 
        return(j - 1)
    }
    return(min_len)
  } # Used to create the distance matrix for the edges
  f <- function(row){
    rid <- as.integer(row[1])
    ratio <- as.numeric(row[2])
    location <- locatePointOnEdge(ssn, rid, ratio) 
    ret <- c(
      location[1:2], 
      edge.updist.net[names(edge.updist.net) == as.character(rid)] - location[3]
    )
    return(ret)
  } # Used to assign physical coordinates to points
  
  ##### THE HAPPY PATH -- EVERYTHING BEGINS HERE
  
  # Number of networks in ssn
  n.n <- nnetwork(ssn)
  
  # Initialise empty lists for everything to store info needed to create sites
  edges <- tree.graphs <- locations <- rids <- edge.lengths <- edge.updist <- dist.mats <- vector("list", n.n)
  
  # Establish SQL connections to the binaryid.db 
  drvr <- dbDriver("SQLite")
  conn <- dbConnect(drvr, "binaryID.db")
  on.exit(dbDisconnect(conn), add = TRUE)
  
  # Coerce netid columns to numeric to begin with...
  ssn@network.line.coords$NetworkID <- anum(ssn@network.line.coords$NetworkID)
  ssn@data$netID <- anum(ssn@data$netID)
  
  # Construct distance matrices for edges in network
  for(netid in 1:n.n){
    # Extract table from database and reorder rows
    bid.tab <- dbReadTable(conn, paste0("net", netid))
    bid.tab <- bid.tab[order(as.numeric(bid.tab$rid)), ]
    # Create empty distance matrix
    nr <- nrow(bid.tab)
    dist.mat.net <- matrix(0, nr, nr)
    colnames(dist.mat.net) <- rownames(dist.mat.net) <- bid.tab$rid
    # Extract out data for these edges, including RID and NETID and UPSTREAM DISTANCE
    edges.net <- subset(ssn@network.line.coords, NetworkID == netid)
    edge.updist.net <- edges.net$DistanceUpstream
    edge.rid.net <- edges.net$rid
    # Derive network distances for edges by partial string matching on the BIDs
    bids <- bid.tab$binaryID
    for(i in 1:nrow(bid.tab)){
      c.bid <- bid.tab$binaryID[i]
      c.rid <- bid.tab$red[i]
      c.upd <- edge.updist.net[which(edge.rid.net == c.rid)]
      m.chr <- sapply(
        bids,
        pmf,
        c.bid
      )
      m.sbs <- substr(bids, 1, m.chr)
      m.ind <- match(m.sbs, bids)
      if(any(is.na(m.ind))){
        stop("Internal error!")
      }
      d.upd <- edge.updist.net[m.ind]
      dist.mat.net[c.rid, ] <- pmax(c.upd - d.upd, rep(0, length(d.upd)))
    }
    ridx <- match(bid.tab$rid, edges.net$SegmentID)
    dist.mat.net <- dist.mat.net[ridx, ridx]
    dist.mats[[netid]] <- dist.mat.net + t(dist.mat.net)
  }
  
  # Turn the SSN into a graph so that the SSN design functions can operate on them
  graph.info <- readshpnw(ssn)
  graphs <- nel2igraph(graph.info[[2]], graph.info[[3]], eadf = graph.info[[5]], Directed = TRUE)
  
  # Extract information from graphs to make the design functions usable
  for(i in 1:n.n){
    tree.graphs[[i]] <- subgraph.edges(graphs, which(E(graphs)$netID == i))
    attributes.i <- E(tree.graphs[[i]])
    edge.lengths[[i]] <- attributes.i$Length # MAY NOT WORK FOR REAL SHAPEFILES ... NEED A SOLUTION
    rids[[i]] <- attributes.i$rid
    edge.updist[[i]] <- attributes.i$upDist
    names(edge.updist[[i]]) <- rids[[i]]
    locations[[i]] <- layout.auto(tree.graphs[[i]])
    edges[[i]] <- get.edgelist(tree.graphs[[i]])
  }
  
  # Use the design functions to generate observed and predicted sites
  obs.sites <- obsDesign(tree.graphs, edge.lengths, locations,
                          edge.updist, dist.mats) 
  pred.sites <- predDesign(tree.graphs, edge.lengths, locations,
                           edge.updist, dist.mats)
  
  # Get info on number of sites generated and adjust locIDs accordingly
  max.locID <- max(unlist(lapply(obs.sites, function(x) max(x$locID))))
  for (i in 1:length(pred.sites)) {
    pred.sites[[i]]$locID <- pred.sites[[i]]$locID + max.locID
  }
  n.obs.sites <- unlist(lapply(obs.sites, function(x) return(dim(x)[1])))
  n.pred.sites <- unlist(lapply(pred.sites, function(x) return(dim(x)[1])))
  cum.pids <- 0
  
  # Create empty data frames for obs and preds sites
  obs.site.data <- obs.loc.data <- data.frame()
  pred.site.data <- pred.loc.data <- data.frame()
  
  # Attach physical coordinates to all points, i.e. map from graphical to physical space
  for (netid in 1:n.n) {
    edge.lengths.net <- edge.lengths[[netid]]
    edges.net <- edges[[netid]]
    edge.updist.net <- edge.updist[[netid]]
    locations.net <- locations[[netid]]
    n.locations.net <- n.obs.sites[netid] + n.pred.sites[netid]
    obs.pid <- 1:n.obs.sites[netid] + cum.pids
    pred.pid <- 1:n.pred.sites[netid] + cum.pids
    rids.net <- rids[[netid]]
    preds.net <- pred.sites[[netid]]
    obs.net <- obs.sites[[netid]] 
    obs.location.data <- data.frame(
      rid = obs.net$edge,
      ratio = obs.net$ratio, 
      locID = obs.net$locID,
      stringsAsFactors = FALSE
    )
    pred.location.data <- data.frame(
      rid = preds.net$edge,
      ratio = preds.net$ratio,
      locID = preds.net$locID,
      stringsAsFactors = FALSE
    )
    # Several cases
    # 1. No locations in network
    # 2. Obs only
    # 3. Preds only
    # 4. Both obs and preds
    if(n.locations.net == 0){
      next()
    } else {
      n.obs.net <- nrow(obs.location.data)
      n.pred.net <- nrow(pred.location.data)
      if(n.obs.net > 0){
        obs.location.data.net <- t(apply(obs.location.data, 1, f))
        colnames(obs.location.data.net) <- c("NEAR_X", "NEAR_Y", "upDist")
        obs.data.net <- data.frame(
          locID = obs.location.data[,"locID"],
          upDist = obs.location.data.net[,"upDist"],
          pid = obs.pid,
          netID = rep(netid, nrow(obs.location.data.net)),
          rid = obs.location.data[,"rid"],
          ratio = obs.location.data[,"ratio"],
          stringsAsFactors = FALSE
        )
        if(ncol(obs.net) > 3){
          obs.data.net <- cbind(
            obs.data.net,
            obs.net[, 4]
          )
          names(obs.data.net)[7] <- names(obs.net)[4]
        }
        row.names(obs.data.net) <- row.names(obs.location.data.net) <- obs.pid
        obs.site.data <- rbind(obs.site.data, obs.data.net)
        obs.loc.data <- rbind(obs.loc.data, obs.location.data.net)
      }
      if(n.pred.net > 0){
        pred.location.data.net <- t(apply(pred.location.data, 1, f))
        colnames(pred.location.data.net) <- c("NEAR_X", "NEAR_Y", "upDist")
        pred.data.net <- data.frame(
          locID = pred.location.data[,"locID"],
          upDist = pred.location.data.net[,"upDist"],
          pid = pred.pid,
          netID = rep(netid, nrow(pred.location.data.net)),
          rid = pred.location.data[,"rid"],
          ratio = pred.location.data[,"ratio"],
          stringsAsFactors = FALSE
        )
        if(ncol(preds.net) > 3){
          pred.data.net <- cbind(
            pred.data.net,
            preds.net[, 4]
          )
          names(pred.data.net)[7] <- names(preds.net)[4]
        }
        row.names(pred.data.net) <- row.names(pred.location.data.net) <- pred.pid
        pred.site.data <- rbind(pred.site.data, pred.data.net)
        pred.loc.data <- rbind(pred.loc.data, pred.location.data.net)
      }
    }
    cum.pids <- cum.pids + n.locations.net - n.pred.sites[netid]
  }
  
  # Count number of observed and prediction sites
  total.obs <- nrow(obs.site.data)
  total.pred <- nrow(pred.site.data)
  
  # Check that number of observed sites is legal
  if(total.obs < 1){
    stop("There must be at least one observed site.")
  }
  if(total.pred > 0){
    pred.site.data$pid <- max(obs.site.data$pid) + pred.site.data$pid
    row.names(pred.site.data) <- row.names(pred.loc.data) <- pred.site.data$pid
  }
  
  # Deal with observed sites first since these will always exist
  ind.order.obs <- order(as.numeric(row.names(obs.site.data)))
  obs.site.data <- obs.site.data[ind.order.obs, ]
  obs.loc.data <- obs.loc.data[ind.order.obs, ]
  sites.shp <- SpatialPointsDataFrame(
    obs.loc.data[, c("NEAR_X", "NEAR_Y")],
    obs.site.data,
    match.ID = TRUE
  )
  writeOGR(sites.shp, "sites.shp", "sites", driver = "ESRI Shapefile", overwrite_layer = TRUE)
  
  # Check all required attributes exist in table
  ind1 <- colnames(sites.shp@data) == c("netID")
  ind2 <- colnames(sites.shp@data) == c("rid")
  ind3 <- colnames(sites.shp@data) == c("upDist")
  if (sum(ind1) == 0) {
    stop("netID is missing from sites attribute table")
  }
  if (sum(ind2) == 0) {
    stop("rid is missing from sites attribute table")
  }
  if (sum(ind3) == 0) {
    stop("upDist is missing from sites attribute table")
  }
  network.point.coords <- data.frame(
    sites.shp@data[, "netID"], 
    sites.shp@data[, "rid"], 
    sites.shp@data[, "upDist"]
  )
  colnames(network.point.coords) <- c("NetworkID", "SegmentID", "DistanceUpstream")
  network.point.coords <- as.data.frame(network.point.coords)
  row.names(network.point.coords) <- row.names(sites.shp@data)
  attributes(network.point.coords)$locID <- as.numeric(as.character(sites.shp@data$locID))
  op <- new("SSNPoint", network.point.coords = network.point.coords, 
            point.coords = sites.shp@coords, point.data = sites.shp@data, 
            points.bbox = sites.shp@bbox)
  ssn@obspoints@SSNPoints[[1]] <- op
  ssn@obspoints@ID[[1]] <- "Obs"
  
  # Do the same for the prediction preds
  if(total.pred > 0){
    ind.order.pred <- order(as.numeric(row.names(pred.site.data)))
    pred.site.data <- pred.site.data[ind.order.pred, ]
    pred.loc.data <- pred.loc.data[ind.order.pred, ]
    preds.shp <- SpatialPointsDataFrame(
      pred.loc.data[, c("NEAR_X", "NEAR_Y")],
      pred.site.data,
      match.ID = TRUE
    )
    writeOGR(preds.shp, "preds.shp", "preds", driver = "ESRI Shapefile", overwrite_layer = TRUE)
    ind1 <- colnames(preds.shp@data) == c("netID")
    ind2 <- colnames(preds.shp@data) == c("rid")
    ind3 <- colnames(preds.shp@data) == c("upDist")
    if (sum(ind1) == 0) {
      stop("netID is missing from preds attribute table")
    }
    if (sum(ind2) == 0) {
      stop("rid is missing from preds attribute table")
    }
    if (sum(ind3) == 0) {
      stop("upDist is missing from preds attribute table")
    }
    network.point.coords <- data.frame(preds.shp@data[, "netID"], preds.shp@data[, "rid"], preds.shp@data[, "upDist"])
    colnames(network.point.coords) <- c("NetworkID", "SegmentID", "DistanceUpstream")
    network.point.coords <- as.data.frame(network.point.coords)
    row.names(network.point.coords) <- row.names(preds.shp@data)
    attributes(network.point.coords)$locID <- as.numeric(levels(preds.shp@data$locID))
    pp <- new("SSNPoint", network.point.coords = network.point.coords, 
              point.coords = preds.shp@coords, point.data = preds.shp@data, 
              points.bbox = preds.shp@bbox)
    ssn@predpoints@SSNPoints[[1]] <- pp
    ssn@predpoints@ID[[1]] <- "preds"
  }
  
  # Finally, return the result
  if(total.pred > 0){
    return(importSSN(path, predpts = "preds"))
  } else {
    return(importSSN(path))
  }
}
apear9/SSNdesign documentation built on Feb. 19, 2020, 4:29 a.m.