R/gen_hsa.r

Defines functions gen_hsa

Documented in gen_hsa

#' Generate HSAs
#'
#' @param shp shapefile SpatialPolygonsDataFrame
#' @param from patients home region
#' @param to patients destination
#' @param li_threshold localization index threshold
#' @param plot logical; display the iterations of the HSAs (defaults to TRUE)
#' @param verbose logical; display information during iterations (defaults to FALSE)
#' @param savePlots character with path and name to save plots to. Will be appended with the iteration number. Plots are saved as PNGs
#' @param ... additional options to png (only necessary when savePlots is not NULL)
#' @details Rownames of \code{shp} should be in common with \code{from} and \code{to}. If this is not the case, warnings (if some are missing) or errors will be issued (if no names are in common). Rownames can be set via, e.g., \code{spChFIDs(shp, shp@data$name)}.
#' @return object of class hsa containing the following elements:
#' \describe{
#'   \item{\code{call}}{call to \code{gen_hsa}}
#'   \item{\code{lookup}}{final lookup table}
#'   \item{\code{lookups}}{intermediate lookup tables}
#'   \item{\code{original_data}}{list containing from and to originally submitted
#'   
#'     \code{from}: \code{from} as submitted to \code{gen_hsa}
#'     
#'     \code{to}: \code{to} as submitted to \code{gen_hsa}}
#'   \item{\code{reassigned_data}}{list containing from and to as assigned to HSAs
#'   
#'     \code{from}: \code{from} converted to its HSA 
#'   
#'     \code{to}: \code{to} converted to its HSA}
#'   \item{\code{method}}{method used to assign each region to its HSA}
#'   \item{\code{original_shp}}{shapefile submitted to \code{gen_hsa} (via \code{shp})}
#'   \item{\code{shp}}{shapefile containing HSAs}
#'   \item{\code{iterations}}{number of iterations}
#'   \item{\code{li}}{localization index of the HSAs}
#'   \item{\code{li_threshold}}{\code{li_threshold} option}
#'   \item{\code{min_interventions}}{\code{min_interventions} option}
#'   \item{\code{n}}{number of HSAs created}
#'   \item{\code{n_it}}{number of HSAs in each iteration}
#'   \item{\code{n_interventions}}{number of interventions in each HSA after the final iteration}
#'   \item{\code{names}}{name of each HSA}
#'   \item{\code{flow}}{flow amongst HSAs (as generated by \code{flows})}
#'   
#' }
#' @export gen_hsa
#' @examples


gen_hsa <- function(shp, from, to, 
                    li_threshold = .4, min_interventions = 10, 
                    plot = TRUE, verbose = FALSE, debug = 0, 
                    savePlots = NULL, plotOpts = list(height = 10, width = 15, units = "cm", res = 600)){
  
  # call <- sys.call()
  # li_threshold = .4
  # debug = 0         
  # plot = TRUE     
  # verbose = TRUE  
  # min_interventions = 10
  # savePlots = NULL
  if(plot){ 
    pars <- par("mai")
    on.exit(suppressWarnings(par(mai = pars)))
    par(mai = c(0,0,0,0))
  }
  
  # function to collapse shp ----
  collapse <- function(){
    dat <- shp0@data
    if(debug == 2) { 
      print(head(dat))
      print(str(shp, 2))
      print(length(shp0@data$assignment))
    }
    shp_poly <- unionSpatialPolygons(shp0, dat$assignment)
    dat <- dat[row.names(dat) %in% dat$assignment, ]
    shp <- SpatialPolygonsDataFrame(shp_poly, dat)
    shp_poly <<- shp_poly
    shp
  }
  
  iflows <- function(from = NULL, to = NULL, ...){
    ffrom <- shp0@data$assignment[match(from0, row.names(shp0@data))]
    fto <- shp0@data$assignment[match(to0, row.names(shp0@data))]
    if(!is.null(from)) ffrom <- from
    if(!is.null(from)) fto <- to
    flow <- flows(from = ffrom, to = fto, ...)
    ffrom <<- ffrom
    fto <<- fto
    flow
  }
  
  # log <- data.frame(it = rep(NA, 1e6), 
  #                   region = rep(NA, 1e6), 
  #                   hsa = rep(NA, 1e6))
  # logger <- function(region, hsa){
  #   w <- which(is.na(log$it))[1:length(region)]
  #   if(max(w) > nrow(log)) log <- rbind(log, data.frame(it = rep(NA, 1e6), 
  #                                                       region = rep(NA, 1e6), 
  #                                                       hsa = rep(NA, 1e6)))
  #   log$it[w] <- it
  #   log$region[w] <- region
  #   log$hsa[w] <- hsa
  #   log <<- log
  # }
  
  # warnings - check plausibility of regions ----
  m <- rownames(shp@data) %in% from
  if(FALSE %in% m){
    cat(paste("Warning:", sum(!m), "regions in 'shp' not in 'from'. \n"))
    if(sum(!m) == length(m)) stop("No regions in 'shp' in 'from'. Set spChFIDs.")
  }
  m <- rownames(shp@data) %in% to
  if(FALSE %in% m){
    cat(paste("Warning:", sum(!m), "regions in 'shp' not in 'to'. \n"))
    if(sum(!m) == length(m)) stop("No regions in 'shp' in 'to'. Set spChFIDs.")
  }
  m <- from %in% rownames(shp@data)
  if(FALSE %in% m){
    cat(paste("Warning:", sum(!m), "regions in 'from' not in 'shp'. \n"))
    if(sum(!m) == length(m)) stop("No regions in 'from' in 'shp'. Set spChFIDs.")
  }
  m <- to %in% rownames(shp@data)
  if(FALSE %in% m){
    cat(paste("Warning:", sum(!m), "regions in 'to' not in 'shp'. \n"))
    if(sum(!m) == length(m)) stop("No regions in 'to' in 'shp'. Set spChFIDs.")
  }
  rm(m)
  
  # if(!is.null(savePlots)){
  #   if(!exists("...")) stop("res required when savePlots = TRUE")
  #   if(!exists("height")) stop("height required when savePlots = TRUE")
  #   if(!exists("width")) stop("width required when savePlots = TRUE")
  # }
  
  # originals to keep ----
  orig_shp <- shp
  shp@data$assignment <- row.names(shp@data)
  shp@data$assignment_via <- rep(NA, nrow(shp@data))
  shp@data$assignment_via[rownames(shp@data) %in% to] <- "destination"
  
  shp0 <- shp
  from0 <- from
  to0 <- to
  
  # prepare lookup table and flows ----
  lkup <- data.frame(original = row.names(shp@data), aggregated = row.names(shp@data), stringsAsFactors = FALSE)
  flow <- iflows(zeros = FALSE)
  
  if(plot){ 
    if(!is.null(savePlots)){
      png(paste0(savePlots, "0.png"), 
          height = plotOpts$height, 
          width = plotOpts$width,
          units = plotOpts$units,
          res = plotOpts$res)
      par(mai = c(0,0,0,0))
    }
    plot(shp)
    if(!is.null(savePlots)){
      dev.off()
    }
  }
  
  # iterations ----
  it <- 0
  li <- 0 
  n_it <- NULL
  reas <- NULL
  # shps <- list()
  lookups <- list()
  cat("Iterating\n")
  
  newassignment <- shp0@data$assignment
  
  change <- TRUE
  while(change){
    it <- it + 1
    # print(li)
    palloc <- shp0@data$assignment
    tow <- to
    fromw <- from
    # shpi <- shp
    n_it <- c(n_it, length(unique(shp0@data$assignment)))
    shp <- collapse()
    
    # ..loop through each column ----
    tmp <- unique(shp0@data$assignment[shp0@data$assignment %in% to])
    tmp <- tmp[tmp %in% to]
    
    for(i in tmp){
      # if(!i %in% unique(shp@data$assignment[shp@data$assignment %in% to])){ 
      #   if(debug == 1) print(paste("advance for loop at", i))
      #   next
      # }
      # shp <- collapse()
      # ..possible merges (based on touching regions) ----
      touchmat <- gIntersects(shp, byid = TRUE, returnDense = FALSE)
      touchi <- rownames(shp@data)[unlist(touchmat[i])]
      touchi <- touchi[touchi != i]
      if(!verbose) cat(".")
      
      
      # touchmati <- rownames(shp@data)[unlist(touchmat[to])]
      # combine touching regions if max flow is to a core
      # i <- "ZH05"
      # print(i)
      if(debug > 0) cat(i)
      # ....list of regions to potentially be merged ----
      # print("here?")
      if(debug == 3) print(touchi)
      # print("or here?")
      # ....flow from these regions ----
      fromi <- from[from %in% touchi]
      flowi <- flow[flow$from %in% touchi, ]
      # flowi <- flowi[flowi$Freq > 0, ]
      # n <- tapply(fromi, fromi, length)
      # # names(n)
      # flowi$n <- n[match(flowi$from, names(n))]
      # flowi <- within(flowi, prop <- Freq/n)
      # flowi <- flowi[order(flowi$from, - flowi$prop), ]
      # flowi$rank <- unlist(tapply(flowi$Freq, flowi$from, function(x)1:length(x)))
      
      # ....regions that can be merged ----
      flowi <- flowi[flowi$rank_from == 1 & flowi$to == i, ]
      # ....update shapefile assignment variable with this info ----
      if(debug > 0 | verbose) cat(paste0(i, " + ", length(shp0@data$assignment[shp0@data$assignment %in% flowi$from]), "/", length(touchi)), " ")
      
      x <- shp0@data$assignment %in% flowi$from
      shp0@data$assignment[x] <- i
      shp0@data$assignment_via[x] <- "flow"
      tow[tow %in% flowi$from] <- i
      # if(length(x) > 0) logger(unique(flowi$from), i)
      
      # print(flowi)
    }
    # reconstruct lookup table from spatial data
    alloc <- shp0@data$assignment
    
    nochangei <- all(alloc == palloc)
    
    
    if(nochangei) { # if no changes made, check for valid destination, plurality and LI
      
      ### reassignments ---- 
      changew <- FALSE
      if(verbose) cat("\nReassignment checks\n")
      
      # HSAs that shouldnt exist (no flow to them - artifacts of the method)
      # reassign based on neighbours flow
      hsas <- unique(shp0@data$assignment)
      nondest <- hsas[!hsas %in% to0]
      
      if(verbose){ 
        ### Nondestinations ----
        cat(paste0("N inappropriate proto-HSAs (not destinations) : ", length(nondest), "\n"))
      }
      if(length(nondest) > 0) {
        if(verbose){ 
          print(nondest)
        }
        touchmat <- gIntersects(shp0, byid = TRUE, returnDense = FALSE)
        touchmati <- gIntersects(shp, byid = TRUE, returnDense = FALSE)
        
        for(n in nondest){
          # stop()
          # TI31
          touchi <- rownames(shp0@data)[unlist(touchmat[n])]
          touchi <- touchi[touchi != n]
          touchii <- rownames(shp@data)[unlist(touchmati[n])]
          touchii <- touchii[touchii != n]
          if(verbose) cat(paste0("\n", n, "\n"))
          flow <- iflows(from = from0, to = to, zeros = FALSE, keepFrom = touchi)
          # if 1st order neighbours don't have any flow to a neighbouring HSA, try second order
          if(!any(flow$to %in% touchii)){
            touchi <- rownames(shp0@data)[unlist(touchmat[c(n, touchi)])]
            flow <- iflows(from = from0, to = to, zeros = FALSE, keepFrom = touchi)
            # if second order neighbours doesnt work, break
            if(!any(flow$to %in% touchii)){
              if(verbose) cat(paste0("\nskipping ", n, ": nondestination without any neighbour flow"))
              next
            }
          }
          flow <- aggregate(flow$N, by = list(to = flow$to), sum)
          flow <- flow[flow$to %in% touchii, ]
          flow <- flow[order(flow$x, decreasing = TRUE), ]
          if(verbose) print(head(flow))
          x <- which(shp0@data$assignment == n)
          shp0@data$assignment[x] <- flow$to[1]
          shp0@data$assignment_via[x] <- "neighbour"
          tow[tow %in% n] <- flow$to[1]
          fromw[tow %in% n] <- flow$to[1]
          if(verbose) cat(paste0(" -> ", flow$to[1], "\n"))
          # logger(n, flow$to[1])
          changew <- TRUE
        }
      } 
      shp <- collapse()
      touchmat <- gIntersects(shp0, byid = TRUE, returnDense = FALSE)
      # Check low LI ----
      if(!changew) {
        # stop()
        flow <- iflows() 
        flow <- flow[flow$from == flow$to, ]
        li <- flow$prop_from
        names(li) <- flow$to
        if(verbose) cat(li[-order(li)])
        lowli <- li[li < li_threshold]
        if(verbose){ 
          cat(paste0("N LI proto-HSA < li_threshold : ", length(lowli), "\n"))
        }
        if(length(lowli) > 0){ # low LI
          if(verbose){ 
            print(lowli)
          }
          touchmat <- gIntersects(shp0, byid = TRUE, returnDense = FALSE)
          touchmati <- gIntersects(shp, byid = TRUE, returnDense = FALSE)
          flow <- iflows()
          # stop()
          for(l in names(lowli)){
            # if(l == "JU06") stop()
            reass <- l
            touchi <- rownames(shp@data)[unlist(touchmati[l])]
            flowi <- flow[flow$from == l & flow$N > 0 & flow$to %in% touchi, ] # ensure self flows are removed?
            flowi <- flowi[!flowi$to == l, ]
            flowi <- flowi[order(-flowi$N_from), ]
            if(verbose){
              cat(paste0("\n", l, "\n"))
              print(head(flowi))
            }
            x <- shp0@data$assignment == l
            if(nrow(flowi) == 0 & length(touchi) == 1){ 
              reass <- touchi # islands/edges
              changew <- TRUE
            }
            if(nrow(flowi) > 0){ 
              reass <- flowi$to[1] # multiple possible flows - take most
              changew <- TRUE
            }
            if(nrow(flowi) == 0 & (length(touchi) == 0 | length(touchi) > 1)) next # no suitable flow (leave as is) - should be rare
            shp0@data$assignment[x] <- reass
            shp0@data$assignment_via[x] <- "merge (low LI)"
            tow[tow %in% l] <- reass
            fromw[tow %in% l] <- reass
            if(verbose) cat(paste0(" -> ", reass, "\n"))
            # logger(n, flow$to[1])
            changew <- TRUE
          }
        }
      }
      
      # check minimum interventions ----
      if(!changew){ 
        tos <- as.data.frame(table(to = tow), stringsAsFactors = FALSE)
        tos <- tos[tos$Freq < min_interventions, ]
        if(verbose){ 
          cat(paste0("N too small proto-HSAs: ", length(tos), "\n"))
          print(tos)
        }
        tos <- tos[, 1]
        if(length(tos) > 0){
          if(verbose){ 
            print(tos)
          }
          # stop()
          touchmat <- gIntersects(shp0, byid = TRUE, returnDense = FALSE)
          touchmati <- gIntersects(shp, byid = TRUE, returnDense = FALSE)
          flow <- iflows()
          for(m in tos){
            if(verbose){ 
              cat(paste0("\n", m, "\n"))
            }
            reass <- m
            touchi <- rownames(shp@data)[unlist(touchmati[m])]
            touchi <- touchi[touchi != m]
            # lapply(touchmati, function(x) rownames(shp@data)[x])
            flowi <- flow[flow$from == m & flow$N > 0 & flow$to %in% touchi, ]
            flowi <- flowi[order(-flowi$N_from), ]
            if(nrow(flowi) == 0 & length(touchi) == 1){ 
              reass <- touchi # islands/edges
              changew <- TRUE
              mess <- "merge (N intervention; flow to single neighbour)"
            }
            if(nrow(flowi) > 0){ 
              reass <- flowi$to[1] # multiple possible flows - take most
              changew <- TRUE
              mess <- "merge (N intervention; flow to one of multiple neighbours)"
            }
            if(nrow(flowi) == 0 & (length(touchi) == 0 | length(touchi) > 1)){ 
              # go where most of the neighbours go...
              if(nrow(flow[flow$from %in% touchi, c("N", "N_from", "N_to")]) > 0){
                flowi <- aggregate(flow[flow$from %in% touchi, c("N", "N_from", "N_to")], 
                                   list(to = flow$to[flow$from %in% touchi]), sum)
                flowi <- flowi[flowi$to %in% touchi, ]
                flowi <- flowi[order(-flowi$N), ]
                reass <- flowi$to[1] # multiple possible flows - take most
                changew <- TRUE
                mess <- "merge (N intervention; no flow to neighbours - using all flows amongst neighbours)"
                # next # no suitable flow (leave as is) - should be rare
              } else {
                mess <- "no change - no suitable neighbours"
              }
            }
            if(verbose){ 
              print(head(flowi))
            }
            x <- shp0@data$assignment == m
            shp0@data$assignment[x] <- reass
            shp0@data$assignment_via[x] <- mess
            tow[tow %in% m] <- reass
            fromw[tow %in% m] <- reass
            if(verbose) cat(paste0(" -> ", reass, "\n"))
            # logger(m, reass)
          }
        }
      }
      alloc <- shp0@data$assignment
      change <- any(alloc != palloc)
      
      # if(debug > 0) print(paste("break while loop at", it))
      # break 
      
    }
    
    
    length(unique(shp0@data$assignment))
    # plot(shp)
    # ..collapse shapefile according to new assignments ----
    # shps[[it]] <- shp <- collapse()
    lookups[[it]] <- shp0@data
    length(unique(shp@data$assignment))
    if(plot){ 
      if(!is.null(savePlots)){
        png(paste0(savePlots, it, ".png"), 
            height = plotOpts$height, 
            width = plotOpts$width,
            units = plotOpts$units,
            res = plotOpts$res)
        par(mai = c(0,0,0,0))
      }
      plot(shp)
      if(!is.null(savePlots)){
        dev.off()
      }
    }
    cat(paste("", length(unique(shp0@data$assignment)), "\n"))
    if(verbose) {
      cat("Proto-HSAs:\n")
      print(unique(shp0@data$assignment))
    }
    to <- tow
    from <- fromw
    

    # ..construct flow dataframe based on new proto-HSAs for next iteration ----
    # flow <- iflows()
    # flowli <- flow[flow$to == flow$from, ]
    # li <- with(flowli, N/N_from)

  }
  

  shp <- collapse()
  if(plot) plot(shp)
  
  # localization index ----
  # lookup <- cbind(shp0@data, over(shp0, shp), deparse.level = 1)
  lookup <- shp0@data
  flow <- iflows()
  
  # n <- tapply(ffrom, ffrom, length)
  # names(n)
  # flow$n <- n[match(flow$from, names(n))]
  # flow <- flow[flow$Freq > 0, ]
  selfflow <- flow[flow$from == flow$to, ]
  li <- selfflow$prop_from
  names(li) <- selfflow$to
  
  inv <- which(!lookup$assignment %in% unique(fto))
  invi <- rep(NA, length(inv))
  names(invi) <- lookup$assignment[inv]
  
  li <- c(li, invi)
  
  # shapefile
  # shp0@data <- cbind(shp0@data, lookup[match(lookup$MEDSTAT04, shp0@data$MEDSTAT04),])
  # dat <- shp0@data
  # shp_poly <- unionSpatialPolygons(shp0, dat$assignment)
  # dat <- dat[dat$MEDSTAT04 %in% dat$assignment, ]
  # rownames(dat) <- dat$MEDSTAT04
  # shp <- SpatialPolygonsDataFrame(shp_poly, dat)
  
  # low LI
  low_li <- names(li)[li < li_threshold | is.na(li)]
  if(length(low_li) > 0){ 
    cat(paste("\nHSAs with LI <", li_threshold, "\n"))
    print(low_li)
  }
  cat(paste("\nHSAs produced", length(unique(lookup$assignment)), "\n"))
  method <- shp0@data$assignment_via
  names(method) <- row.names(shp0@data)
  # object to be returned ----
  out <- list(# call = call, 
              lookup = lookup,
              lookups = lookups,
              original_data = list(from = from0,
                                   to = to0),
              reassigned_data = list(from_hsa = ffrom,
                                     to_hsa = fto), 
              method = method,
              original_shp = orig_shp,
              shp = shp,
              # polygon = shp_poly,
              iterations = it,
              li = li, 
              li_threshold = li_threshold,
              min_interventions = min_interventions,
              n = length(unique(lookup$assignment)),
              n_it = n_it,
              n_interventions = table(factor(tow, levels = unique(fto[order(fto)]))),
              names = unique(lookup$assignment), 
              flow = flow #,
              # log = log
              )
  class(out) <- "hsa"
  out
}
aghaynes/HSAr documentation built on Sept. 20, 2021, 5:29 p.m.