R/unstackB.R

Defines functions multipleUnstackB multipleAddressB orthogonalProjectionFatalitiesB unstackFatalitiesB

#' Unstack "stacks" in Snow's cholera map.
#'
#' Unstacks fatalities data by 1) assigning the coordinates of the base case to all cases in a stack and 2) setting the base case as an "address" and making the number of fatalities an attribute.
#' @param multi.core Logical or Numeric. \code{TRUE} uses \code{parallel::detectCores()}. \code{FALSE} uses one, single core. With Numeric, you specify the number logical cores. See \code{vignette("Parallelization")} for details.
#' @param dev.mode Logical. Development mode uses parallel::parLapply().
#' @seealso \code{vignette("unstacking.fatalities")}
#' @return An R list that includes \code{anchor.case}, \code{fatalities.anchor}, \code{fatalities.unstacked} and \code{ortho.proj}.
#' @note This function is computationally intensive. This function documents the code that generates \code{\link{anchor.case}}, \code{\link{fatalities.anchor}}, \code{\link{fatalities.unstacked}} and \code{\link{ortho.proj}}.
#' @noRd

unstackFatalitiesB <- function(multi.core = TRUE, dev.mode = FALSE) {
  cores <- multiCore(multi.core)
  fixed.fatalities <- fixFatalities()
  ortho.proj <- orthogonalProjectionFatalitiesB(fixed.fatalities, cores)

  ## Single and Multiple ##

  road.incidence <- c(table(ortho.proj$road.segment))
  road.incidence <- data.frame(id = names(road.incidence),
    count = road.incidence)

  single.obs <- road.incidence[road.incidence$count == 1, ]
  single.address <- lapply(single.obs$id, function(id) {
    sel <- ortho.proj$road.segment == id
    data.frame(id = id, case = ortho.proj[sel, "case"])
  })

  cutpoint <- 0.05
  multiple.obs <- road.incidence[road.incidence$count > 1, ]
  multiple.address <- multipleAddressB(multiple.obs, ortho.proj,
    fixed.fatalities, cutpoint, cores, dev.mode)

  multiple.unstacked <- multipleUnstackB(multiple.address, cores, dev.mode)

  single.unstacked <- do.call(rbind, single.address)
  single.unstacked[ c("group", "anchor",  "case.count")] <- 1
  single.unstacked$anchor <- single.unstacked$case
  single.unstacked$multiple.obs.seg <- "No"

  unstacked <- rbind(multiple.unstacked, single.unstacked)
  unstacked <- merge(unstacked, fixed.fatalities, by.x = "anchor",
    by.y = "case")

  fatalities.unstacked <- unstacked[, c("case", "x", "y")]
  idx <- order(fatalities.unstacked$case)
  fatalities.unstacked <- fatalities.unstacked[idx, ]
  row.names(fatalities.unstacked) <- NULL

  vars <- c("case", "x", "y", "case.count")
  fatalities.anchor <- unstacked[unstacked$anchor == unstacked$case, vars]
  names(fatalities.anchor)[1] <- "anchor"
  row.names(fatalities.anchor) <- NULL

  anchor.case <- unstacked[, c("anchor", "case")]

  list(anchor.case = anchor.case,
       fatalities = fixed.fatalities,
       fatalities.unstacked = fatalities.unstacked,
       fatalities.anchor = fatalities.anchor,
       ortho.proj = ortho.proj)
}

orthogonalProjectionFatalitiesB <- function(fatality.df, cores, radius = 2) {
  vars <- c("x", "y")
  manual.classification <- caseRoadClassificationFix()

  out <- parallel::mclapply(fatality.df$case, function(case.id) {
    case.data <- fatality.df[fatality.df$case == case.id, vars]
    if (case.id %in% unlist(manual.classification)) {
      sel <- vapply(manual.classification, function(x) {
        case.id %in% x
      }, logical(1L))
      within.radius <- names(manual.classification[sel])
    } else {
      ones <- rbind(case.data[, vars],
        stats::setNames(cholera::road.segments[, paste0(vars, 1)], vars))
      twos <- rbind(case.data[, vars],
        stats::setNames(cholera::road.segments[, paste0(vars, 2)], vars))
      d1 <- as.matrix(stats::dist(ones))[-1, 1]
      d2 <- as.matrix(stats::dist(twos))[-1, 1]

      # choose road segment with _both_ endpoints <= radius
      within.radius <- cholera::road.segments$id[d1 <= radius & d2 <= radius]
    }

    ortho.proj.test <- lapply(within.radius, function(seg.id) {
      sel <- cholera::road.segments$id == seg.id
      xs <- unlist(cholera::road.segments[sel, c("x1", "x2")])
      ys <- unlist(cholera::road.segments[sel, c("y1", "y2")])
      seg.df <- data.frame(x = xs, y = ys)

      ols <- stats::lm(y ~ x, data = seg.df)
      road.intercept <- stats::coef(ols)[1]
      road.slope <- stats::coef(ols)[2]
      ortho.slope <- -1 / road.slope
      ortho.intercept <- case.data$y - ortho.slope * case.data$x
      x.proj <- (ortho.intercept - road.intercept) / (road.slope - ortho.slope)
      y.proj <- road.slope * x.proj + road.intercept

      # segment bisection/intersection test
      distB <- stats::dist(rbind(seg.df[1, ], c(x.proj, y.proj))) +
               stats::dist(rbind(seg.df[2, ], c(x.proj, y.proj)))

      bisect.test <- signif(stats::dist(seg.df)) == signif(distB)

      if (bisect.test) {
        ortho.pts <- data.frame(x.proj, y.proj)
        dat <- rbind(c(case.data$x, case.data$y), c(x.proj, y.proj))
        data.frame(road.segment = seg.id, ortho.pts, dist = c(stats::dist(dat)))
      }
    })

    ortho <- do.call(rbind, ortho.proj.test)

    if (is.null(ortho)) {
      na.vars <-  c("road.segment", "x.proj", "y.proj", "dist", "type")
      na.df <- data.frame(matrix(ncol = length(na.vars)))
      ortho.location <- stats::setNames(na.df, na.vars)
    } else {
      ortho.location <- ortho[which.min(ortho$dist), ]
      ortho.location$type <- "ortho"
    }

    ## nearest endpoint of nearest road segment ##

    unbisected.segs <- setdiff(within.radius, ortho$road.segment)

    if (length(within.radius) <= 1) {
      sel <- cholera::road.segments$id == within.radius
      candidates <- cholera::road.segments[sel, ]

      ones <- stats::setNames(candidates[, paste0(vars, 1)], vars)
      twos <- stats::setNames(candidates[, paste0(vars, 2)], vars)
      ep.data <- rbind(case.data[, vars], ones, twos)
      ep.dist <- as.matrix(stats::dist(ep.data))[-1, 1]

      # nearest endpoint of nearest road segment
      nearest <- which.min(ep.dist)

      if (nearest == 1) {
        var.sel <- paste0(vars, 1)
      } else if (nearest == 2) {
        var.sel <- paste0(vars, 2)
      }

      prox.location <- data.frame(road.segment = candidates$id,
                                  x.proj = candidates[, var.sel[1]],
                                  y.proj = candidates[, var.sel[2]],
                                  dist = ep.dist[nearest],
                                  type = "eucl")

    } else if (length(unbisected.segs) > 1) {
      sel <- cholera::road.segments$id %in% unbisected.segs
      candidates <- cholera::road.segments[sel, ]

      ones <- stats::setNames(candidates[, paste0(vars, 1)], vars)
      twos <- stats::setNames(candidates[, paste0(vars, 2)], vars)
      ep.data <- rbind(case.data[, vars], ones, twos)
      ep.dist <- as.matrix(stats::dist(ep.data))[-1, 1]

      # nearest endpoint of nearest road segment
      nearest <- which.min(ep.dist)

      if (nearest > nrow(candidates)) {
        nearest <- nearest - nrow(candidates)
        var.sel <- paste0(vars, 2)
      } else {
        var.sel <- paste0(vars, 1)
      }

      prox.location <- data.frame(road.segment = candidates[nearest, "id"],
                                  x.proj = candidates[nearest, var.sel[1]],
                                  y.proj = candidates[nearest, var.sel[2]],
                                  dist = ep.dist[nearest],
                                  type = "eucl")
    }

    nearest.sel <- which.min(c(ortho.location$dist, prox.location$dist))

    if (nearest.sel == 1) {
      out <- ortho.location
    } else if (nearest.sel == 2) {
      out <- prox.location
    }

    data.frame(case = case.id, out, row.names = NULL)
  }, mc.cores = cores)
  do.call(rbind, out)
}

multipleAddressB <- function(multiple.obs, ortho.proj, fixed.fatalities,
  cutpoint, cores, dev.mode) {

  multiple_address <- function(id, ortho.proj, fixed.fatalities, cutpoint) {
    cases <- ortho.proj[ortho.proj$road.segment == id, "case"]
    ortho <- ortho.proj[ortho.proj$road.segment == id, c("x.proj", "y.proj")]
    orientation <- sign(fixed.fatalities[cases, c("x", "y")] - ortho)

    sideA <- (orientation$x == -1 & orientation$y ==  1) |
             (orientation$x == -1 & orientation$y == -1) |
             (orientation$x ==  0 & orientation$y ==  1) |
             (orientation$x ==  1 & orientation$y ==  0)

    # side of street
    orientation$side <- ifelse(sideA, 1, 0)

    if (length(unique(orientation$side)) == 2) {
      A <- cases[orientation$side == 1]
      B <- cases[orientation$side == 0]
      dataA <- ortho.proj[ortho.proj$case %in% A, c("x.proj", "y.proj")]
      dataB <- ortho.proj[ortho.proj$case %in% B, c("x.proj", "y.proj")]

      if (nrow(dataA) >= 2) {
        clusterA <- stats::hclust(stats::dist(dataA))
        outA <- stats::cutree(clusterA, h = cutpoint)
      } else {
        outA <- 1
        names(outA) <- row.names(dataA)
      }

      if (nrow(dataB) >= 2) {
        clusterB <- stats::hclust(stats::dist(dataB))
        outB <- stats::cutree(clusterB, h = cutpoint)
      } else {
        outB <- 1
        names(outB) <- row.names(dataB)
      }

      outB <- max(outA) + outB
      census <- c(outA, outB)
      out <- data.frame(case = as.numeric(names(census)), group = census)
      row.names(out) <- NULL

    } else if (length(unique(orientation$side)) == 1) {
      dataA <- ortho.proj[ortho.proj$case %in% cases, c("x.proj", "y.proj")]

      if (nrow(dataA) >= 2) {
        clusterA <- stats::hclust(stats::dist(dataA))
        outA <- stats::cutree(clusterA, h = cutpoint)
      } else {
        outA <- 1
      }

      out <- data.frame(case = cases, group = outA)
      row.names(out) <- NULL
    }

    sel <- ortho.proj$road.segment == id
    out <- merge(out, ortho.proj[sel, c("case", "dist")], by = "case")

    out <- out[order(out$group, out$dist), ]
    out$anchor <- ifelse(duplicated(out$group) == FALSE, 1, 0)
    data.frame(id = id, out)
  }

  if ((.Platform$OS.type == "windows" & cores > 1) | dev.mode) {
    cl <- parallel::makeCluster(cores)
    parallel::clusterExport(cl = cl, envir = environment(),
      varlist = c("ortho.proj", "fixed.fatalities", "cutpoint"))
    addr <- parallel::parLapply(cl, multiple.obs$id, function(id) {
      multiple_address(id, ortho.proj, fixed.fatalities, cutpoint)
    })
    parallel::stopCluster(cl)
  } else {
    addr <- parallel::mclapply(multiple.obs$id, function(id) {
      multiple_address(id, ortho.proj, fixed.fatalities, cutpoint)
    }, mc.cores = cores)
  }
  addr
}

multipleUnstackB <- function(multiple.address, cores, dev.mode) {
  multiple_unstack <- function(x) {
    group.id <- unique(x$group)
    group.data <- lapply(group.id, function(id) {
      tmp <- x[x$group == id, ]
      tmp$case.count <- nrow(tmp)
      tmp$anchor <- tmp[tmp$anchor == 1, "case"]
      tmp$dist <- NULL
      tmp
    })
    do.call(rbind, group.data)
  }

  if ((.Platform$OS.type == "windows" & cores > 1) | dev.mode) {
    cl <- parallel::makeCluster(cores)
    unstacked <- parallel::parLapply(cl, multiple.address, multiple_unstack)
    parallel::stopCluster(cl)
  } else {
    unstacked <- parallel::mclapply(multiple.address, multiple_unstack,
      mc.cores = cores)
  }
  out <- do.call(rbind, unstacked)
  out$multiple.obs.seg <- "Yes"
  out
}
lindbrook/cholera documentation built on Jan. 13, 2025, 3:49 p.m.