In ATFutures/nyped: Model of pedestrian flows throughout New York City

knitr::opts_chunk$set(
  collapse = TRUE,
  comment = "#>",
  fig.path = "man/figures/README-",
  out.width = "100%"
)

nyped

suppressPackageStartupMessages ({
    library (dplyr)
    library (ggplot2)
    devtools::load_all (".", export_all = FALSE, quiet = TRUE)
})
theme_set (theme_minimal ())
data_dir <- "/data/data/moveability/nyc"

Model of pedestrian flows against empirical pedestrian counts for New York City, constructed from "flow layers" formed from pair-wise matching between the following seven categories of origins and destinations:

subway
residential
transportation
sustenance
entertainment
education
healthcare

dat <- readRDS (file.path (data_dir, "ped-model-final.Rds"))
f_ <- dat$flowvars
mod <- summary (lm (dat$p ~ f_))
r2 <- signif (100 * mod$adj.r.squared, 3)

An eighth category is network centrality, with additional layers modelling dispersal from each of these categories. The model explains R²= r r2 of the observed variation in pedestrian counts. Final results, with significantly explanatory layers named according to the first three letters of the above categories, looks like this:

dat <- readRDS (file.path (data_dir, "ped-model-final.Rds"))
f_ <- dat$flowvars
mod <- summary (lm (dat$p ~ f_))
coeffs <- data.frame (mod$coefficients [2:nrow (mod$coefficients), ])
# for some reason, colnames do not transfer properly:
names (coeffs) <- colnames (mod$coefficients)
coeffs <- cbind ("Layer Name" = gsub ("f_", "", rownames (coeffs)),
                 coeffs)
# Next line is critical, because the vertical lines are interpreted by markdown
# as table column breaks, which mucks the whole thing up!
names (coeffs) [length (names (coeffs))] <- "Pr(>t)"
# order by origin, then by decreasing absolute T value
coeffs$origin <- substr (coeffs$`Layer Name`, 1, 3)
names (coeffs) [which (names (coeffs) == "t value")] <- "t"
coeffs <- arrange (coeffs, origin, desc (t))
rownames (coeffs) <- NULL
coeffs$origin <- NULL
names (coeffs) [which (names (coeffs) == "t")] <- "t value"
knitr::kable (coeffs, digits = c (NA, 0, 0, 2, 4), row.names = FALSE, caption = "Table 1. Statistical parameters of final model of pedestrian flows through New York City.")

A sample of actual flows looks like this:

And a final statistical relationship between modelled and observed pedestrian counts looks like this:

data_dir <- "/data/data/moveability/nyc"
dat <- readRDS (file.path (data_dir, "ped-model-final.Rds"))
mod <- lm (dat$p ~ dat$flowvars)
res <- data.frame (predicted = predict (mod),
                   actual = dat$p)
ggplot (res, aes (x = predicted, y = actual)) +
    geom_point () +
    geom_smooth (method = "lm")