# Chunk defaults for the rendered document: show and evaluate code, cache
# results, and keep warnings/errors out of the output.
knitr::opts_chunk$set(
  echo = TRUE,
  eval = TRUE,
  cache = TRUE,
  tidy = FALSE,
  warning = FALSE,
  error = FALSE
)

library(rlist)
library(data.table)
library(foreach)
# The good old original R code to run SIM that was used before cppSim
### this file contains the functions used to run the gravity model

# Distance-decay cost function, applied elementwise to the OD distance matrix.
#
# d:    origin-destination distance matrix.
# beta: decay parameter.
# type: "exp" -> exponential decay exp(-beta * d);
#       "pow" -> power-law decay d^(-beta).
# Returns a matrix with the same dimensions as `d`.
cost_function <- function(d, beta, type = "exp") {
  if (type == "exp") {
    exp(-beta * d)
  } else if (type == "pow") {
    d^(-beta)
  } else {
    # Fail loudly on an unknown cost type. The original printed a message
    # (with a typo) and returned the print value, silently propagating a
    # non-matrix result downstream.
    stop("unknown cost function type: ", type, call. = FALSE)
  }
}

# Squared Pearson correlation between observed and fitted flows.
r_2 <- function(d, f) {
  cor(as.numeric(d), as.numeric(f))^2
}

# Pearson correlation between observed and fitted flows.
r <- function(d, f) {
  cor(as.numeric(d), as.numeric(f))
}

# NOTE(review): despite the name, this is the SUM of squared errors -- there
# is no mean and no square root. Kept as-is because downstream code reports
# this value as "RMSE".
rmse <- function(d, f) sum((d - f)^2)

# Iteratively derive the balancing factors of the doubly constrained model:
#   A_i = 1 / sum_j B_j D_j F_ij
#   B_j = 1 / sum_i A_i O_i F_ij
#
# cost_fun: cost matrix F with F_ij = f(c_ij).
# O, D:     origin (row) and destination (column) flow totals.
# delta:    convergence threshold on the change in B between iterations.
# Returns a list with the converged A, B vectors and the error trace e.
calibration <- function(cost_fun, O, D, delta = 0.05) {
  B <- rep_len(1, nrow(cost_fun))
  A <- NULL # populated on the first iteration
  # Initial error equals nrow(cost_fun), so the loop runs at least once for
  # any non-degenerate matrix. The original left A unbound if it didn't.
  eps <- abs(sum(B))
  e <- NULL
  i <- 0
  # Scalar condition, so use the short-circuiting && (was elementwise &).
  while ((eps > delta) && (i < 50)) {
    A_new <- 1 / apply(cost_fun, MARGIN = 1, function(x) sum(B * D * x))
    B_new <- 1 / apply(cost_fun, MARGIN = 2, function(x) sum(A_new * O * x))
    # NOTE(review): signed differences can cancel in this sum; kept for
    # fidelity with the original convergence criterion.
    eps <- abs(sum(B_new - B))
    e <- append(e, eps)
    A <- A_new
    B <- B_new
    i <- i + 1
  }
  list(
    "A" = A,
    "B" = B,
    "e" = e
  )
}

# Run the doubly constrained gravity model:
#   T_ij = A_i B_j O_i D_j f(c_ij)
#
# flows:    observed OD flow matrix (used for the O/D totals and validation).
# distance: OD distance matrix.
# beta:     decay parameter passed to cost_function().
# type:     cost function type, "exp" or "pow".
# Returns a list with the modelled flows, fit statistics (r2, rmse, e_sor)
# and the calibration error trace.
run_model <- function(flows,
                      distance,
                      beta = 0.25,
                      type = "exp") {
  # The original wrapped distance/beta in {{ }}, which in base R is just a
  # doubly braced expression -- plain arguments are equivalent.
  F_c <- cost_function(d = distance, beta = beta, type = type)
  print("cost function computed")

  # Origin totals O_i (row sums) and destination totals D_j (column sums).
  O <- as.integer(apply(flows, MARGIN = 1, sum))
  D <- as.integer(apply(flows, MARGIN = 2, sum))

  A_B <- calibration(cost_fun = F_c, O = O, D = D, delta = .001)
  print("calibration: over")
  A <- A_B$A
  B <- A_B$B

  # T_ij = (A_i O_i) * (B_j D_j) * F_ij, vectorised with an outer product.
  # Produces the same values as the original foreach row loop while dropping
  # the foreach runtime dependency.
  flows_model <- round(outer(A * O, B * D) * F_c)
  print("model run: over")

  e_sor <- as.numeric(e_sorensen(flows, flows_model))
  print(paste0("E_sor = ", e_sor))
  r2 <- as.numeric(r_2(flows_model, flows))
  print(paste0("r2 = ", r2))
  RMSE <- as.numeric(rmse(flows_model, flows))
  print(paste0("RMSE = ", RMSE))

  list(
    "values" = flows_model,
    "r2" = r2,
    "rmse" = RMSE,
    "calib" = A_B$e,
    "e_sor" = e_sor
  )
}

### Validation

# Sorensen similarity index between observed and fitted flow matrices:
#   2 * sum_ij min(data_ij, fit_ij) / (sum(data) + sum(fit))
# pmin() computes the elementwise minimum directly (equivalent to the
# original row-wise apply over cbind, but vectorised).
e_sorensen <- function(data, fit) {
  2 * sum(pmin(c(data), c(fit))) / (sum(data) + sum(fit))
}
# Load the sample OD datasets bundled with the cppSim package.
library(cppSim)
data("distance_test")
data("flows_test")

# Rescale the raw distances: /1000 presumably converts metres to km, and the
# /14 looks like an assumed speed of ~14 km/h turning distance into travel
# time -- TODO confirm units against the cppSim data documentation.
distance_test <- round(distance_test / 1000 / 14)
This file will cover the process of building a local version of the gravity model used to predict cycling and/or walking flows across London.
The general version of the doubly constrained gravity model takes the following form: $$ T_{ij} = A_iB_jO_iD_jf(c_{ij}) $$ where $O_i$ is the working population of the origin, and $D_j$ is the number of available workplaces at the destination location: $$ O_i = \sum_j T_{ij} $$ $$ D_j = \sum_i T_{ij} $$
The terms $A_i$,$B_j$ are factors for each location. The derivation of these factors is based on the relation:
$$A_i = [\sum_j B_jD_jf(c_{ij})]^{-1}$$
$$B_j = [\sum_i A_iO_if(c_{ij})]^{-1}$$
These factors are computed by iterating the recursive relations above, starting from initial values of 1. Let's refer to the parameters above as vectors $\vec{A}$, $\vec{B}$, $\vec{O}$, $\vec{D}$, and to the cost function and flow as matrices $F$ and $T$ such that $F_{ij}=f(c_{ij})$ and $T_{ij}$ is the flow from $i$ to $j$.
# creating the O, D vectors.
# O_i = sum_j T_ij is a ROW sum (MARGIN = 1) and D_j = sum_i T_ij is a
# COLUMN sum (MARGIN = 2). The original had the margins swapped, which
# contradicted both the formulas above and the totals used inside
# run_model().
O <- apply(flows_test, sum, MARGIN = 1) |> c()
D <- apply(flows_test, sum, MARGIN = 2) |> c()
# Cost matrix for the test distances, using exponential decay with beta = 1.
F_c <- cost_function(d = distance_test, beta = 1, type = "exp")
Next, we need to run the recursive procedure until the values stabilise. We introduce a threshold $\delta$ at which we stop the recursion; it corresponds to the rate of change of the parameter with respect to the previous iteration.
# Grid-search beta over 2.7-3.2 in steps of 0.1, collecting one row of
# (beta, r2, rmse) per run.
beta_calib <- foreach::foreach(i = 28:33, .combine = rbind) %do% {
  beta <- 0.1 * (i - 1)
  print(paste0("RUNNING MODEL FOR beta = ", beta))
  run <- run_model(
    flows = flows_test,
    distance = distance_test,
    beta = beta,
    type = "exp"
  )
  cbind(beta, run$r2, run$rmse)
}
# Goodness of fit as a function of beta. Column 2 of beta_calib stores r2
# (not r), so the axis label is corrected accordingly.
plot(
  beta_calib[, 1],
  beta_calib[, 2],
  xlab = "beta value",
  ylab = "quality of fit, r2",
  main = "influence of beta on the goodness of fit",
  pch = 19,
  cex = 0.5,
  type = "b"
)
# Select the beta with the highest r2. which.max() always returns a single
# index, whereas the original which(x == max(x)) could return several
# indices on exact ties and make beta_best_fit a vector, breaking the
# downstream run_model() call.
beta_best_fit <- beta_calib[which.max(beta_calib[, 2]), 1]

# Visualise the calibrated exponential decay over 0-5 km.
x <- seq_len(100) / 20
plot(
  x,
  exp(-beta_best_fit * x),
  main = "cost function",
  xlab = "distance, km",
  ylab = "decay factor",
  pch = 19,
  cex = 0.5,
  type = "l"
)
# Re-run the model once with the best-fitting beta, keeping the full result
# (flows, fit statistics, calibration trace) for the plots below.
run_best_fit <- run_model(
  flows = flows_test,
  distance = distance_test,
  beta = beta_best_fit,
  type = "exp"
)
# Convergence of the balancing-factor calibration: the error recorded at
# each iteration of the best-fit run.
plot(
  seq_along(run_best_fit$calib),
  run_best_fit$calib,
  xlab = "iteration",
  ylab = "error",
  main = "calibration of balancing factors",
  pch = 19,
  cex = 0.5,
  type = "b"
)
# Observed vs modelled flows on log-log axes, with an identity reference
# line (y = x) drawn point by point.
plot(
  flows_test,
  run_best_fit$values,
  ylab = "flows model",
  xlab = "flows",
  log = "xy",
  pch = 19,
  cex = 0.5
)
identity_line <- seq_len(max(run_best_fit$values))
lines(identity_line, identity_line, col = "darkred", lwd = 2)
## MODEL USING THE GLM And POISSON DISTRIBUTION
# flows_london <- rlist::list.load("flows_london.rds")
data(flows_london)

# NOTE(review): the original had `flows_london <- flows_london` here -- a
# pure no-op, removed. If a deep copy was intended before data.table
# reference updates, it would need data.table::copy(); it is not needed
# here because `:=` below operates on the filtered subset `flows_grav`,
# which is already a new table.

# Sample 100 workplace zones and keep only OD pairs entirely within the
# sample. NOTE(review): there is no set.seed() call, so this sample -- and
# everything fitted from it -- is not reproducible across renders.
sample_od <- sample(unique(flows_london$workplace), 100)
flows_grav <- flows_london[(workplace %in% sample_od) & (residence %in% sample_od), ]

# Origin and destination totals of observed cycling flows, added by
# reference per zone.
flows_grav[, O := sum(bike), by = from_id]
flows_grav[, D := sum(bike), by = to_id]

# Poisson GLM with log link: workplace and residence fixed effects plus
# distance, with no intercept -- the gravity model in regression form.
model <- glm(bike ~ workplace + residence + distance - 1,
  data = flows_grav,
  family = poisson(link = "log")
)

# Residual distribution and goodness of fit of the GLM.
hist(model$fitted.values - flows_grav$bike, breaks = 100)
r2 <- r_2(flows_grav$bike, model$fitted.values)
r2
# Echo the model functions so their definitions appear in the rendered
# document (bare names autoprint at top level).
print("cost function:")
cost_function

print("calibration function:")
calibration

print("model run")
run_model
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.