Gravity model
In cppSim: Fast and Memory Efficient Spatial Interaction Models

knitr::opts_chunk$set(
  echo = TRUE,
  eval = TRUE,
  cache = TRUE,
  tidy = FALSE,
  warning = FALSE,
  error = FALSE
)

library(rlist)
library(data.table)
library(foreach)

# The good old original R code to run SIM that was used before cppSim

### this file contains the functions used to run the gravity model

cost_function <- function(d, beta, type = "exp") {
  # d is the od distance matrix, taking the exponential
  # means doing this operation for each individual element
  #  with an exponent beta
  # the type parameter allows to set it to either exp or pow.
  #  pow means we use a power function as cost, rather than exponential

  if (type == "exp") {
    exp(-beta * d)
  } else if (type == "pow") {
    d^(-beta)
  } else {
    print("provide a type of functino to compute")
  }
}

r_2 <- function(d, f) {
  cor(
    d |> as.numeric(),
    f |> as.numeric()
  )^2
}

r <- function(d, f) {
  cor(
    d |> as.numeric(),
    f |> as.numeric()
  )
}

rmse <- function(d, f) sum((d - f)^2)

calibration <- function(cost_fun, O, D, delta = 0.05) {
  B <- rep_len(1, nrow(cost_fun))
  eps <- abs(sum(B))
  e <- NULL
  i <- 0
  while ((eps > delta) & (i < 50)) {
    A_new <- 1 / (apply(cost_fun, function(x) sum(B * D * x), MARGIN = 1))
    B_new <- 1 / (apply(cost_fun, function(x) sum(A_new * O * x), MARGIN = 2))
    eps <- abs(sum(B_new - B))
    e <- append(e, eps)
    A <- A_new
    B <- B_new
    i <- i + 1
  }
  list(
    "A" = A,
    "B" = B,
    "e" = e
  )
}

run_model <- function(
    flows,
    distance,
    beta = 0.25,
    type = "exp"
    # ,cores = 3
    ) {
  F_c <- cost_function(d = {{ distance }}, beta = {{ beta }}, type = type)
  print("cost function computed")
  O <- apply(flows, sum, MARGIN = 1) |> as.integer()
  D <- apply(flows, sum, MARGIN = 2) |> as.integer()
  A_B <- calibration(
    cost_fun = F_c,
    O = O,
    D = D,
    delta = .001
  )
  print("calibration: over")
  A <- A_B$A
  B <- A_B$B

  flows_model <- foreach(
    j = c(1:nrow(F_c)),
    .combine = rbind
  ) %do% {
    round(A[j] * B * O[j] * D * F_c[j, ])
  }

  print("model run: over")
  e_sor <- e_sorensen(flows, flows_model) |> as.numeric()
  print(paste0("E_sor = ", e_sor))
  r2 <- r_2(flows_model, flows) |> as.numeric()
  print(paste0("r2 = ", r2))
  RMSE <- rmse(flows_model, flows) |> as.numeric()
  print(paste0("RMSE = ", RMSE))

  list(
    "values" = flows_model,
    "r2" = r2,
    "rmse" = RMSE,
    "calib" = A_B$e,
    "e_sor" = e_sor
  )
}

### Validation

e_sorensen <- function(data, fit) {
  2 * sum(apply(cbind(
    data |> c(),
    fit |> c()
  ), MARGIN = 1, FUN = min)) / (sum(data) + sum(fit))
}

library(cppSim)

data("distance_test")
data("flows_test")

distance_test <- (distance_test / 1000 / 14) |> round()

This file will cover the process of building a local version of the gravity model used to predict cycling and/or walking flows across London.

What model to use ?

The general version of the doubly constrained gravity model looks the following way: $$ T_{ij} = A_iB_jO_iD_jf(c_{ij}) $$ where $O_i$ is the working population of the origin, and $D_j$ is the available workplaces at the destination location: $$ O_i = \sum_j T_{ij} $$ $$ D_j = \sum_i T_{ij} $$

The terms $A_i$,$B_j$ are factors for each location. The derivation of these factors is based on the relation:

$$A_i = [\sum_j B_jD_jf(c_{ij})]^{-1}$$

$$B_j = [\sum_i A_iO_if(c_{ij})]^{-1}$$

with the derivation made from a recursive chain with initial values 1. Let's refer to the parameters above as vectors $\vec{A}$,$\vec{B}$,$\vec{O}$,$\vec{D}$, and to the cost function and flow as matrices F and T such that $F_{ij}=f(c_{ij})$ and $T_{ij}$ is a flow from i to j.

# creating the O, D vectors. 
O <- apply(flows_test, sum, MARGIN = 2) |> c()

D <- apply(flows_test, sum, MARGIN = 1) |> c()

F_c <- cost_function(distance_test,1,type = "exp")

Next, we need to run the recursive procedure until the values stabilise. We introduce the threshold at which we will stop running the recursion $\delta$. It corresponds to the rate of change of the parameter with respect to the previous iteration.

beta_calib <- foreach::foreach(i = 28:33
                               ,.combine = rbind) %do% {
                                 beta <- 0.1*(i - 1)
                                 print(paste0("RUNNING MODEL FOR beta = ",beta))
                                 run <- run_model(flows = flows_test
                                                  ,distance = distance_test
                                                  ,beta = beta
                                                  ,type = "exp"
                                 )

                                 cbind(beta, run$r2,run$rmse)
                               }

plot(beta_calib[,1]
     ,beta_calib[,2]
     ,xlab = "beta value"
     ,ylab = "quality of fit, r"
     ,main = "influence of beta on the goodness of fit"
     ,pch = 19
     ,cex = 0.5
     ,type = "b")

beta_best_fit <- beta_calib[which(beta_calib[,2] == max(beta_calib[,2])),1]
x <- seq_len(100)/20
plot(x
     ,exp(-beta_best_fit*x)
     ,main = "cost function"
     ,xlab = "distance, km"
     ,ylab = "decay factor"
     ,pch = 19
     ,cex = 0.5
     ,type = "l")

run_best_fit <- run_model(flows = flows_test
                 ,distance = distance_test
                 ,beta = beta_best_fit
                 ,type = "exp"
                 )

plot(seq_along(run_best_fit$calib)
     ,run_best_fit$calib
     ,xlab = "iteration"
     ,ylab = "error"
     ,main = "calibration of balancing factors"
     ,pch = 19
     ,cex = 0.5
     ,type = "b"

)

plot(flows_test
     ,run_best_fit$values
     ,ylab = "flows model"
     ,xlab = "flows"
     ,log = "xy"
     ,pch = 19
     ,cex = 0.5)
lines(seq_len(max(run_best_fit$values))
      ,seq_len(max(run_best_fit$values))
      ,col = "darkred"
      ,lwd = 2)

## MODEL USING THE GLM And POISSON DISTRIBUTION

# flows_london <- rlist::list.load("flows_london.rds")

data(flows_london)

flows_london <- flows_london

sample_od <- sample(unique(flows_london$workplace),100)

flows_grav <- flows_london[(workplace %in% sample_od) & (residence %in% sample_od),]

flows_grav[,O := sum(bike),by = from_id]

flows_grav[,D := sum(bike), by = to_id]

#

model <- glm(bike ~ workplace+residence+distance -1
             ,data = flows_grav
             ,family = poisson(link = "log")
             )

((model$fitted.values - flows_grav$bike)) |> hist(breaks = 100)

r2 <- r_2(flows_grav$bike,model$fitted.values)
r2

Support functions

print("cost function:")
cost_function

print("calibration function:")
calibration

print("model run")
run_model

Any scripts or data that you put into this service are public.

cppSim documentation built on Sept. 9, 2025, 5:50 p.m.

rdrr.io home R language documentation Run R code online

CRAN packages Bioconductor packages R-Forge packages GitHub packages

Note that we can't provide technical support on individual packages. You should contact the package authors for that.

cppSim
Fast and Memory Efficient Spatial Interaction Models

Gravity model
In cppSim: Fast and Memory Efficient Spatial Interaction Models

What model to use ?

Support functions

Try the cppSim package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

cppSim Fast and Memory Efficient Spatial Interaction Models

Gravity model In cppSim: Fast and Memory Efficient Spatial Interaction Models

What model to use ?

Support functions

Try the cppSim package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

cppSim
Fast and Memory Efficient Spatial Interaction Models

Gravity model
In cppSim: Fast and Memory Efficient Spatial Interaction Models