gravity_model.R
In cppSim: Fast and Memory Efficient Spatial Interaction Models

## ----setup, echo=FALSE, include=TRUE, eval=TRUE-------------------------------
knitr::opts_chunk$set(
  echo = TRUE,
  eval = TRUE,
  cache = TRUE,
  tidy = FALSE,
  warning = FALSE,
  error = FALSE
)

library(rlist)
library(data.table)
library(foreach)


## ----echo=FALSE---------------------------------------------------------------

# The good old original R code to run SIM that was used before cppSim

### this file contains the functions used to run the gravity model

cost_function <- function(d, beta, type = "exp") {
  # d is the od distance matrix, taking the exponential
  # means doing this operation for each individual element
  #  with an exponent beta
  # the type parameter allows to set it to either exp or pow.
  #  pow means we use a power function as cost, rather than exponential

  if (type == "exp") {
    exp(-beta * d)
  } else if (type == "pow") {
    d^(-beta)
  } else {
    print("provide a type of functino to compute")
  }
}

r_2 <- function(d, f) {
  cor(
    d |> as.numeric(),
    f |> as.numeric()
  )^2
}

r <- function(d, f) {
  cor(
    d |> as.numeric(),
    f |> as.numeric()
  )
}

rmse <- function(d, f) sum((d - f)^2)

calibration <- function(cost_fun, O, D, delta = 0.05) {
  B <- rep_len(1, nrow(cost_fun))
  eps <- abs(sum(B))
  e <- NULL
  i <- 0
  while ((eps > delta) & (i < 50)) {
    A_new <- 1 / (apply(cost_fun, function(x) sum(B * D * x), MARGIN = 1))
    B_new <- 1 / (apply(cost_fun, function(x) sum(A_new * O * x), MARGIN = 2))
    eps <- abs(sum(B_new - B))
    e <- append(e, eps)
    A <- A_new
    B <- B_new
    i <- i + 1
  }
  list(
    "A" = A,
    "B" = B,
    "e" = e
  )
}

run_model <- function(
    flows,
    distance,
    beta = 0.25,
    type = "exp"
    # ,cores = 3
    ) {
  F_c <- cost_function(d = {{ distance }}, beta = {{ beta }}, type = type)
  print("cost function computed")
  O <- apply(flows, sum, MARGIN = 1) |> as.integer()
  D <- apply(flows, sum, MARGIN = 2) |> as.integer()
  A_B <- calibration(
    cost_fun = F_c,
    O = O,
    D = D,
    delta = .001
  )
  print("calibration: over")
  A <- A_B$A
  B <- A_B$B

  flows_model <- foreach(
    j = c(1:nrow(F_c)),
    .combine = rbind
  ) %do% {
    round(A[j] * B * O[j] * D * F_c[j, ])
  }

  print("model run: over")
  e_sor <- e_sorensen(flows, flows_model) |> as.numeric()
  print(paste0("E_sor = ", e_sor))
  r2 <- r_2(flows_model, flows) |> as.numeric()
  print(paste0("r2 = ", r2))
  RMSE <- rmse(flows_model, flows) |> as.numeric()
  print(paste0("RMSE = ", RMSE))

  list(
    "values" = flows_model,
    "r2" = r2,
    "rmse" = RMSE,
    "calib" = A_B$e,
    "e_sor" = e_sor
  )
}

### Validation

e_sorensen <- function(data, fit) {
  2 * sum(apply(cbind(
    data |> c(),
    fit |> c()
  ), MARGIN = 1, FUN = min)) / (sum(data) + sum(fit))
}



## ----load_data, echo=FALSE----------------------------------------------------
library(cppSim)

data("distance_test")
data("flows_test")

distance_test <- (distance_test / 1000 / 14) |> round()


## -----------------------------------------------------------------------------
# creating the O, D vectors. 
O <- apply(flows_test, sum, MARGIN = 2) |> c()

D <- apply(flows_test, sum, MARGIN = 1) |> c()

## -----------------------------------------------------------------------------
F_c <- cost_function(distance_test,1,type = "exp")


## -----------------------------------------------------------------------------
beta_calib <- foreach::foreach(i = 28:33
                               ,.combine = rbind) %do% {
                                 beta <- 0.1*(i - 1)
                                 print(paste0("RUNNING MODEL FOR beta = ",beta))
                                 run <- run_model(flows = flows_test
                                                  ,distance = distance_test
                                                  ,beta = beta
                                                  ,type = "exp"
                                 )
                             
                                 cbind(beta, run$r2,run$rmse)
                               }


## -----------------------------------------------------------------------------

plot(beta_calib[,1]
     ,beta_calib[,2]
     ,xlab = "beta value"
     ,ylab = "quality of fit, r"
     ,main = "influence of beta on the goodness of fit"
     ,pch = 19
     ,cex = 0.5
     ,type = "b")



## -----------------------------------------------------------------------------

beta_best_fit <- beta_calib[which(beta_calib[,2] == max(beta_calib[,2])),1]
x <- seq_len(100)/20
plot(x
     ,exp(-beta_best_fit*x)
     ,main = "cost function"
     ,xlab = "distance, km"
     ,ylab = "decay factor"
     ,pch = 19
     ,cex = 0.5
     ,type = "l")


## -----------------------------------------------------------------------------

run_best_fit <- run_model(flows = flows_test
                 ,distance = distance_test
                 ,beta = beta_best_fit
                 ,type = "exp"
                 )


## -----------------------------------------------------------------------------

plot(seq_along(run_best_fit$calib)
     ,run_best_fit$calib
     ,xlab = "iteration"
     ,ylab = "error"
     ,main = "calibration of balancing factors"
     ,pch = 19
     ,cex = 0.5
     ,type = "b"
     
)



## -----------------------------------------------------------------------------
plot(flows_test
     ,run_best_fit$values
     ,ylab = "flows model"
     ,xlab = "flows"
     ,log = "xy"
     ,pch = 19
     ,cex = 0.5)
lines(seq_len(max(run_best_fit$values))
      ,seq_len(max(run_best_fit$values))
      ,col = "darkred"
      ,lwd = 2)


## -----------------------------------------------------------------------------

## MODEL USING THE GLM And POISSON DISTRIBUTION

# flows_london <- rlist::list.load("flows_london.rds")

data(flows_london)

flows_london <- flows_london

sample_od <- sample(unique(flows_london$workplace),100)

flows_grav <- flows_london[(workplace %in% sample_od) & (residence %in% sample_od),]

flows_grav[,O := sum(bike),by = from_id]

flows_grav[,D := sum(bike), by = to_id]

#

model <- glm(bike ~ workplace+residence+distance -1
             ,data = flows_grav
             ,family = poisson(link = "log")
             )

((model$fitted.values - flows_grav$bike)) |> hist(breaks = 100)

r2 <- r_2(flows_grav$bike,model$fitted.values)
r2


## -----------------------------------------------------------------------------

print("cost function:")
cost_function

print("calibration function:")
calibration

print("model run")
run_model

Any scripts or data that you put into this service are public.

cppSim documentation built on Sept. 9, 2025, 5:50 p.m.

rdrr.io home R language documentation Run R code online

CRAN packages Bioconductor packages R-Forge packages GitHub packages

Note that we can't provide technical support on individual packages. You should contact the package authors for that.

cppSim
Fast and Memory Efficient Spatial Interaction Models

inst/doc/gravity_model.R
In cppSim: Fast and Memory Efficient Spatial Interaction Models

Try the cppSim package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

cppSim Fast and Memory Efficient Spatial Interaction Models

inst/doc/gravity_model.R In cppSim: Fast and Memory Efficient Spatial Interaction Models

Try the cppSim package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

cppSim
Fast and Memory Efficient Spatial Interaction Models

inst/doc/gravity_model.R
In cppSim: Fast and Memory Efficient Spatial Interaction Models