Nothing
## ----setup, echo=FALSE, include=TRUE, eval=TRUE-------------------------------
knitr::opts_chunk$set(
echo = TRUE,
eval = TRUE,
cache = TRUE,
tidy = FALSE,
warning = FALSE,
error = FALSE
)
library(rlist)
library(data.table)
library(foreach)
## ----echo=FALSE---------------------------------------------------------------
# The good old original R code to run SIM that was used before cppSim
### this file contains the functions used to run the gravity model
cost_function <- function(d, beta, type = "exp") {
# d is the od distance matrix, taking the exponential
# means doing this operation for each individual element
# with an exponent beta
# the type parameter allows to set it to either exp or pow.
# pow means we use a power function as cost, rather than exponential
if (type == "exp") {
exp(-beta * d)
} else if (type == "pow") {
d^(-beta)
} else {
print("provide a type of functino to compute")
}
}
r_2 <- function(d, f) {
cor(
d |> as.numeric(),
f |> as.numeric()
)^2
}
r <- function(d, f) {
cor(
d |> as.numeric(),
f |> as.numeric()
)
}
rmse <- function(d, f) sum((d - f)^2)
calibration <- function(cost_fun, O, D, delta = 0.05) {
B <- rep_len(1, nrow(cost_fun))
eps <- abs(sum(B))
e <- NULL
i <- 0
while ((eps > delta) & (i < 50)) {
A_new <- 1 / (apply(cost_fun, function(x) sum(B * D * x), MARGIN = 1))
B_new <- 1 / (apply(cost_fun, function(x) sum(A_new * O * x), MARGIN = 2))
eps <- abs(sum(B_new - B))
e <- append(e, eps)
A <- A_new
B <- B_new
i <- i + 1
}
list(
"A" = A,
"B" = B,
"e" = e
)
}
run_model <- function(
flows,
distance,
beta = 0.25,
type = "exp"
# ,cores = 3
) {
F_c <- cost_function(d = {{ distance }}, beta = {{ beta }}, type = type)
print("cost function computed")
O <- apply(flows, sum, MARGIN = 1) |> as.integer()
D <- apply(flows, sum, MARGIN = 2) |> as.integer()
A_B <- calibration(
cost_fun = F_c,
O = O,
D = D,
delta = .001
)
print("calibration: over")
A <- A_B$A
B <- A_B$B
flows_model <- foreach(
j = c(1:nrow(F_c)),
.combine = rbind
) %do% {
round(A[j] * B * O[j] * D * F_c[j, ])
}
print("model run: over")
e_sor <- e_sorensen(flows, flows_model) |> as.numeric()
print(paste0("E_sor = ", e_sor))
r2 <- r_2(flows_model, flows) |> as.numeric()
print(paste0("r2 = ", r2))
RMSE <- rmse(flows_model, flows) |> as.numeric()
print(paste0("RMSE = ", RMSE))
list(
"values" = flows_model,
"r2" = r2,
"rmse" = RMSE,
"calib" = A_B$e,
"e_sor" = e_sor
)
}
### Validation
e_sorensen <- function(data, fit) {
2 * sum(apply(cbind(
data |> c(),
fit |> c()
), MARGIN = 1, FUN = min)) / (sum(data) + sum(fit))
}
## ----load_data, echo=FALSE----------------------------------------------------
library(cppSim)
data("distance_test")
data("flows_test")
distance_test <- (distance_test / 1000 / 14) |> round()
## -----------------------------------------------------------------------------
# creating the O, D vectors.
O <- apply(flows_test, sum, MARGIN = 2) |> c()
D <- apply(flows_test, sum, MARGIN = 1) |> c()
## -----------------------------------------------------------------------------
F_c <- cost_function(distance_test,1,type = "exp")
## -----------------------------------------------------------------------------
beta_calib <- foreach::foreach(i = 28:33
,.combine = rbind) %do% {
beta <- 0.1*(i - 1)
print(paste0("RUNNING MODEL FOR beta = ",beta))
run <- run_model(flows = flows_test
,distance = distance_test
,beta = beta
,type = "exp"
)
cbind(beta, run$r2,run$rmse)
}
## -----------------------------------------------------------------------------
plot(beta_calib[,1]
,beta_calib[,2]
,xlab = "beta value"
,ylab = "quality of fit, r"
,main = "influence of beta on the goodness of fit"
,pch = 19
,cex = 0.5
,type = "b")
## -----------------------------------------------------------------------------
beta_best_fit <- beta_calib[which(beta_calib[,2] == max(beta_calib[,2])),1]
x <- seq_len(100)/20
plot(x
,exp(-beta_best_fit*x)
,main = "cost function"
,xlab = "distance, km"
,ylab = "decay factor"
,pch = 19
,cex = 0.5
,type = "l")
## -----------------------------------------------------------------------------
run_best_fit <- run_model(flows = flows_test
,distance = distance_test
,beta = beta_best_fit
,type = "exp"
)
## -----------------------------------------------------------------------------
plot(seq_along(run_best_fit$calib)
,run_best_fit$calib
,xlab = "iteration"
,ylab = "error"
,main = "calibration of balancing factors"
,pch = 19
,cex = 0.5
,type = "b"
)
## -----------------------------------------------------------------------------
plot(flows_test
,run_best_fit$values
,ylab = "flows model"
,xlab = "flows"
,log = "xy"
,pch = 19
,cex = 0.5)
lines(seq_len(max(run_best_fit$values))
,seq_len(max(run_best_fit$values))
,col = "darkred"
,lwd = 2)
## -----------------------------------------------------------------------------
## MODEL USING THE GLM And POISSON DISTRIBUTION
# flows_london <- rlist::list.load("flows_london.rds")
data(flows_london)
flows_london <- flows_london
sample_od <- sample(unique(flows_london$workplace),100)
flows_grav <- flows_london[(workplace %in% sample_od) & (residence %in% sample_od),]
flows_grav[,O := sum(bike),by = from_id]
flows_grav[,D := sum(bike), by = to_id]
#
model <- glm(bike ~ workplace+residence+distance -1
,data = flows_grav
,family = poisson(link = "log")
)
((model$fitted.values - flows_grav$bike)) |> hist(breaks = 100)
r2 <- r_2(flows_grav$bike,model$fitted.values)
r2
## -----------------------------------------------------------------------------
print("cost function:")
cost_function
print("calibration function:")
calibration
print("model run")
run_model
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.