library(mobility)
library(dplyr)
library(reshape2)

# Global knitr chunk options for this vignette: merge each chunk's source and
# output into one block, prefix printed output with "#>", and let knitr work
# out cache dependencies between chunks automatically.
knitr::opts_chunk$set(collapse = TRUE, comment = "#>", autodep = TRUE)

There are many sources of travel data that researchers may wish to fit models to. We have therefore designed a generalized data frame template that standardizes travel data from various sources into a long-form format compatible with the modeling and simulation tools in this package. The travel_data_sim object contains a simulated example that illustrates the structure of the data. This example data set contains simulated location information and observed numbers of trips among origin and destination locations, as well as within home locations. The travel_data_template object is an empty template that can be populated from scratch.

Since the long-form data structure is designed to accommodate different types of data, some columns may be left blank. For example, in a travel survey each row may represent an individual, whereas in call data records each row may represent the total trip count for an origin and destination pair.

In terms of spatial data, if your data contain coordinate locations down to administrative level 3, then levels 4 and 5 can be left blank and the functions will ignore them. Likewise, if all administrative units are in the same country, then the adm0 columns (orig_adm0 and dest_adm0) can be left blank.

A simulated example

# Print a compact summary of the structure of the simulated example data
str(travel_data_sim)

Detailed variable descriptions

# Data dictionary for the travel data template. The character vector below is
# laid out as consecutive (variable, class, description) triplets and reshaped
# row-wise into a three-column matrix, one row per template column.
tab <- matrix(
  c('date_start', 'date', 'beginning of the time interval for the trip count',
    'date_stop', 'date', 'end of the time interval for the trip count',
    'date_span', 'integer','time span in days',
    'indiv_id', 'numeric', 'unique individual identifier',
    'indiv_age', 'numeric',  'age of participant',
    'indiv_sex', 'logical', 'gender of participant',
    'indiv_type', 'character', 'if individual participants belong to different groups',
    'orig_adm0', 'character', 'name of highest administration level of origin location (Country)',
    'orig_adm1', 'character', 'name of administration level 1 of origin location (e.g. Division, State)',
    'orig_adm2', 'character', 'name of administration level 2 of origin location (e.g. District, County)',
    'orig_adm3', 'character', 'name of administration level 3 of origin location (e.g. Sub-district, Province)',
    'orig_adm4', 'character', 'name of administration level 4 of origin location (e.g. City, Municipality)',
    'orig_adm5', 'character', 'name of administration level 5 of origin location (e.g. Town, Village, Community, Ward)',
    'orig_type', 'character', 'administrative type for the origin location (e.g. sub-district, community vs town, or urban vs rural)',
    'orig_x', 'numeric', 'longitude of origin location centroid in decimal degrees (centroid of smallest admin unit)',
    'orig_y', 'numeric', 'latitude of origin location centroid in decimal degrees (centroid of smallest admin unit)',
    'orig_pop', 'numeric', 'population size of lowest administrative unit for origin location',
    'dest_adm0', 'character', 'name of highest administration level of destination location (Country)',
    'dest_adm1', 'character', 'name of administration level 1 of destination location (e.g. Division, State)',
    'dest_adm2', 'character', 'name of administration level 2 of destination location (e.g. District, County)',
    'dest_adm3', 'character', 'name of administration level 3 of destination location (e.g. Sub-district, Province)',
    'dest_adm4', 'character', 'name of administration level 4 of destination location (e.g. City, Municipality)',
    'dest_adm5', 'character', 'name of administration level 5 of destination location (e.g. Town, Village, Community, Ward)',
    'dest_type', 'character', 'administrative type for the destination location (e.g. sub-district, community vs town, or urban vs rural)',
    'dest_x', 'numeric', 'longitude of destination location centroid in decimal degrees (centroid of smallest admin unit)',
    'dest_y', 'numeric', 'latitude of destination location centroid in decimal degrees (centroid of smallest admin unit)',
    'dest_pop', 'numeric', 'population size of lowest administrative unit for destination location',
    'trips', 'numeric', 'total number of observed trips made from origin to destination during time span'
  ),
  ncol = 3,
  byrow = TRUE
)

colnames(tab) <- c('Variable', 'Class', 'Description')

# Render the dictionary as an HTML table
knitr::kable(tab, format = "html")
# Optional row styling, currently disabled:
# %>% kableExtra::row_spec(1:nrow(tab), color='black', background = 'white')

Populating a travel data template from scratch

This data template can be populated by starting with the travel_data_template object and adding rows. The code below starts by adding information on trips from an origin to a destination.

# Travel among some locations: start from the empty template and fill in
# simulated origin-destination trip records
trip <- travel_data_template

n <- 30 # number of trip records (rows) to simulate
trip[seq_len(n), ] <- NA # add n empty rows to be filled in below

# Time span of travel data (the same one-week interval for every row)
trip$date_start <- as.Date("2020-01-01")
trip$date_stop <- trip$date_start + 7
trip$date_span <- difftime(trip$date_stop, trip$date_start, units = 'days')

# Origin and destination info: some counties within the same state
trip$orig_adm0 <- trip$dest_adm0 <- 'A' # Country
trip$orig_adm1 <- trip$dest_adm1 <- 'B' # State
trip$orig_adm2 <- sample(LETTERS, n, replace = TRUE)
trip$dest_adm2 <- sample(LETTERS, n, replace = TRUE)
trip$orig_type <- trip$dest_type <- 'County' # Type of admin unit for lowest admin level

# Some fake coordinates in decimal degrees
trip$orig_x <- rnorm(n, -90, 2)
trip$orig_y <- rnorm(n, 30, 1)
trip$dest_x <- rnorm(n, -90, 2)
trip$dest_y <- rnorm(n, 30, 1)

# Population sizes of the origins and destinations
trip$orig_pop <- rnbinom(n, size = 5, mu = 5000)
trip$dest_pop <- rnbinom(n, size = 10, mu = 10000)

trip$trips <- rnbinom(n, size = 1, mu = 100) # Number of reported trips

# Drop rows whose origin and destination are the same county; trips within a
# location (stays) are not part of this table
trip <- trip[trip$orig_adm2 != trip$dest_adm2, ]

In some cases it may be easier to fill in stays (the number of trips within the origin or home location) in a different data frame and then merge the two.

# Stays in home location: one row per unique origin county in the trip data
stay <- travel_data_template
origins <- unique(trip$orig_adm2) # all distinct origin counties
stay[seq_along(origins), ] <- NA # add one empty row per origin

# Time span of travel survey (same interval as the trip data)
stay$date_start <- trip$date_start[1]
stay$date_stop <- trip$date_stop[1]
stay$date_span <- difftime(stay$date_stop, stay$date_start, units = 'days')

# For a stay, the origin and the destination are the same location
stay$orig_adm0 <- stay$dest_adm0 <- 'A' # Country
stay$orig_adm1 <- stay$dest_adm1 <- 'B' # State
stay$orig_adm2 <- stay$dest_adm2 <- origins
stay$orig_type <- stay$dest_type <- 'County'

# Copy coordinates and population size from the first trip row that has the
# same origin county; match() returns the index of the first match
first_row <- match(stay$orig_adm2, trip$orig_adm2)
stay$orig_x <- stay$dest_x <- trip$orig_x[first_row]
stay$orig_y <- stay$dest_y <- trip$orig_y[first_row]
stay$orig_pop <- stay$dest_pop <- trip$orig_pop[first_row]

# Number of reported trips within home county
stay$trips <- rnbinom(length(origins), size = 10, mu = 1000)

# Combine trips and stays; the join uses all shared columns, and the message
# dplyr prints about the inferred join columns is suppressed
suppressMessages(
  travel_data <- dplyr::full_join(trip, stay)
)

head(travel_data, n = 3)


COVID-19-Mobility-Data-Network/mobility documentation built on Nov. 22, 2021, 12:17 a.m.