# Packages used throughout this vignette
library(mobility)
library(dplyr)
library(reshape2)

# Chunk defaults: merge source and output into one block, prefix printed
# output with "#>", and let knitr work out cache dependencies between chunks
knitr::opts_chunk$set(
  collapse = TRUE,
  comment = "#>",
  autodep = TRUE
)
Researchers fit models to travel data that come from many different sources. We have therefore designed a generalized data frame template that standardizes travel data from various sources into a long-form format compatible with the modeling and simulation tools in this package. The travel_data_sim
object contains a simulated example to illustrate the structure of the data. This example data set contains simulated location information and the observed number of trips between origin and destination locations, as well as within home locations. The travel_data_template
object is an empty template that can be populated from scratch.
Since the long-form data structure is designed to accommodate different types of data, some columns may be left blank. For example, in a travel survey the rows may represent individuals, whereas in call data records the rows may represent total trip counts for an origin and destination.
In terms of spatial data, if your data contain coordinate locations down to administrative level 3, then levels 4 and 5 can be left blank and the functions will ignore them. Likewise, if all administrative units are in the same country, then the country-level columns (orig_adm0 and dest_adm0)
can be left blank.
# Display the structure (column names and classes) of the simulated example data
str(travel_data_sim)
# Data dictionary for the travel data template: one row per column of the
# template, giving the variable name, its class, and a short description.
# The values are listed row by row and reshaped with byrow = TRUE.
tab <- matrix(
  c(
    "date_start", "date", "beginning of the time interval for the trip count",
    "date_stop", "date", "end of the time interval for the trip count",
    "date_span", "integer", "time span in days",
    "indiv_id", "numeric", "unique individual identifier",
    "indiv_age", "numeric", "age of participant",
    "indiv_sex", "logical", "gender of participant",
    "indiv_type", "character", "if individual participants belong to different groups",
    "orig_adm0", "character", "name of highest administration level of origin location (Country)",
    "orig_adm1", "character", "name of administration level 1 of origin location (e.g. Division, State)",
    "orig_adm2", "character", "name of administration level 2 of origin location (e.g. District, County)",
    "orig_adm3", "character", "name of administration level 3 of origin location (e.g. Sub-district, Province)",
    "orig_adm4", "character", "name of administration level 4 of origin location (e.g. City, Municipality)",
    "orig_adm5", "character", "name of administration level 5 of origin location (e.g. Town, Village, Community, Ward)",
    "orig_type", "character", "administrative type for the origin location (e.g. sub-district, community vs town, or urban vs rural)",
    "orig_x", "numeric", "longitude of origin location centroid in decimal degrees (centroid of smallest admin unit)",
    "orig_y", "numeric", "latitude of origin location centroid in decimal degrees (centroid of smallest admin unit)",
    "orig_pop", "numeric", "population size of lowest administrative unit for origin location",
    "dest_adm0", "character", "name of highest administration level of destination location (Country)",
    "dest_adm1", "character", "name of administration level 1 of destination location (e.g. Division, State)",
    "dest_adm2", "character", "name of administration level 2 of destination location (e.g. District, County)",
    "dest_adm3", "character", "name of administration level 3 of destination location (e.g. Sub-district, Province)",
    "dest_adm4", "character", "name of administration level 4 of destination location (e.g. City, Municipality)",
    "dest_adm5", "character", "name of administration level 5 of destination location (e.g. Town, Village, Community, Ward)",
    "dest_type", "character", "administrative type for the destination location (e.g. sub-district, community vs town, or urban vs rural)",
    "dest_x", "numeric", "longitude of destination location centroid in decimal degrees (centroid of smallest admin unit)",
    "dest_y", "numeric", "latitude of destination location centroid in decimal degrees (centroid of smallest admin unit)",
    "dest_pop", "numeric", "population size of lowest administrative unit for destination location",
    "trips", "numeric", "total number of observed trips made from origin to destination during time span"
  ),
  ncol = 3,
  byrow = TRUE
)
colnames(tab) <- c("Variable", "Class", "Description")

# Render the dictionary as an HTML table
knitr::kable(tab, format = "html")
# Optional styling, currently disabled:
# %>% kableExtra::row_spec(1:nrow(tab), color='black', background = 'white')
This data template can be populated by starting with the travel_data_template
object and adding rows. The code below starts by adding information on trips from an origin to a destination.
# Travel among some locations: start from the empty template and fill it
# with simulated origin -> destination trips
trip <- travel_data_template
n <- 30 # number of locations
trip[seq_len(n), ] <- NA # add rows for each location

# Time span of travel data
trip$date_start <- as.Date("2020-01-01")
trip$date_stop <- trip$date_start + 7
trip$date_span <- difftime(trip$date_stop, trip$date_start, units = "days")

# Origin info: some counties within the same state
trip$orig_adm0 <- trip$dest_adm0 <- "A" # Country
trip$orig_adm1 <- trip$dest_adm1 <- "B" # State
trip$orig_adm2 <- sample(LETTERS, n, replace = TRUE)
trip$dest_adm2 <- sample(LETTERS, n, replace = TRUE)
trip$orig_type <- trip$dest_type <- "County" # Type of admin unit for lowest admin level

# Some fake coordinates in decimal degrees
trip$orig_x <- rnorm(n, -90, 2)
trip$orig_y <- rnorm(n, 30, 1)
trip$dest_x <- rnorm(n, -90, 2)
trip$dest_y <- rnorm(n, 30, 1)

# Population sizes of the origins and destinations
trip$orig_pop <- rnbinom(n, size = 5, mu = 5000)
trip$dest_pop <- rnbinom(n, size = 10, mu = 10000)

trip$trips <- rnbinom(n, size = 1, mu = 100) # Number of reported trips

# Drop rows whose origin and destination are the same unit
trip <- trip[trip$orig_adm2 != trip$dest_adm2, ]
In some cases it may be easier to fill in stays (the number of trips within the origin or home location) in a different data frame and then merge the two.
# Stays in home location: one row per unique origin in `trip`, with the
# destination set equal to the origin
stay <- travel_data_template
origins <- unique(trip$orig_adm2) # all unique origin locations
stay[seq_along(origins), ] <- NA # add one row per origin

# Time span of travel survey (taken from the first row of the trip data)
stay$date_start <- trip$date_start[1]
stay$date_stop <- trip$date_stop[1]
stay$date_span <- difftime(stay$date_stop, stay$date_start, units = "days")

stay$orig_adm0 <- stay$dest_adm0 <- "A" # Country
stay$orig_adm1 <- stay$dest_adm1 <- "B" # State
stay$orig_adm2 <- stay$dest_adm2 <- origins
stay$orig_type <- stay$dest_type <- "County"

# Copy coordinates and population from the first trip row that has each
# origin; match() returns the index of the first occurrence
sel <- match(origins, trip$orig_adm2)
stay$orig_x <- stay$dest_x <- trip$orig_x[sel]
stay$orig_y <- stay$dest_y <- trip$orig_y[sel]
stay$orig_pop <- stay$dest_pop <- trip$orig_pop[sel]

# Number of reported trips within home county
stay$trips <- rnbinom(length(origins), size = 10, mu = 1000)

# Combine trips and stays. Joining explicitly on every shared column is what
# the default natural join does, without emitting the "Joining with `by`"
# message.
travel_data <- dplyr::full_join(
  trip,
  stay,
  by = intersect(names(trip), names(stay))
)

head(travel_data, n = 3)
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.