#' Compute Prevalence
#'
#' Returns prevalence (P1) for each location. This represents proportion of travelers in a one-day travel survey sample with nonzero travel activity.
#'
#' @param ts A TravelSurvey object
#'
#' @return A data frame with P1 values across locations
#' @export
getPrevalence <- function(ts){
validObject(ts)
# Calculate proportion that are inactive (P0), subtract from 1
p1.df <- getDurationByMode(ts) %>%
group_by(location) %>%
summarise(n0 = sum(walk + cycle == 0), n = n()) %>%
mutate(p1 = 1 - n0/n) %>%
select(location, p1)
return(p1.df)
}
#' Compute Participation
#'
#' Returns participation (pi1) for each location. This represents proportion of the population that travel actively at minimum weekly.
#'
#' @param ts A TravelSurvey object
#'
#' @return A data frame with participation (pi1) values across locations
#' @export
getParticipation <- function(ts){
validObject(ts)
participation.df <- ts@location %>% select(location, participation) %>% dplyr::filter(!is.na(participation))
return(participation.df)
}
#' Compute Travel Activity Parameters
#'
#' Calculate and return a data frame of active travel frequencies across locations. This represents the probability that an active traveler is active on a given day. Estimated as the ratio between prevalence and participation, where prevalence is the proportion of travelers in a one-day travel survey sample with nonzero travel
#'
#' @param ts A TravelSurvey object
#'
#' @return A data frame with active travel frequency
#' @export
getFrequency <- function(ts){
validObject(ts)
# Calculate prevalence and participation
p1.df <- getPrevalence(ts)
pi1.df <- getParticipation(ts)
fAT.df <- full_join(p1.df, pi1.df, by = "location")
# Frequency = Prevalence / Participation
fAT.df <- fAT.df %>%
mutate( frequency = p1/participation ) %>%
mutate( frequency = ifelse(frequency > 1, 1, frequency) ) %>%
select(location, frequency)
return(fAT.df)
}
#' Compute Travel Activity
#'
#' @param ts A TravelSurvey object
#' @param sd.walk Standard deviation of distribution of walk activity (minutes)
#' @param sd.cycle Standard deviation of distribution of cycle activity (minutes)
#'
#' @return A data frame with location and total travel activity (MET hours/week)
#' @export
getTravelActivity <- function(ts, sd.walk = 0, sd.cycle = 0, activeTravelers = TRUE){
validObject(ts)
if( activeTravelers ){ ts.df <- getDurationByMode(ts) %>% dplyr::filter(walk + cycle > 0 ) }
else{ ts.df <- getDurationByMode(ts) }
# Account for error in duration reporting, scale by METs
ts.df <- ts.df %>%
mutate( walk = walk + rnorm(length(walk), mean = 0, sd = sd.walk), cycle = cycle + rnorm(length(cycle), mean = 0, sd = sd.cycle) ) %>%
mutate( dailyTA = 3*walk/60 + 6*cycle/60 ) %>%
select(location, houseID, subjectID, dailyTA)
# Scale from daily to weekly using frequency
ts.df <- full_join(ts.df, getFrequency(ts)) %>%
mutate( TA = 7*frequency*dailyTA ) %>%
dplyr::filter(!is.na(frequency)) %>%
select(location, houseID, subjectID, TA)
return(ts.df)
}
#' Compute Mean Total Activity
#'
#' Calculate intensity (conditional mean of active travelers) for each location. Within this distribution, return the mean and standard deviation for each location. User may differentiate between sample of only active travelers, or all travelers.
#'
#' @param ts A TravelSurvey object
#' @param activeTravelers Boolean demonstrating that the data reflect active travelers
#'
#' @return A data frame with travel activity mean and standard deviation for each location
#' @export
getIntensity <- function(ts, activeTravelers = TRUE){
# Get TA
TA.df <- getTravelActivity(ts, activeTravelers = activeTravelers)
# Calculate mean TA
ts.df <- TA.df %>%
group_by(location) %>%
summarise(mean = mean(TA), sd = sd(TA))
return(ts.df)
}
#' Compute Duration by Mode
#'
#' Computes the duration of travel, stratified by mode, given a TravelSurvey object
#'
#' @param ts A TravelSurvey object
#'
#' @return A data frame with duration by mode
#' @export
getDurationByMode <- function(ts){
ts.df <- full_join(ts@trip, full_join(ts@person, ts@house, by = "houseID"), by = c("houseID","subjectID"))
# Sum duration by mode
ts.df <- ts.df %>%
mutate( mode = fct_explicit_na(mode, na_level = "NA")) %>%
group_by(location, houseID, subjectID, mode) %>%
summarise(T = sum(duration)) %>%
spread(mode, T, fill = 0, drop = TRUE)
# Account for scenarios with no trips of a given mode after spread
if( !("walk" %in% unique(ts@trip$mode)) ){ ts.df <- ts.df %>% mutate( walk = 0 ) }
if( !("cycle" %in% unique(ts@trip$mode)) ){ ts.df <- ts.df %>% mutate( cycle = 0 ) }
if( !("other" %in% unique(ts@trip$mode)) ){ ts.df <- ts.df %>% mutate( other = 0 ) }
ts.df <- ts.df %>% select(location, houseID, subjectID, walk, cycle, other) %>% ungroup()
return(ts.df)
}
#' Compute Mean Duration by Mode
#'
#' For each houseID/subjectID, calculates the mean individual time spent across modes of transportation (walk, cycle, other). User may differentiate between sample of only active travelers, or all travelers.
#'
#' @param ts A TravelSurvey object
#' @param activeTravelers Boolean demonstrating that the data reflect active travelers
#'
#' @return A data frame with mean daily values for each mode of transportation (walk, cycle, other)
#' @export
getMeans <- function(ts, activeTravelers = TRUE){
validObject(ts)
if( !activeTravelers ){ ts.df <- getDurationByMode(ts) }
else{ ts.df <- getDurationByMode(ts) %>% dplyr::filter(walk + cycle > 0 ) }
daily.df <- ts.df %>%
select(location, houseID, subjectID, walk, cycle, other) %>%
group_by(location) %>%
summarise(walk = mean(walk), cycle = mean(cycle), other = mean(other))
return(daily.df)
}
#' Compute Mean Trips
#'
#' For each houseID/subjectID, calculates the mean individual number of trips across modes of transportation (walk, cycle, other). User may differentiate between sample of only active travelers, or all travelers.
#'
#' @param ts A TravelSurvey object
#'
#' @return A data frame with mean daily values for each mode of transportation (walk, cycle, other)
#' @export
getTrips <- function(ts){
validObject(ts)
ts.df <- left_join(ts@trip, ts@house, by = "houseID") # merge location (and year) into trip data
ts.df <- ts.df %>%
dplyr::filter(mode %in% c("walk", "cycle", "other")) %>%
group_by(location, houseID, subjectID, mode) %>%
summarise(count = n()) %>%
spread(mode, count, fill = 0, drop = TRUE) %>%
select(location, houseID, subjectID, walk, cycle, other) %>%
group_by(location) %>%
summarise(walk = mean(walk), cycle = mean(cycle), other = mean(other))
return(ts.df)
}
#' Compute the proportion of the population that is meeting physical activity guidelines through active transport
#'
#' @param ts A TravelSurvey object
#' @param minimum The minimum activity threshold of MET-hrs/week that persons should meet in order to avoid health risks. Default is 7.5 based on LTPA recommendation.
#' @param byLocation Boolean dictating whether proportion returned will be singular value for overall TS object, or values for each location
#'
#' @return Percentage of sample meeting minimum activity. If stratified by location, a dataframe with columns location and proportion. If not, a singular value.
#' @export
getProportion <- function(ts, minimum = 7.5, byLocation = TRUE){
# Calculate travel activity, find proportion above activity level stratified either by location or overall TS object
if( byLocation ){ results <- getTravelActivity(ts) %>% group_by(location) %>% summarise( proportion = length(TA[TA >= minimum])/length(TA) ) }
else{
activity <- getTravelActivity(ts)
results <- length(activity$TA[which(activity$TA >= minimum)])/length(activity$TA)
}
return(results)
}
#' Computes outliers from TravelSurvey object for given parameter
#'
#' This function reports outliers from TravelSurvey object based on the IQR of the given parameter. See boxplot.stats().
#'
#' @param ts TravelSurvey object
#' @param parameter Parameter for which to calculate outliers in given TravelSurvey object. Options are: "participation", "frequency", "intensity", "TA" (Travel Activity).
#'
#' @return A data frame of outliers corresponding to the given parameter.
#' @export
getOutliers <- function(ts, parameter){
# Intialise additional variable to account for intensity/mean variable names
parameter.m <- parameter
# Use HOT functions to calculate values of given parameter
if( parameter == "participation" ){ df <- getParticipation(ts) }
else if( parameter == "frequency" ){ df <- getFrequency(ts) }
else if( parameter == "intensity" ){ df <- getIntensity(ts) ; parameter <- "mean" }
else if( parameter == "TA" ){ df <- getTravelActivity(ts) }
else{ message("Problem with parameter argument: ", parameter) }
# Compute outliers and return subset, if any
outliers <- boxplot.stats(df[[parameter]])$out
if( length(outliers) > 0 ){
index <- which(df[[parameter]] %in% outliers)
results <- df[index,]
}else{ message("No outliers for parameter: ", parameter.m) }
return(results)
}
#' Removes outliers from TravelSurvey object
#'
#' This function removes outliers from TravelSurvey object based on given lower/upper bound arguments.
#'
#' @param ts TravelSurvey object
#'
#' @return A TravelSurvey object.
#' @export
removeOutliers <- function(ts, lower = 1, upper = 50){
# Calculate TA
TA.df <- getTravelActivity(ts, sd.walk = 0, sd.cycle = 0, activeTravelers = FALSE)
# Create data frames of TS values outside given lower/upper bounds
zero.df <- subset(TA.df, TA < lower)
exclude.df <- subset(TA.df, TA > upper)
# Re-initialize TS slots, excluding those TA values outside given upper/lower bounds
person <- anti_join(ts@person, exclude.df, by = c("houseID","subjectID")) %>% select(houseID, subjectID, sex, age)
trip <- anti_join(ts@trip, exclude.df, by = c("houseID","subjectID")) %>% select(houseID, subjectID, duration, mode)
trip_nonzero <- anti_join(trip, zero.df, by = c("houseID","subjectID")) %>% select(houseID, subjectID, duration, mode)
trip_zero <- inner_join(trip, zero.df, by = c("houseID","subjectID")) %>% select(houseID, subjectID, duration, mode)
trip_zero <- within(trip_zero, duration <- ifelse(mode != "other", 0, duration))
trip <- rbind(trip_nonzero, trip_zero)
# Re-initialize TS object
ts2 <- initialize(ts, house = ts@house, person = person, trip = trip, location = ts@location)
return(ts2)
}
#' Return the sample size
#'
#' Return the sample size of a TravelSurvey object stratified by location. Both the number of houses and subjects is returned.
#'
#' @param ts TravelSurvey object
#'
#' @return A tibble containing the sample size by location
#' @export
getSampleSize <- function(ts){
# Calculate individual and household-level sample sizes
n.person.df <- inner_join(ts@house, ts@person, by = "houseID") %>% group_by(location) %>% summarise(person = n())
n.house.df <- ts@house %>% group_by(location) %>% summarise(house = n())
ss.df <- full_join(n.person.df, n.house.df, by = "location")
return(ss.df)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.