HOT: The Health-Oriented Transportation (HOT) Model

#' Compute Prevalence
#'
#' Returns prevalence (P1) for each location. This represents proportion of travelers in a one-day travel survey sample with nonzero travel activity.
#'
#' @param ts A TravelSurvey object
#'
#' @return A data frame with P1 values across locations
#' @export
getPrevalence <- function(ts){
  validObject(ts)

  # Calculate proportion that are inactive (P0), subtract from 1
  p1.df <- getDurationByMode(ts) %>%
    group_by(location) %>%
    summarise(n0 = sum(walk + cycle == 0), n = n()) %>%
    mutate(p1 = 1 - n0/n) %>%
    select(location, p1)

  return(p1.df)
}
#' Compute Participation
#'
#' Returns participation (pi1) for each location. This represents proportion of the population that travel actively at minimum weekly.
#'
#' @param ts A TravelSurvey object
#'
#' @return A data frame with participation (pi1) values across locations
#' @export
getParticipation <- function(ts){
  validObject(ts)

  participation.df <- ts@location %>% select(location, participation) %>% dplyr::filter(!is.na(participation))
  return(participation.df)
}
#' Compute Travel Activity Parameters
#'
#' Calculate and return a data frame of active travel frequencies across locations. This represents the probability that an active traveler is active on a given day. Estimated as the ratio between prevalence and participation, where prevalence is the proportion of travelers in a one-day travel survey sample with nonzero travel
#'
#' @param ts A TravelSurvey object
#'
#' @return A data frame with active travel frequency
#' @export
getFrequency <- function(ts){
  validObject(ts)

  # Calculate prevalence and participation
  p1.df <- getPrevalence(ts)
  pi1.df <- getParticipation(ts)
  fAT.df <- full_join(p1.df, pi1.df, by = "location")

  # Frequency = Prevalence / Participation
  fAT.df <- fAT.df %>%
    mutate( frequency = p1/participation ) %>%
    mutate( frequency = ifelse(frequency > 1, 1, frequency) ) %>%
    select(location, frequency)

  return(fAT.df)
}
#' Compute Travel Activity
#'
#' @param ts A TravelSurvey object
#' @param sd.walk Standard deviation of distribution of walk activity (minutes)
#' @param sd.cycle Standard deviation of distribution of cycle activity (minutes)
#'
#' @return A data frame with location and total travel activity (MET hours/week)
#' @export
getTravelActivity <- function(ts, sd.walk = 0, sd.cycle = 0, activeTravelers = TRUE){
  validObject(ts)

  if( activeTravelers ){ ts.df <- getDurationByMode(ts) %>% dplyr::filter(walk + cycle > 0 ) }
  else{ ts.df <- getDurationByMode(ts) }

  # Account for error in duration reporting, scale by METs
  ts.df <- ts.df %>%
    mutate( walk = walk + rnorm(length(walk), mean = 0, sd = sd.walk), cycle = cycle + rnorm(length(cycle), mean = 0, sd = sd.cycle) ) %>%
    mutate( dailyTA = 3*walk/60 + 6*cycle/60 ) %>%
    select(location, houseID, subjectID, dailyTA)

  # Scale from daily to weekly using frequency
  ts.df <- full_join(ts.df, getFrequency(ts)) %>%
    mutate( TA = 7*frequency*dailyTA ) %>%
    dplyr::filter(!is.na(frequency)) %>%
    select(location, houseID, subjectID, TA)

  return(ts.df)
}
#' Compute Mean Total Activity
#'
#' Calculate intensity (conditional mean of active travelers) for each location. Within this distribution, return the mean and standard deviation for each location. User may differentiate between sample of only active travelers, or all travelers.
#'
#' @param ts A TravelSurvey object
#' @param activeTravelers Boolean demonstrating that the data reflect active travelers
#'
#' @return A data frame with travel activity mean and standard deviation for each location
#' @export
getIntensity <- function(ts, activeTravelers = TRUE){

  # Get TA
  TA.df <- getTravelActivity(ts, activeTravelers = activeTravelers)

  # Calculate mean TA
  ts.df <- TA.df %>%
    group_by(location) %>%
    summarise(mean = mean(TA), sd = sd(TA))

  return(ts.df)
}
#' Compute Duration by Mode
#'
#' Computes the duration of travel, stratified by mode, given a TravelSurvey object
#'
#' @param ts A TravelSurvey object
#'
#' @return A data frame with duration by mode
#' @export
getDurationByMode <- function(ts){

  ts.df <- full_join(ts@trip, full_join(ts@person, ts@house, by = "houseID"), by = c("houseID","subjectID"))

  # Sum duration by mode
  ts.df <- ts.df %>%
    mutate( mode = fct_explicit_na(mode, na_level = "NA")) %>%
    group_by(location, houseID, subjectID, mode) %>%
    summarise(T = sum(duration)) %>%
    spread(mode, T, fill = 0, drop = TRUE)

  # Account for scenarios with no trips of a given mode after spread
  if( !("walk" %in% unique(ts@trip$mode)) ){ ts.df <- ts.df %>% mutate( walk = 0 ) }
  if( !("cycle" %in% unique(ts@trip$mode)) ){ ts.df <- ts.df %>% mutate( cycle = 0 ) }
  if( !("other" %in% unique(ts@trip$mode)) ){ ts.df <- ts.df %>% mutate( other = 0 ) }

  ts.df <- ts.df %>% select(location, houseID, subjectID, walk, cycle, other) %>% ungroup()

  return(ts.df)
}
#' Compute Mean Duration by Mode
#'
#' For each houseID/subjectID, calculates the mean individual time spent across modes of transportation (walk, cycle, other). User may differentiate between sample of only active travelers, or all travelers.
#'
#' @param ts A TravelSurvey object
#' @param activeTravelers Boolean demonstrating that the data reflect active travelers
#'
#' @return A data frame with mean daily values for each mode of transportation (walk, cycle, other)
#' @export
getMeans <- function(ts, activeTravelers = TRUE){
  validObject(ts)

  if( !activeTravelers ){ ts.df <- getDurationByMode(ts) }
  else{ ts.df <- getDurationByMode(ts) %>% dplyr::filter(walk + cycle > 0 ) }

  daily.df <- ts.df %>%
    select(location, houseID, subjectID, walk, cycle, other) %>%
    group_by(location) %>%
    summarise(walk = mean(walk), cycle = mean(cycle), other = mean(other))

  return(daily.df)
}
#' Compute Mean Trips
#'
#' For each houseID/subjectID, calculates the mean individual number of trips across modes of transportation (walk, cycle, other). User may differentiate between sample of only active travelers, or all travelers.
#'
#' @param ts A TravelSurvey object
#'
#' @return A data frame with mean daily values for each mode of transportation (walk, cycle, other)
#' @export
getTrips <- function(ts){
  validObject(ts)

  ts.df <- left_join(ts@trip, ts@house, by = "houseID") # merge location (and year) into trip data

  ts.df <- ts.df %>%
    dplyr::filter(mode %in% c("walk", "cycle", "other")) %>%
    group_by(location, houseID, subjectID, mode) %>%
    summarise(count = n()) %>%
    spread(mode, count, fill = 0, drop = TRUE) %>%
    select(location, houseID, subjectID, walk, cycle, other) %>%
    group_by(location) %>%
    summarise(walk = mean(walk), cycle = mean(cycle), other = mean(other))

  return(ts.df)
}
#' Compute the proportion of the population that is meeting physical activity guidelines through active transport
#'
#' @param ts A TravelSurvey object
#' @param minimum The minimum activity threshold of MET-hrs/week that persons should meet in order to avoid health risks. Default is 7.5 based on LTPA recommendation.
#' @param byLocation Boolean dictating whether proportion returned will be singular value for overall TS object, or values for each location
#'
#' @return Percentage of sample meeting minimum activity. If stratified by location, a dataframe with columns location and proportion. If not, a singular value.
#' @export
getProportion <- function(ts, minimum = 7.5, byLocation = TRUE){

  # Calculate travel activity, find proportion above activity level stratified either by location or overall TS object
  if( byLocation ){ results <- getTravelActivity(ts) %>% group_by(location) %>% summarise( proportion = length(TA[TA >= minimum])/length(TA) ) }
  else{
    activity <- getTravelActivity(ts)
    results <- length(activity$TA[which(activity$TA >= minimum)])/length(activity$TA)
  }

  return(results)

}
#' Computes outliers from TravelSurvey object for given parameter
#'
#' This function reports outliers from TravelSurvey object based on the IQR of the given parameter.  See boxplot.stats().
#'
#' @param ts TravelSurvey object
#' @param parameter Parameter for which to calculate outliers in given TravelSurvey object. Options are: "participation", "frequency", "intensity", "TA" (Travel Activity).
#'
#' @return A data frame of outliers corresponding to the given parameter.
#' @export
getOutliers <- function(ts, parameter){

  # Intialise additional variable to account for intensity/mean variable names
  parameter.m <- parameter

  # Use HOT functions to calculate values of given parameter
  if( parameter == "participation" ){ df <- getParticipation(ts) }
  else if( parameter == "frequency" ){ df <- getFrequency(ts) }
  else if( parameter == "intensity" ){ df <- getIntensity(ts) ; parameter <- "mean" }
  else if( parameter == "TA" ){ df <- getTravelActivity(ts) }
  else{ message("Problem with parameter argument: ", parameter) }

  # Compute outliers and return subset, if any
  outliers <- boxplot.stats(df[[parameter]])$out
  if( length(outliers) > 0 ){
    index <- which(df[[parameter]] %in% outliers)
    results <- df[index,]
  }else{ message("No outliers for parameter: ", parameter.m) }

  return(results)
}
#' Removes outliers from TravelSurvey object
#'
#' This function removes outliers from TravelSurvey object based on given lower/upper bound arguments.
#'
#' @param ts TravelSurvey object
#'
#' @return A TravelSurvey object.
#' @export
removeOutliers <- function(ts, lower = 1, upper = 50){

  # Calculate TA
  TA.df <- getTravelActivity(ts, sd.walk = 0, sd.cycle = 0, activeTravelers = FALSE)

  # Create data frames of TS values outside given lower/upper bounds
  zero.df <- subset(TA.df, TA < lower)
  exclude.df <- subset(TA.df, TA > upper)

  # Re-initialize TS slots, excluding those TA values outside given upper/lower bounds
  person <- anti_join(ts@person, exclude.df, by = c("houseID","subjectID")) %>% select(houseID, subjectID, sex, age)
  trip <- anti_join(ts@trip, exclude.df, by = c("houseID","subjectID")) %>% select(houseID, subjectID, duration, mode)
  trip_nonzero <- anti_join(trip, zero.df, by = c("houseID","subjectID")) %>% select(houseID, subjectID, duration, mode)
  trip_zero <- inner_join(trip, zero.df, by = c("houseID","subjectID")) %>% select(houseID, subjectID, duration, mode)
  trip_zero <- within(trip_zero, duration <- ifelse(mode != "other", 0, duration))
  trip <- rbind(trip_nonzero, trip_zero)

  # Re-initialize TS object
  ts2 <- initialize(ts, house = ts@house, person = person, trip = trip, location = ts@location)
  return(ts2)
}
#' Return the sample size
#'
#' Return the sample size of a TravelSurvey object stratified by location. Both the number of houses and subjects is returned.
#'
#' @param ts TravelSurvey object
#'
#' @return A tibble containing the sample size by location
#' @export
getSampleSize <- function(ts){

  # Calculate individual and household-level sample sizes
  n.person.df <- inner_join(ts@house, ts@person, by = "houseID") %>% group_by(location) %>% summarise(person = n())
  n.house.df <- ts@house %>% group_by(location) %>% summarise(house = n())
  ss.df <- full_join(n.person.df, n.house.df, by = "location")

  return(ss.df)
}