HOT: The Health-Oriented Transportation (HOT) Model

#' Perform Comparative Risk Assesment
#'
#' This function accepts a TravelSurvey class object for performing CRA across locations within that object. A user may provide a 'scenario' dataframe for calculating such health outcomes. If no scenario is provided, the CRA wrapper function is run returning a data frame of results for Scenarios 1 (participation = 1) and 2 (participation = 0). If a scenario is provided, the user may specify boolean argument 'compare' to dictate whether their scenario results should be compared against Scenarios 1 and 2.
#'
#' @param ts TravelSurvey object
#' @param scenario Data frame of scenario values: location, participation (pi1), intensity.mean (mu1), intensity.sd (sigma1)
#' @param compare Boolean dictating whether results from given scenario will be compared against Scenarios 1 (pi1 = 0) and 2 (pi1 = 1).
#'
#' @return Data frame of location, physical activity associated with country's income level (Lear), risk function, and risk (or health burden aversion) for both Scenarios 1 and 2
#' @export
CRA.ts <- function(ts, scenario, compare = TRUE){
  validObject(ts)

  # Initialize participation data frame of locations and associated participation values
  part.df <- getParticipation(ts)

  if( !missing(scenario) ){ # if scenario argument provided

    # Sort the scenario data frame to ensure values match ts object across locations (rows of the data frame)
    scenario <- scenario[match(ts@location$location, scenario$location),]

    # Run CRA wrapper function to iterate through all combinations of parameters
    results <- CRA(location = part.df$location,
                   participation.b = part.df$participation,
                   intensity.b = getIntensity(ts),
                   participation.s = scenario$participation,
                   intensity.s = data.frame(mean = scenario$intensity.mean, sd = scenario$intensity.sd))

    if( compare ){
      # Scenario 1: participation = 0 (default of CRA function)
      results.s1 <- CRA(location = part.df$location,
                        participation.b = part.df$participation,
                        intensity.b = getIntensity(ts))

      # Scenario 2: participation = 1
      results.s2 <- CRA(location = part.df$location,
                        participation.b = part.df$participation,
                        intensity.b = getIntensity(ts),
                        participation.s = rep(1,length(part.df$location)))

      # Combine results
      results <- data.frame(location = results.s1$location,
                            author = results.s1$author,
                            income = results.s1$income,
                            rho = results$rho,
                            rho.s.1 = results.s1$rho,
                            rho.s.2 = results.s2$rho)
    }
  }else{ # no scenario provided, use defaults of Scenarios 1 and 2

    # Scenario 1: participation = 0 (default of CRA function)
    results.s1 <- CRA(location = part.df$location,
                      participation.b = part.df$participation,
                      intensity.b = getIntensity(ts))

    # Scenario 2: participation = 1
    results.s2 <- CRA(location = part.df$location,
                      participation.b = part.df$participation,
                      intensity.b = getIntensity(ts),
                      participation.s = rep(1,length(part.df$location)))

    # Combine results
    results <- data.frame(location = results.s1$location,
                          author = results.s1$author,
                          income = results.s1$income,
                          rho.s.1 = results.s1$rho,
                          rho.s.2 = results.s2$rho)
  }

  return(results)
}
#' Perform Comparative Risk Assesment
#'
#' This function accepts up 5 parameters, all comprised of vectors of values through which to be iterated in order to compute rho (risk, or health burden aversion) using CRA.function.

#' @param location Vector of location values
#' @param participation.b Vector of corresponding baseline participation (pi1) values
#' @param intensity.b Data frame of corresponding baseline intensity values: mean (mu1), sd (sigma1)
#' @param participation.b Vector of corresponding scenario participation (pi1) values
#' @param intensity.b Data frame of corresponding scenario intensity values: mean (mu1), sd (sigma1)
#' @param literature Vector of related literature authors
#' @param income Vector of country income levels, per Lear et al
#'
#' @return Data frame of locations, participation, intensity, leisure levels, Risk functions, and corresponding values of rho (risk)
#' @export
CRA <- function(location,
                participation.b,
                intensity.b,
                participation.s = rep(0,length(location)), # Default of Scenario 1: participation is 0.
                intensity.s = intensity.b, # Default of Scenarios 1 and 2: conditional mean/sd remain the same as baseline.
                literature = c("Arem", "Wen", "Lear"),
                income = c("High", "Upper-middle", "Lower-middle", "Low")){

  # Initialize accumulator vectors
  loc.append <- factor()
  author.append <- c()
  income.append <- c()
  rho.append <-c()

  # Iterate through corresponding rows of location, participation, intensity (mean, sd)
  for(i in 1:length(location)) {

    # Initialize values for efficient calculation
    loc <- location[i]
    part.b <- participation.b[i]
    part.s <- participation.s[i]
    mean.b <- intensity.b$mean[i]
    sd.b <- intensity.b$sd[i]
    mean.s <- intensity.s$mean[i]
    sd.s <- intensity.s$sd[i]

    # Iterate through given author(s).
    # Each author has an assoicated physical activity mean and sd value(s), as well as Risk function.
    for (author in literature) {

      # For Lear, iterate through given level(s) of income. Otherwise, income level not considered.
      if( author == "Lear" ){ incomeVec <- income }
      else{ incomeVec <- NA }

      for (income.val in incomeVec) {

        # Account for NA value(s) of intensity (e.g. location has no active travelers)
        if( NA %in% c(mean.b, sd.b, mean.s, sd.s) ){rho.value <- NA}
        else{

          # Calculate Physical Activity (P) mean and sd given author and income, if applicable (Lear)
          mean.P <- getPAIntensity(author,income.val)$mean
          sd.P <- getPAIntensity(author,income.val)$sd

          # Account for Arem and Wen -- authors' values are for leisure activity, not physical activity
          if( author == "Lear" ){
            mean.Tc <- mean.P - part.b*mean.b # use mean/sd of baseline T
            sd.Tc <- mean.Tc*(sd.P/mean.P) } # assume cv from P to calculate standard deviation of Non-travel Activity (Tc)
          else{ mean.Tc <- mean.P ; sd.Tc <- sd.P }

          # Perform CRA for each set of parameters
          rho.value <- CRA.function(participation.baseline = part.b,
                                    participation.scenario = part.s,
                                    meanlog.baseline = log(mean.b^2 / sqrt(sd.b^2 + mean.b^2)),
                                    sdlog.baseline = sqrt(log(1 + (sd.b / mean.b)^2)),
                                    meanlog.scenario = log(mean.s^2 / sqrt(sd.s^2 + mean.s^2)),
                                    sdlog.scenario = sqrt(log(1 + (sd.s / mean.s)^2)),
                                    mean.Tc = mean.Tc,
                                    sd.Tc = sd.Tc,
                                    Tc.method = "Multinomial", # future avenue: iterate through values of Tc.method
                                    author = author,
                                    income = income.val,
                                    R = getRiskFun(author))
        }

        # Append values to accumulation vectors
        loc.append <- unlist(list(loc.append, loc))
        author.append <- c(author.append, author)
        if( author == "Lear" ){ income.append <- c(income.append, income.val) }else{ income.append <- c(income.append, author) }
        rho.append <- c(rho.append, -rho.value)
      }}}

  # Combine accumulation vectors into final results data frame
  results <- data.frame(location = loc.append,
                        author = author.append,
                        income = income.append,
                        rho = rho.append)
  return(results)
}
#' Perform Comparative Risk Assesment
#'
#' This function accepts up to eight parameters for use with the
#' parameteric physical activity model, or two vectors of quantiles
#' for the non-parametric model.  The function returns the population
#' attributable fraction.
#'
#' @param meanlog.baseline Mean of exposure in baseline (log-scale, parameteric model)
#' @param sdlog.baseline Standard deviation of exposure in baseline (log-scale, parameteric model)
#' @param participation.baseline Participation, or proportion of active travelers in baseline (pi1)
#' @param meanlog.scenario Mean of exposure in scenario (log-scale, parameteric model)
#' @param sdlog.scenario Standard deviation of exposure in scenario (log-scale, parameteric model)
#' @param participation.scenario Participation, or proportion of active travelers in scenario (pi1)
#' @param meanlog.leisure Mean of exposure in leisure physical activity (log-scale, parameteric model)
#' @param sdlog.leisure Standard deviation of leisure physical activity in baseline (log-scale, parameteric model)
#' @param n Number of quantiles (parametric)
#' @param B Sample size (parametric model)
#' @param P Quantiles of exposure in baseline (non-parametric model)
#' @param Q Quantiles of exposure in scenario (non-parametric model)
#' @param R Relative risk function in terms of total exposure (physical activity)
#'
#' @return The population attributable fraction
CRA.function <- function(meanlog.baseline = log(5),
                         sdlog.baseline = 1,
                         participation.baseline = 0.5,
                         meanlog.scenario = log(5),
                         sdlog.scenario = 1,
                         participation.scenario = 0.5,
                         Tc.method = "Multinomial", # Non-travel (Tc) distribution
                         author = "Arem",
                         income = "Upper-middle",
                         mean.Tc = 10,
                         sd.Tc = 1,
                         n = 1e4,
                         B = 1e4,
                         P,
                         Q,
                         R = R.arem.fun)# Arem exposure-response function
    {


    # Uniform distribution used for random sampling
    runif <- runif(n = B)

    # Generate baseline PA/LTPA (dependent on author, method)
    # Tc means the complement of Travel activity, i.e., non-travel activity
        if( Tc.method == "Non-random" ){
            PA.baseline <- mean.Tc
        }else if( Tc.method == "Multinomial" ){ # preferred method - 3/20/2019 HCF
            P <- getPActivity(author, income) # activity distribution dictated by author selection
            PA.baseline <- sample(P$bins, size = B, prob = P$probs, replace = TRUE)  # generate baseline values
        }else if( Tc.method == "Normal" ){
            PA.baseline <- rnorm(n = B, mean = mean.Tc, sd = sd.Tc)
        }else if( Tc.method == "Log-normal" ){
            meanlog.Tc <- log(mean.Tc^2 / sqrt(sd.Tc^2 + mean.Tc^2))
            sdlog.Tc <- sqrt(log(1 + (sd.Tc / mean.Tc)^2))
            PA.baseline <- rlnorm(n = B, meanlog = meanlog.Tc, sdlog = sdlog.Tc)
             # Simulated distribution for activity
        }else{
            stop("Error with Tc.method argument: ", Tc.method)
        }

    # Initialize P distribution for relative risk calculation
    P <- quantile(PA.baseline, probs = (1:(n-1))/n)

    # Generate scenario PA/LTPA (dependent on author)
        TA.sample.scenario <- rlnorm(n = B, meanlog = meanlog.scenario, sdlog = sdlog.scenario)
        # generate TA values
        participation.change <- participation.scenario - participation.baseline
        # calculate change in participation (scenario vs baseline)
        if( participation.change < 0 ){
            TA.sample.scenario <- -1*TA.sample.scenario
            # if decrease in participation, decrease in activity
        }
        PA.scenario <- ifelse(PA.baseline + TA.sample.scenario > 0, PA.baseline + TA.sample.scenario, 0)
    PA.scenario <- ifelse(runif > abs(participation.change), PA.baseline, PA.scenario)
    # for proportion of population (diff in participation baseline vs
    # scenario) incorporate change in participation/activity
    # Initialize Q distribution for relative risk calculation
    Q <- quantile(PA.scenario, probs = (1:(n-1))/n)

  return((sum(R(Q)) - sum(R(P)))/sum(R(P)))
}