trialcostR: Trial Cost-Effectiveness in R

#' @import methods
#' @import dplyr
#' @import tidyr
#' @include Trial.R
NULL

#' Trial data with beyond-trial extension
#'
#' An S4 class of the data from a trial with hazards supplied for each
#' outcome in the trial.
#'
#' Hazard data will be used to extend the life-year measurements beyond the
#' follow-up times of the trial.
#'
#' @slot df_mu a tbl_df of beyond-trial hazard data
#' @slot hazard a formula describing the hazard
#' @slot censor a logical vector of the censoring status at follow up
#' @slot s numeric value of the survival time in the hazard data 
#' @slot s_max numeric value of participant follow up time given the hazard data
#'
#' @name ExtendTrial-class
#' @rdname ExtendTrial-class
#' @exportClass ExtendTrial
setClass('ExtendTrial',
    contains='Trial',
    slots = list(df_mu='tbl_df',
                 hazard='formula',
                 censor='logical',
                 s='numeric',
                 s_max='numeric'),
    validity = function(.Object) {
        msg <- character(0)

        if(!valid_response_formula(.Object@hazard))
            msg <- c(msg,
                     'Invalid data-specification expression for hazard.')

        if(!df_has_all_terms(.Object@df_mu,
                             .Object@hazard))
            msg <- c(msg,
                     paste('Hazard data does not contain required variables',
                           'given the data specification for the hazard.'))

        if(!df_has_linear_terms(.Object@df,
                                .Object@hazard))
            msg <- c(msg,
                     paste('Trial data does not contain required variables',
                           'given the data specification for the hazard.'))

        # TODO: check compatibility of levels and data types between
        # trial data and hazard data
        # - should check that data for hazard is complete? i.e. no missing
        #    levels
        # Check that time horizon is greater or equal to follow up
        if(length(.Object@s_max) != nrow(.Object@df))
            msg <- c(msg,
                     paste('Length of extended follow-up data must be',
                           'equal to length of trial data.'))

        if(length(.Object@s) != nrow(.Object@df_mu))
            msg <- c(msg,
                     paste('Length of s (time) data must be equal to length',
                           'of hazard data.'))

        if(length(.Object@censor) != nrow(.Object@df))
            msg <- c(msg,
                     paste('Lengths of censoring and trial data',
                           'must be equal.'))

        if(any(is.na(as.logical(.Object@censor))))
            msg <- c(msg, 'Censoring data must not contain missing values')

        if (length(msg) == 0)
            TRUE
        else
            msg
    }
)

#' Create an ExtendTrial
#'
#' TODO documentation
#'
#' @param df a data.frame of the trial data (long format)
#' @param df_mu a data.frame of beyond-trial hazard data
#' @param participant a formula describing the variables related to
#'    participants
#' @param outcome a formula describing the variables related to outcomes
#' @param hazard a formula describing the variables of the hazard
#' @param followup expression for follow-up in data.frame (NSE)
#' @param censor an expression for censoring status in trial data (NSE)
#' @param s the name of the time variable in the hazard data (NSE)
#' @param s_max name of extended follow-up time variable in participant
#'     data
#' @param discount numeric (scalar) value of lifeyear discount rate
#'
#' @name ExtendTrial
#' @rdname ExtendTrial-class
#'
#' @export
ExtendTrial <- function(df,
                        df_mu,
                        participant,
                        outcome,
                        hazard,
                        followup,
                        censor,
                        s,
                        s_max,
                        discount) {
    new('ExtendTrial',
        df=select_(
               tbl_df(df),
               .dots=as.list(unlist(lapply(c(terms(participant),
                                             terms(outcome)),
                                    labels)))
           ),
        df_mu=select_(tbl_df(df_mu), 
                      .dots=as.list(all.vars(hazard))),
        participant=participant,
        outcome=outcome,
        hazard=hazard,
        followup=eval(substitute(followup), df, parent.frame()),
        censor=(eval(substitute(censor), df, parent.frame())),
        s=eval(substitute(s), df_mu, parent.frame()),
        s_max=eval(substitute(s_max), df, parent.frame()),
        discount=discount
    )
}

#     df columns; x1, ..., xn, y1, ..., yn, followup, s_max
# df_mu columns; x1, ..., xn, y1, ..., yn, mu, s
beyond_trial_life_years <- function(df,
                                    df_mu) {

    # Check if df is trivially empty
    if (nrow(df) == 0) {
        return(c())
    }

    mu_vars <- names(df_mu)[!(names(df_mu) %in% c('s', 'mu'))]

    if (length(mu_vars) == 0) {
        mu_vars <- 'k'
        df_mu <- df_mu %>% mutate(k=1)
    }

    # Determine the time intervals of the hazard data
    df_mu %>%
        arrange_(~s) %>%
        rename_(s.a=~s) %>%
        group_by_(.dots=mu_vars) %>%
        mutate_(s.b=~c(s.a[-1], Inf)) %>%
        ungroup() %>%
    # Join hazard and trial data
    inner_join(df %>%
                   mutate(id = row_number(),
                          k=1),
               by=mu_vars) %>% #,
              # suffix=c('.a', '.b')) %>%
    # variables needed from here on
        select_(.dots=c(~id, ~mu, ~s.a, ~s.b, ~s_max, ~followup)) %>%
        filter_(.dots=c(~s.b >= followup,
                        ~s.a < s_max)) %>%
    # Calculate the survival by individual and time interval
    # - perform ds calculation _before_ grouping (seems more efficient)
        mutate_(ds=~pmin(s.b, s_max) - pmax(s.a, followup)) %>%
        arrange_(~s.a) %>%
        group_by_(~id) %>%
        mutate_(S=~exp(-cumsum(mu * ds))) %>%
    # Calculate life-expectancy using interval-individual survival terms
    # - TODO ifelse is slow here
        summarise_(
            LE=~sum(ifelse(abs(mu) < 1E-14,
                               ds,
                               (c(1, S[-length(S)]) - S) / mu))
        ) %>%
        complete(id=1:nrow(df), fill=list(LE=0)) %>%
        select_(~LE) %>%
        unlist(use.names=F)

}

#' @rdname lifeyear-methods
#' @aliases lifeyear,ExtendTrial-method
#' @importFrom stats setNames
setMethod('lifeyear',
    c(trial = 'ExtendTrial'),
    function(trial) {
        name_lookup <- c(standard_covariate_names(trial),
                         setNames('mu', all.vars(trial@hazard)[1]))

        callNextMethod() +
        c(rep(0, sum(!trial@censor)),
          beyond_trial_life_years(
              setNames(trial@df,
                       name_lookup[c(all.vars(trial@participant),
                                     all.vars(trial@outcome))]) %>%
              mutate(followup=trial@followup,
                     s_max=trial@s_max),
              setNames(trial@df_mu[,all.vars(trial@hazard)],
                       name_lookup[all.vars(trial@hazard)]) %>%
              mutate(s=trial@s)
        ))[order(c(which(!trial@censor),
                      which(trial@censor)))]

    }
)

#' @rdname dlifeyear-methods
#' @aliases dlifeyear,ExtendTrial-method
#' @importFrom stats setNames
setMethod('dlifeyear',
    c(trial = 'ExtendTrial'),
    function(trial) {
        name_lookup <- c(standard_covariate_names(trial),
                         setNames('mu', all.vars(trial@hazard)[1]))

        callNextMethod() +
            c(rep(0, sum(!trial@censor)),
              beyond_trial_life_years(
                  setNames(trial@df[trial@censor,],
                           name_lookup[c(all.vars(trial@participant),
                                         all.vars(trial@outcome))]) %>%
                      mutate(followup=trial@followup[trial@censor],
                             s_max=trial@s_max[trial@censor]),
                  setNames(trial@df_mu[,all.vars(trial@hazard)],
                           name_lookup[all.vars(trial@hazard)]) %>%
                      mutate_(.dots=c(s=~trial@s,
                                    mu=~mu + trial@discount))
              ) * exp(-trial@discount*trial@followup[trial@censor])
            )[order(c(which(!trial@censor),
                      which(trial@censor)))]
    }
)