# define variable to split by year
change_df$start_yr <- as.numeric(format(change_df$hiv_tx_status_date, "%Y"))

# filter those starting within the last year
sc <- split(change_df, f = factor(change_df$patient_id))
keep_pts <- lapply(sc, FUN = function(x)  ifelse(min(x["start_yr"]) < params$reporting_year - 1,
                                                 x$patient_id,
                                                 NA_character_)) %>% unlist()
sc_filtered <- change_df[change_df$patient_id %in% keep_pts, ]

# lists for each year of enrollment
patient_list <- split(sc_filtered, 
                      f = factor(sc_filtered$patient_id))

# df for each patient with start year and final status
final_status_lst <- lapply(patient_list, 
       FUN = function(x) {

        min_yr <- min(x$start_yr)
        filtered_x <- x[x$hiv_tx_status_date < min(x$hiv_tx_status_date + 365), ]
        final_status <- x[x$hiv_tx_status_date == max(x$hiv_tx_status_date), "hiv_status"]

        data.frame(patient_id = unique(x$patient_id),
                   start_yr = min_yr,
                   status_365 = final_status,
                   stringsAsFactors = FALSE)
       })

# bind all lists into df 
fs_df <- do.call(rbind, final_status_lst)

# split into year cohorts
yr_cohorts <- split(fs_df, f = factor(fs_df$start_yr))

column_labels <- names(yr_cohorts)

create_rmd_table(x = yr_cohorts, 
                 formula = "~ status_365",
                 column_labels = column_labels, 
                 var_names = "1-yr status")


JayAchar/hisreportr documentation built on March 18, 2020, 5:57 a.m.