library(dplyr)
library(lubridate)
library(magrittr)
library(rlang)
library(tidyr)
library(stringr)
library(ggplot2)
library(ggthemr)
require(janitor)

# define ggplot theme
ggthemr("fresh")
    to_swap <- swatch()[c(2,4)]

logo_path <- system.file("image", "MSF_logo.png", package = "tbreportr")

# data <- mmr_compiler()
month <- rollback(Sys.Date()) %>% month()       # take results from previous month
month_quo <- quo(month)

# if January (roll back to December) and report previous year's figures  
        if (month == 12) {
          year <- year(Sys.Date()) - 1
        } else {
          year <- year(Sys.Date())  
        }
        year_quo <- quo(year)

# number of unique patients in admission data        
        unique_records <- length(unique(data$adm$id))

# hiv prevalence amongst cohort
        hiv_prev <- data$adm %>% 
                filter(! is.na(.data$hiv)) %>% 
                count(.data$hiv) %>% 
                mutate(prev = n / sum(n, na.rm = TRUE)) %>% 
                filter(.data$hiv == "Positive") %>% 
                select(.data$prev)

# tidy change data
change <- data$change %>% 
        # convert from wide to long
        tidyr::gather(key = "drug_change", value = "change_value",
                      setdiff(starts_with("change_"), ends_with("date"))) %>% 
        # remove unnecessary start of string
        mutate(drug_change = str_replace(drug_change, pattern = "change_", replacement = "")) %>% 
        # only keep rows where a drug was started
        filter(change_value == 1) %>% 
        # for each drug, only keep one instance of ID
        group_by(drug_change) %>% 
        distinct(id) %>%  
        group_by(id) %>% 
        # if any ID starts Dlm and Bdq then categorise them as "bdq_dlm"
        mutate(drug_change = case_when(any(drug_change == "bdq") & any(drug_change == "dlm") ~ "bdq_dlm", 
                                       TRUE ~ drug_change)) %>% 
        ungroup() %>% 
        distinct(id, .keep_all = TRUE)


# merge dataframes
adm <- data$adm %>% 
        # gen var to categorise starting regimen
                # SCR defined as starting E, Cfz and (H or hdH)
                # long MDR Tx defined by treatment type (regimen2l) and dst (cdstrainprofil)
        mutate(drug_start = case_when(Bdq == "Yes" & Dlm == "Yes" ~ "bdq_dlm",
                                      Bdq == "Yes" ~ "bdq",
                                      Dlm == "Yes" ~ "dlm", 
                                      E == "Yes" & (H == "Yes" | hdH == "Yes") & Cfz == "Yes" ~ "scr", 
                                      rx_type %in% c("MDR-standard", "XDR") |
                                          (rx_type == "Empiric" & dst %in% c("R-res", "MDRTB") &
                                                   !is.na(as.character(dst))) ~ "mdr",
                                      TRUE ~ "other")) %>% 
        # merge with cleaned change data
        left_join(change, by = "id") %>% 
        # remove starting drugs to clean up
        select(id, gender, drug_start, drug_change, everything(), -c(E, H, Bdq, Cfz, Dlm, hdH)) %>% 
        # detect final treatment categorisation by paste0 then detecting drugs
        # note any patient receiving Bdq and Dlm, regardless of treatment overlap, will be "bdq_dlm"
        mutate( drug_string = paste0(drug_start, drug_change),
                drug_tx = factor(case_when(str_detect(drug_string, "bdq") & str_detect(drug_string, "dlm") ~ "bdq_dlm",
                                   drug_start == "bdq" | drug_change == "bdq" ~ "bdq", 
                                   drug_start == "dlm" | drug_change == "dlm" ~ "dlm",
                                   TRUE ~ drug_start))) %>%  
        # gen var for out of cohort date
        mutate(out_date = pmax(datedeat, dateend, dateout, na.rm = TRUE)) %>% 
        # clean up 
        select(id, age, everything(), -drug_string, -drug_start, -drug_change, -starts_with("date"))

# check all patients are included
stopifnot(unique_records == length(unique(adm$id)))      

# attach dlm drug durations
tidy <- data %>% 
        drug_timer(drug = dlm, duration = 180) %>% 
        right_join(adm, by = "id")

# attach bdq drug durations
tidy <- data %>% 
        drug_timer(drug = bdq, duration = 180) %>% 
        right_join(tidy, by = "id") %>% 
        select(id, age, gender, everything())

# add age factor variable - >18 or <= 18 years
tidy <- tidy %>%
                mutate(Age = factor(case_when(age <= 18 ~ 0, 
                                                  age >18 ~ 1), 
                                        levels = c(0:1),
                                        labels = c("<= 18 years", "> 18 years"))) %>% 
        # add start treatment by quarter
                mutate(quarter = zoo::as.yearqtr(Starttre)) %>% 
        # add in/out of cohort factor var
                mutate(out_bin = factor(ifelse(is.na(out_date), 0, 1),
                                        levels = c(0:1)))

Report details

Date and time of report: r format(Sys.time(), format = "%F %R %Z")
Database date: r max(max(data$adm$Starttre), max(data$change$change_date))
Project: r data$project
Comments or questions to: jay.achar@london.msf.org


Definitions

Cohort description {.tabset .tabset-fade .tabset-pills}

Programme Entry {.tabset .tabset-fade .tabset-pills}

Histogram

p_entry <- tidy %>%
                count(quarter) %>% 
        ggplot(aes(x = as.factor(quarter), y = n, fill = as.factor(year(quarter)))) +
            geom_bar(stat = "identity") +
            labs(title = "TB cohort inclusion by quarter",
                 y = "Number",
                 x = "Year and quarter") +
                        theme(axis.text.x = element_text(angle = 90, hjust = 1)) +
                scale_fill_brewer(palette = "Reds", guide = FALSE)

p_entry

Table

tidy %>%
        count(start_year) %>% 
        rename(Year = start_year, Patients = n)

Age {.tabset .tabset-fade .tabset-pills}

All age groups

age_all <- ggplot(tidy, aes(x = age)) +
                geom_histogram(bins = 15, fill = "#a50f15") +
        labs(title = "Age at entry",
                 y = "Number",
                 x = "Age (years)")
age_all

Under 18 years

age_u18 <- tidy %>% filter(age <= 18) %>% 
                ggplot(aes(x = age)) +
                geom_histogram(bins = 8, fill = "#a50f15") +
            labs(title = "Age in children",
                 y = "Number",
                 x = "Age (years)")
age_u18

Over 18 years

age_o18 <- tidy %>% filter(age > 18) %>% 
                ggplot(aes(x = age)) +
                geom_histogram(bins = 8, fill = "#a50f15") +
                labs(title = "Age in adults",
                 y = "Number",
                 x = "Age (years)")
age_o18

By year

age_by_year <- ggplot(tidy, aes(x = as.factor(start_year), y = age)) +
        geom_boxplot(fill = "#a50f15") +
        labs(title = "Age at entry by year",
             subtitle = "Median, IQR, range and outliers",
                y = "Age (years)",
            x = "Year of entry") 
age_by_year

Gender

p_gender <- tidy %>%
        ggplot(aes(x = as.factor(start_year), fill = as.factor(gender))) +
            geom_bar(position = "fill") +
            labs(title = "Gender - proportion on entry",
                 y = "Proportion",
                 x = "Year") +
                        scale_fill_manual(values = c("Male" = "#4393c3",
                                                     "Female" = "#d6604d"), "Gender")
p_gender

HIV status

Definition: HIV status at cohort entry - missing values removed.

target_hiv <- 0.05
if (hiv_prev >= target_hiv) {
    p_hiv_prev <- tidy %>% 
                filter(!is.na(.data$hiv)) %>% 
                count(.data$quarter, .data$hiv) %>% 
                group_by(.data$quarter) %>% 
                mutate(hiv_prev = n / sum(n, na.rm = T)) %>% 
                filter(hiv_prev == "Positive") %>% 
    ggplot(aes(x = quarter, y = hiv_prev * 100)) +
        geom_line() +
        labs(title = "HIV prevalence at entry",
                     subtitle = "Missing results excluded.",
                 y = "Percent HIV positive (%)",
                 x = "Year & Quarter")

    p_hiv_prev
    low_hiv <- ""

} else {

     low_hiv <- paste0("HIV prevalence in entry cohort is below 5% after removing missing results, so no longitudinal description is presented.")

}

r low_hiv

Outcome cohort description {.tabset .tabset-fade .tabset-pills}

Outcome by year

p_outcome <- tidy %>%
        filter(Starttre <= Sys.Date() - 365 * 2) %>% 
        ggplot(aes(x = as.factor(start_year), fill = as.factor(outcome))) +
                geom_bar(position = "fill") +
                labs(title = "Treatment outcome - cohort reporting",
                 y = "Proportion",
                 x = "Year") +
                scale_fill_brewer(palette = "RdBu", direction = 1, "Outcome")

p_outcome

Outcome by year ex. LTFU/Tf Out

p_outcome_ex <- tidy %>%
        filter(Starttre <= Sys.Date() - 365 * 2) %>% 
        filter(! outcome %in% c("On treatment", "LTFU", "Transfer out")) %>% 
        mutate(outcome_bin = ifelse(outcome %in% c("Cured", "Completed"), 
                                    "Success", "Unsuccessful")) %>% 
        ggplot(aes(x = as.factor(start_year), fill = as.factor(outcome_bin))) +
                geom_bar(position = "fill") +
                labs(title = "Treatment outcome - cohort reporting",
                     subtitle = "Ex. On treatment, LTFU, Tr Out",
                 y = "Proportion",
                 x = "Year") +
                scale_fill_manual(values = c("Success" = "#d6604d",
                                             "Unsuccessful" = "#4393c3"),
                                                "Outcome")
p_outcome_ex

Data

tidy %>% 
        count(start_year, outcome) %>% 
        tidyr::spread(start_year, n) %>% 
        janitor::adorn_totals("row") %>% 
        replace(is.na(.), 0)

Treatment specific data {.tabset .tabset-fade .tabset-pills}

Bedaquiline {.tabset .tabset-fade .tabset-pills}

Programme total

bdq <- tidy %>% 
                filter(drug_tx == "bdq" | drug_tx == "bdq_dlm") 
total_bdq <- drug_tabler(bdq)
total_bdq[[2]]

Year total

print(year)
year_bdq <- drug_tabler(bdq, year = year)
year_bdq[[2]]

Month total

print(paste(month.abb[month], year))

month_bdq <- drug_tabler(bdq, year = year, month = month)
month_bdq[[2]]

Bdq >6 months

bdq_6m <- bdq %>% filter(duration_bdq == 1) %>% drug_tabler()
bdq_6m[[2]]

All ID numbers

bdq$id

Delamanid {.tabset .tabset-fade .tabset-pills}

Programme total

dlm <- tidy %>% 
                filter(drug_tx == "dlm" | drug_tx == "bdq_dlm") 
total_dlm <- drug_tabler(dlm)
total_dlm[[2]]

Year total

print(year)
year_dlm <- drug_tabler(dlm, year = year)
year_dlm[[2]]

Month total

print(paste(month.abb[month], year))

month_dlm <- drug_tabler(dlm, year = year, month = month)
month_dlm[[2]]

Dlm >6 months

dlm_6m <- dlm %>% filter(duration_dlm == 1) %>% drug_tabler()
dlm_6m[[2]]

All ID numbers

dlm$id

SCR {.tabset .tabset-fade .tabset-pills}

Programme total

scr <- tidy %>% 
                filter(drug_tx == "scr") 
total_scr <- drug_tabler(scr)
total_scr[[2]]

Year total

print(year)
year_scr <- drug_tabler(scr, year = year)
year_scr[[2]]

Month total

print(paste(month.abb[month], year))

month_scr <- drug_tabler(scr, year = year, month = month)
month_scr[[2]]

All ID numbers

scr$id

Bdq & Dlm {.tabset .tabset-fade .tabset-pills}

Programme total

Defining this category remains a work in progress. The estimates provided in this section are for illustrative purposes rather than a definitive figures.

bdq_dlm <- tidy %>% 
                filter(drug_tx == "bdq_dlm") 

total_bdq_dlm <- drug_tabler(bdq_dlm)
total_bdq_dlm[[2]]

Year total

print(year)
year_bdq_dlm <- drug_tabler(bdq_dlm, year = year)
year_bdq_dlm[[2]]

Month total

print(paste(month.abb[month], year))

month_bdq_dlm <- drug_tabler(bdq_dlm, year = year, month = month)
month_bdq_dlm[[2]]

Bdq & Dlm >6 months

This remains a work in progress.

ID numbers

bdq_dlm$id

Standard MDR-TB {.tabset .tabset-fade .tabset-pills}

Programme total

mdr <- tidy %>% 
                filter(drug_tx == "mdr") 
total_mdr <- drug_tabler(mdr)
total_mdr[[2]]

Year total

print(year)
year_mdr <- drug_tabler(mdr, year = year)
year_mdr[[2]]

Month total

print(paste(month.abb[month], year))

month_mdr <- drug_tabler(mdr, year = year, month = month)
month_mdr[[2]]

ID numbers

mdr$id

Other DR-TB {.tabset .tabset-fade .tabset-pills}

Programme total

other <- tidy %>% 
                filter(drug_tx == "other") 

total_other <- drug_tabler(other)
total_other[[2]]

Year total

print(year)
year_other <- drug_tabler(other, year = year)
year_other[[2]]

Month total

print(paste(month.abb[month], year))

month_other <- drug_tabler(other, year = year, month = month)
month_other[[2]]

ID numbers

other$id

Custom Summary Tables {.tabset .tabset-fade .tabset-pills}

Tajikistan {.tabset .tabset-fade .tabset-pills}

Table 5

tab5 <- mmr_table_generator(table = "Table 5")

# Add MDR standard data
tab5[3, 2] <- as.numeric(month_mdr$freq_table[1,2])
tab5[3, 3] <- as.numeric(month_mdr$freq_table[2,2])
tab5[3, 4] <- as.numeric(year_mdr$freq_table[1,2])
tab5[3, 5] <- as.numeric(year_mdr$freq_table[2,2])
tab5[3, 6] <- as.numeric(total_mdr$freq_table[1,2])
tab5[3, 7] <- as.numeric(total_mdr$freq_table[2,2])


# Add SCR data
tab5[4, 2] <- as.numeric(month_scr$freq_table[1,2])
tab5[4, 3] <- as.numeric(month_scr$freq_table[2,2])
tab5[4, 4] <- as.numeric(year_scr$freq_table[1,2])
tab5[4, 5] <- as.numeric(year_scr$freq_table[2,2])
tab5[4, 6] <- as.numeric(total_scr$freq_table[1,2])
tab5[4, 7] <- as.numeric(total_scr$freq_table[2,2])

# add Dlm data
tab5[5, 2] <- as.numeric(month_dlm$freq_table[1,2])
tab5[5, 3] <- as.numeric(month_dlm$freq_table[2,2])
tab5[5, 4] <- as.numeric(year_dlm$freq_table[1,2])
tab5[5, 5] <- as.numeric(year_dlm$freq_table[2,2])
tab5[5, 6] <- as.numeric(total_dlm$freq_table[1,2])
tab5[5, 7] <- as.numeric(total_dlm$freq_table[2,2])

# add Bdq data
tab5[6, 2] <- as.numeric(month_bdq$freq_table[1,2])
tab5[6, 3] <- as.numeric(month_bdq$freq_table[2,2])
tab5[6, 4] <- as.numeric(year_bdq$freq_table[1,2])
tab5[6, 5] <- as.numeric(year_bdq$freq_table[2,2])
tab5[6, 6] <- as.numeric(total_bdq$freq_table[1,2])
tab5[6, 7] <- as.numeric(total_bdq$freq_table[2,2])

# add Bdq-Dlm data
tab5[7, 2] <- as.numeric(month_bdq_dlm$freq_table[1,2])
tab5[7, 3] <- as.numeric(month_bdq_dlm$freq_table[2,2])
tab5[7, 4] <- as.numeric(year_bdq_dlm$freq_table[1,2])
tab5[7, 5] <- as.numeric(year_bdq_dlm$freq_table[2,2])
tab5[7, 6] <- as.numeric(total_bdq_dlm$freq_table[1,2])
tab5[7, 7] <- as.numeric(total_bdq_dlm$freq_table[2,2])

# add `Other` data
tab5[8, 2] <- as.numeric(month_other$freq_table[1,2])
tab5[8, 3] <- as.numeric(month_other$freq_table[2,2])
tab5[8, 4] <- as.numeric(year_other$freq_table[1,2])
tab5[8, 5] <- as.numeric(year_other$freq_table[2,2])
tab5[8, 6] <- as.numeric(total_other$freq_table[1,2])
tab5[8, 7] <- as.numeric(total_other$freq_table[2,2])

# calculate column totals
tab5_matrix <- as.matrix(tab5[c(3:8), c(2:7)])
        # convert matrix to numeric matrix
        class(tab5_matrix) <- "numeric"
        # calculate column totals
        col_totals <- colSums(tab5_matrix, na.rm = TRUE)

# add column totals 
tab5[9, c(2:7)] <- col_totals

tab5

Table 6

tab6 <- mmr_table_generator(table = "Table 6")

# generate data frame
tab6_data <- tidy %>% 
        filter(!is.na(Age)) %>% 
        count(drug_tx, out_bin, Age) %>% 
        complete(drug_tx, out_bin, Age, fill = list(n = 0))

# generate >24w data
bdq24 <- tidy %>% 
        filter(drug_tx == "bdq" & duration_bdq == 1) %>% 
        filter(!is.na(Age)) %>% 
        count(drug_tx, out_bin, Age) %>%  
        complete(drug_tx, out_bin, Age, fill = list(n = 0))

dlm24 <- tidy %>% 
        filter(drug_tx == "dlm" & duration_dlm == 1) %>% 
        filter(!is.na(Age)) %>% 
        count(drug_tx, out_bin, Age) %>%  
        complete(drug_tx, out_bin, Age, fill = list(n = 0))

# add Bdq data
tab6[3, 2] <- tab6_data[1, 4] + tab6_data[5, 4]
tab6[3, 3] <- tab6_data[2, 4] + tab6_data[6, 4]
tab6[3, 4] <- tab6_data[3, 4] + tab6_data[7, 4]
tab6[3, 5] <- tab6_data[4, 4] + tab6_data[9, 4]

# add Bdq>24 data
tab6[4, 2] <- bdq24[1, 4]
tab6[4, 3] <- bdq24[2, 4]
tab6[4, 4] <- bdq24[3, 4]
tab6[4, 5] <- bdq24[4, 4]

# add Dlm data
tab6[5, 2] <- tab6_data[9, 4]  + tab6_data[5, 4]
tab6[5, 3] <- tab6_data[10, 4] + tab6_data[6, 4]
tab6[5, 4] <- tab6_data[11, 4] + tab6_data[7, 4]
tab6[5, 5] <- tab6_data[12, 4] + tab6_data[9, 4]

# add Dlm>24 data
tab6[6, 2] <- dlm24[9, 4]
tab6[6, 3] <- dlm24[10, 4]
tab6[6, 4] <- dlm24[11, 4]
tab6[6, 5] <- dlm24[12, 4]

# add Bdq & Dlm data
tab6[7, 2] <- tab6_data[5, 4]
tab6[7, 3] <- tab6_data[6, 4]
tab6[7, 4] <- tab6_data[7, 4]
tab6[7, 5] <- tab6_data[9, 4]
tab6
miss_age <- sum(is.na(tidy$Age))
if (miss_age > 0) {
  note <- paste0("Note: Age variable has ", miss_age, " missing values. They are not included in the table above.")
} else {
  note <- " "        
}

r note



JayAchar/tbreportr documentation built on May 27, 2019, 12:01 a.m.