#' Function to perform subgroups (effect estimate) analysis
#'
#' function to perform subgroups analysis given minimal amout of
#' infos. It should produce a table suitable for forest plotting.
#'
#' @param ep end point (quantitative, factor or Surv). Estimates with
#' \code{lm} for quantitative, \code{glm} for binomial factor,
#' \code{coxph} for \code{Surv}
#' @param factors_df a data.frame with main estimate factors (eg in a
#' trial could be a 1 column with treatment variable)
#' @param subgroups_df data.frame of factor for subsetting and
#' interaction test
#' @param factors_lab label for the main factor under study (eg
#' Treatment in a RCT)
#' @param subgroups_lab label for the subgroup (eg could by Histology,
#' Age class and so on)
#'
#'@export
subgroups <- function(ep,
factors_df = NULL,
subgroups_df = NULL,
factors_lab = names(factors_df),
subgroups_lab = names(subgroups_df),
...)
{
sub_error_msg <- "subgroups must be a data.frame of factors"
if (!is.data.frame(subgroups_df)) stop(sub_error_msg)
all_factors <- all(unlist(lapply(subgroups_df, is.factor)))
if (!all_factors) stop(sub_error_msg)
UseMethod("subgroups")
}
## funzione che effettua la stima in un sottogruppo
cox_subgroup_worker <- function(f, data, group_label)
{
# browser()
est <- tryCatch({
if (is.factor(data$factor)) {
## caso con una categorica come effetto
fit <- survival::survfit(f, data = data)
sfit <- summary(fit)
groups_labs <- gsub("^.+=(.+)", "\\1", rownames(sfit$table))
n <- t(sfit$table[, c('records', 'events')])
dim(n) <- NULL
n_headers <- paste(rep(groups_labs, each = 2),
c("_n", "_ev"), sep = "")
n <- as.data.frame(setNames(as.list(n), n_headers))
} else {
## caso con una quantitativa come effetto
mf <- lbmisc::NA_remove(model.frame(formula = f, data = data),
quiet = TRUE)
n <- data.frame('n' = nrow(mf),
'ev' = sum(!lbsurv::is_censored(mf$ep)))
}
## cox per la stima di effetto
cox <- survival::coxph(formula = f, data = data)
effects_names <- c('hr', 'ci_lower','ci_upper', 'p')
effects <- setNames(pretty_model(cox), effects_names)
effects$hr_string <- sprintf("%.3f (%.3f - %.3f)",
effects$hr,
effects$ci_lower,
effects$ci_upper)
var_order <- c('hr', 'ci_lower','ci_upper',
'hr_string', 'p')
res <- data.frame('group' = group_label,
n,
effects[, var_order])
## quality control
res
}, error = function(x) invisible(NULL))
est
}
## funzione worker per surv (tutte le stime per sottogruppi dati da UNA
## variabile di creazione gruppi
single_worker_Surv <- function(ep, factor, subgroup, subgroup_lab){
# browser()
## qui ep, factor e subgroup sono ciascuno una sola variabile!
data <- data.frame(ep = ep, factor = factor,
subgroups = droplevels(subgroup))
main_f <- ep ~ factor
int_f <- ep ~ factor * subgroup
# test di interazione
main_cox <- survival::coxph(formula = main_f, data = data)
int_cox <- survival::coxph(formula = int_f, data = data)
int_test <- anova(int_cox, main_cox)[2, 'P(>|Chi|)']
## stime main e nei substrati
datasets <- c(
list('All' = data),
setNames(split(data, f = subgroup),
sprintf("%s: %s", subgroup_lab, levels(subgroup))))
estimates <- Map(cox_subgroup_worker,
list(main_f),
datasets,
names(datasets))
n <- do.call(rbind, lapply(estimates, function(x) x$n))
effects <- do.call(rbind, lapply(estimates, function(x) x$effects))
names(estimates) <- names(datasets)
estimates <- do.call(rbind, estimates)
rownames(estimates) <- NULL
estimates$interaction_p <-
pretty_pval(c(NA, int_test, rep(NA, nrow(estimates) - 2L)))
## return
estimates
}
#'@export
subgroups.Surv <- function(ep,
factors_df = NULL,
subgroups_df = NULL,
factors_lab = names(factors_df),
subgroups_lab = names(subgroups_df),
...)
{
## worker used for each factor in turn
factor_worker <- function(x){# x is a single variable/factor under study
## do single_worker_Surv for each subgroups variable
res <- Map(single_worker_Surv,
list(ep), list(x),
subgroups_df, as.list(subgroups_lab))
## se vi è più di una variabile di subgrouping, tieni la stima main
## solo nel primo caso
if (length(res) > 1L) {
keep_main <- function(x, id) if (id == 1L) x else x[-1 ,]
## browser()
res <- Map(keep_main, res, as.list(seq_along(res)))
res <- do.call(rbind, res)
rownames(res) <- NULL
res
} else res[[1]]
}
## do this for each factor under analysis
final_res <- lapply(factors_df, factor_worker)
names(final_res) <- factors_lab
final_res
}
## =========================================================================
## ---------------------------------------------------------- SARCULATOR
## =========================================================================
## ## x <- group13_spl[[1]]
## fp_data <- function(x, ep = c('os', 'pfs')){
## ep <- match.arg(ep)
## if (ep == 'os')
## f <- Surv(time = os_time, event = os_status) ~ treatment
## else
## f <- Surv(time = dfs_time, event = dfs_status) ~ treatment
## fit <- survfit(f, data = x)
## sfit <- summary(fit)
## cox <- coxph(f, data = x)
## scox <- summary(cox)
## ## gruppo
## gruppo <- data.frame(
## 'sarc' = gsub('pr-OS: ', '', as.character(unique(x$sarc10os_gr2))),
## 'isto' = isto_renamer(unique(x$inc03)))
## ## tabella dei numerini
## n <- t(sfit$table[, c('records', 'events')])
## dim(n) <- NULL
## n <- as.data.frame(setNames(as.list(n),
## c('std_n', 'std_ev', 'ht_n', 'ht_ev')))
## ## tabella effetti
## effetti <- setNames(pretty_model(cox), c('HR', 'low', 'up', 'p'))
## effetti[n$ht_ev %in% 0 | n$std_ev %in% 0, ] <- NA
## effetti$hr_label <- sprintf("%.2f (%.2f - %.2f)",
## effetti$HR,
## effetti$low,
## effetti$up)
## cbind(gruppo, n, effetti)
## }
## os_res <- do.call(rbind, lapply(group13_spl, fp_data, ep = 'os'))
## lab_font <- fpTxtGp(label = list(gpar(fontfamily = "", cex=0.7)))
## forest_table <- as.data.frame(lapply(os_res[, vars], as.character))
## header <- data.frame(c(NA, 'pr-OS'),
## c(NA, 'Histology'),
## c('STD', 'n.'),
## c('STD', 'ev.'),
## c('HT', 'n.'),
## c('HT','ev.'),
## c(NA, 'HR (95%CI)'))
## names(header) <- names(forest_table)
## labeltext <- rbind(header, forest_table)
## is_summary <- c(TRUE, TRUE, rep(FALSE, nrow(forest_table)))
## forestplot::forestplot(labeltext = labeltext,
## hrzl_lines = TRUE,
## is.summary = is_summary,
## align = 'l',
## colgap = unit(3, 'mm'),
## txt_gp = lab_font,
## mean = c(NA, NA, os_res$HR),
## lower = c(NA, NA, os_res$low),
## upper = c(NA, NA, os_res$up),
## # clip = c(0.1, 10),
## xlog = TRUE)
## =========================================================================
## --------------------------------------------------------- MRP
## =========================================================================
## subgroup_fp <- function(f, # Surv() ~ treatment formula
## data, # data.frame
## subgroups = '' ## name of the factor variable for subgr
## )
## {
## ## TODO: interaction test printing
## do_subgroups <- subgroups %in% names(data)
## ## nomi variabili del dataset
## flist <- as.list(f)
## event_var <- as.character(flist[[2]]$event)
## time_var<- as.character(flist[[2]]$time)
## treatm_var <- as.character(flist[[3]])
## ## check per sicurezza perché dopo si assume che gli eventi siano 0 o 1
## if (!all(data[, event_var]) %in% c(0, 1, NA))
## stop('Events must be 0,1 or NA')
## ## livelli della variabile di trattamento
## treatm_var_labs <- levels(data[, treatm_var])
## n_header <- paste(rep(treatm_var_labs, each = 2), c('n', 'ev'), sep = "_")
## grp1_n <- n_header[1]
## grp1_ev <- n_header[2]
## grp2_n <- n_header[3]
## grp2_ev <- n_header[4]
## ## a row of NA data.frame with structure conforming to results returned
## void <- res <- data.frame(as.list(rep(NA, 9)))
## full_header <- c(n_header, c('HR', 'low', 'up', 'p', 'HR_string'))
## void <- setNames(void, full_header)
## surv_worker <- function(f, data) {
## all_vars <- c(event_var, time_var, treatm_var, subgroups)
## data <- NA_remove(data[, all_vars], quiet = TRUE)
## if (nrow(data) > 0L) {
## ## numbers
## fit <- survfit(formula = f, data = data)
## sfit <- summary(fit)
## ## when the group is only one (n = 0, ev = 0 for the other),
## ## a vector is returned
## n <- if (is.matrix(sfit$table))
## t(sfit$table[, c('records', 'events')])
## else {
## ## bisogna ricostruire eventi e n via tabelle
## ## di contingenza
## n <- table(data[, treatm_var])
## ## qui si assume che tutti gli eventi
## ## siano tra 0 e 1 se no
## ev <- table(factor(data[, event_var], levels = 0:1),
## data[, treatm_var])['1', ]
## rbind(n, ev)
## }
## dim(n) <- NULL
## n <- as.data.frame(setNames(as.list(n), n_header))
## ## estimates: do them if there's at least one event per group
## feasible_estimates <- n[, grp1_ev] > 0L & n[, grp2_ev] > 0L
## if (feasible_estimates) {
## cox <- coxph(formula = f, data = data)
## scox <- summary(cox)
## HR <- setNames(pretty_model(cox), c('HR', 'low', 'up', 'p'))
## HR$HR_string <- sprintf("%.2f (%.2f - %.2f)",
## HR$HR, HR$low, HR$up)
## } else {
## HR <- data.frame(
## 'HR' = NA, 'low' = NA, 'up' = NA, 'p' = NA,
## 'HR_string' = NA
## #sprintf("%.2f (%.2f - %.2f)", NA, NA, NA)
## )
## }
## cbind(n, HR)
## } else { ## no elements in this subgroup
## void ## return a void data.frame (for the moment)
## }
## }
## ## Overall estimates
## overall_HR <- surv_worker(f = f, data = data)
## overall_HR <- cbind(data.frame(group = 'All'), overall_HR)
## rownames(overall_HR) <- NULL
## ## Subgroups estimates
## if (do_subgroups){
## data_spl <- split(data, data[, subgroups], drop = FALSE)
## subgroups_HR <- lapply(data_spl,
## function(x) surv_worker(f = f, data = x))
## subgroups_HR <- do.call(rbind, subgroups_HR)
## subgroups_HR$group <- rownames(subgroups_HR)
## }
## ## Return
## if (do_subgroups) {
## ## add the group NA to void
## ## void2 <- cbind(group = NA, void)
## ## res <- rbind(overall_HR, void2, subgroups_HR)
## res <- rbind(overall_HR, subgroups_HR)
## rownames(res) <- NULL
## } else {
## res <- overall_HR
## }
## ## rm NA(NA - NA)
## res[res$HR_string %in% 'NA (NA - NA)', 'HR_string'] <- NA
## res
## }
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.