Nothing
# =============================================================================
# autocodebook - Arvore de fluxo (CONSORT composavel)
# =============================================================================
# Modelo de dados: uma arvore de nos guardada em .cb_env$flow.
#
# Tipos de no:
# - "root" : coorte inicial (1, criado automaticamente)
# - "step" : reducao linear (filtro). 1 filho.
# - "split" : ramificacao por coluna. N filhos (um por categoria).
# - "outcome" : folha empilhada (contagem de desfecho). Sem filhos.
#
# A arvore e construida incrementalmente:
# track_split(by=) -> ramifica TODAS as folhas-corrente pela coluna
# track_outcomes(vars=)-> anexa contagens de desfecho nas folhas-corrente
#
# "Folhas-corrente" = os nos no nivel mais profundo que ainda podem ramificar
# (root no inicio; depois os filhos do ultimo split).
#
# REGRA BIG DATA: cada split/outcome roda group_by no Spark; so os N
# agregados (poucas linhas) voltam pro R.
# =============================================================================
# Inicializa a arvore no environment (chamado por cb_init via flow_reset)
.flow_init <- function() {
.cb_env$flow <- list(
id_col = .cb_env$id_col,
n_root = NA_integer_, # N da coorte inicial
levels = list(), # lista de niveis de split
outcomes = list(), # desfechos empilhados (por folha)
built = FALSE
)
invisible(NULL)
}
#' Reset the flow tree
#'
#' Clears the CONSORT flow tree. Called automatically by `cb_init()`.
#' @return Invisible NULL.
#' @export
flow_reset <- function() {
.flow_init()
invisible(NULL)
}
#' Get the current flow tree (raw structure)
#'
#' Returns the internal flow representation. Mostly for debugging /
#' programmatic access. For a tidy table, use `flow_table()`.
#' @return A list describing the flow tree.
#' @export
flow_get <- function() {
if (is.null(.cb_env$flow)) .flow_init()
.cb_env$flow
}
# -----------------------------------------------------------------------------
# Helper: conta N unico de individuos (ou linhas se assume_unique)
# -----------------------------------------------------------------------------
.flow_count_ids <- function(sdf, assume_unique = FALSE) {
id_col <- .cb_env$id_col
if (inherits(sdf, "tbl_spark")) {
if (isTRUE(assume_unique)) {
as.integer(sparklyr::sdf_nrow(sdf))
} else {
sdf %>% dplyr::select(dplyr::all_of(id_col)) %>%
dplyr::distinct() %>% sparklyr::sdf_nrow() %>% as.integer()
}
} else {
if (isTRUE(assume_unique)) nrow(sdf)
else length(unique(sdf[[id_col]]))
}
}
# -----------------------------------------------------------------------------
# Helper: conta N por categoria de uma coluna (group_by no Spark)
# Retorna tibble (categoria, n_ids) ja coletada.
# -----------------------------------------------------------------------------
.flow_count_by <- function(sdf, by_col) {
id_col <- .cb_env$id_col
b <- rlang::sym(by_col)
# Conta individuos unicos por categoria.
# Usa coalesce pra agrupar NA numa categoria "(NA)".
res <- sdf %>%
dplyr::mutate(.cat_v = dplyr::coalesce(as.character(!!b), "(NA)")) %>%
dplyr::group_by(.cat_v) %>%
dplyr::summarise(
n_ids = dplyr::n_distinct(!!rlang::sym(id_col)),
.groups = "drop"
) %>%
dplyr::collect() %>%
dplyr::rename(categoria = .cat_v) %>%
dplyr::arrange(categoria)
res
}
# -----------------------------------------------------------------------------
# Helper: conta desfecho (quantos tem outcome == 1 / TRUE) por categoria
# Para outcome binario: conta os "positivos". Retorna n e pct.
# -----------------------------------------------------------------------------
.flow_count_outcome <- function(sdf, outcome_col, split_cols = character()) {
id_col <- rlang::sym(.cb_env$id_col)
o <- rlang::sym(outcome_col)
# Agrupa pelos splits acumulados + conta positivos do desfecho
grp_syms <- lapply(split_cols, rlang::sym)
base <- sdf %>%
dplyr::mutate(
.is_pos = dplyr::if_else(
as.numeric(!!o) == 1 | as.logical(!!o) == TRUE, 1L, 0L
)
)
if (length(split_cols) > 0) {
base <- base %>%
dplyr::mutate(dplyr::across(dplyr::all_of(split_cols),
~ dplyr::coalesce(as.character(.x), "(NA)")))
base <- base %>% dplyr::group_by(!!!grp_syms)
}
out <- base %>%
dplyr::summarise(
n_total = dplyr::n_distinct(!!id_col),
n_pos = sum(.is_pos, na.rm = TRUE),
.groups = "drop"
) %>%
dplyr::collect()
out
}
# =============================================================================
# track_split() - adiciona um nivel de ramificacao
# =============================================================================
#' Split the cohort into branches by a column (CONSORT flowchart)
#'
#' Adds one branching level to the flow tree. The cohort is divided by the
#' distinct values of `by`. Chain multiple `track_split()` calls to create
#' nested branches (e.g. exposure then mediator). Passes the data through
#' unchanged, so it fits in a `%>%` pipeline.
#'
#' @param sdf A Spark DataFrame or local data frame.
#' @param by Character. Column name to split by. Its distinct values become
#' the branches. NA values are grouped as "(NA)".
#' @param label Optional character. A human-readable name for this split
#' level (e.g. "Exposure: drought"). Defaults to `by`.
#' @param value_labels Optional named character vector mapping raw values to
#' readable labels, e.g. `c("0" = "Sem seca", "1" = "Com seca")`. If not
#' given, the function tries factor levels / labelled attributes on the
#' column; failing that, uses the raw value.
#' @param max_levels Integer. Safety cap on nesting depth. Default 3.
#'
#' @return `sdf` unchanged (for piping).
#' @export
#'
#' @examples
#' cb_init(id_col = "id_indiv")
#' df <- data.frame(
#' id_indiv = sprintf("ID%03d", 1:100),
#' exposto_seca = sample(c(0L, 1L), 100, replace = TRUE),
#' migrou = sample(c(0L, 1L), 100, replace = TRUE),
#' obito_dcv = sample(c(0L, 1L), 100, replace = TRUE)
#' )
#' df <- track_split(df, by = "exposto_seca", label = "Exposure: drought",
#' value_labels = c("0" = "No drought", "1" = "Drought"))
#' df <- track_split(df, by = "migrou", label = "Mediator: migration",
#' value_labels = c("0" = "Did not migrate", "1" = "Migrated"))
#' track_outcomes(df, vars = "obito_dcv", labels = list(obito_dcv = "CVD death"))
#' flow_table()
track_split <- function(sdf, by, label = NULL, value_labels = NULL,
max_levels = 3L) {
if (is.null(.cb_env$flow)) .flow_init()
cols <- if (inherits(sdf, "tbl_spark")) colnames(sdf) else names(sdf)
if (!by %in% cols) {
stop("[autocodebook] Coluna '", by, "' nao encontrada nos dados.",
call. = FALSE)
}
n_levels <- length(.cb_env$flow$levels)
if (n_levels >= max_levels) {
warning("[autocodebook] Limite de ", max_levels,
" niveis de split atingido. Split por '", by, "' ignorado.",
call. = FALSE)
return(invisible(sdf))
}
# Auto-deteccao de value_labels se nao fornecido (fatores em df local)
if (is.null(value_labels) && !inherits(sdf, "tbl_spark")) {
col_data <- sdf[[by]]
if (is.factor(col_data)) {
lv <- levels(col_data)
value_labels <- stats::setNames(lv, lv)
}
}
# N da raiz (so na primeira vez)
if (is.na(.cb_env$flow$n_root)) {
.cb_env$flow$n_root <- .flow_count_ids(sdf)
}
# Colunas de split acumuladas (para group_by aninhado)
prev_cols <- vapply(.cb_env$flow$levels, function(l) l$by, character(1))
all_cols <- c(prev_cols, by)
# Conta N por combinacao de todos os niveis ate aqui
id_col <- rlang::sym(.cb_env$id_col)
grp_syms <- lapply(all_cols, rlang::sym)
counts <- sdf %>%
dplyr::mutate(dplyr::across(dplyr::all_of(all_cols),
~ dplyr::coalesce(as.character(.x), "(NA)"))) %>%
dplyr::group_by(!!!grp_syms) %>%
dplyr::summarise(n_ids = dplyr::n_distinct(!!id_col), .groups = "drop") %>%
dplyr::collect()
.cb_env$flow$levels[[length(.cb_env$flow$levels) + 1L]] <- list(
by = by,
label = if (!is.null(label)) label else by,
value_labels = value_labels,
counts = counts,
cols = all_cols
)
.cb_msg("[autocodebook] split por '", by, "' -> ",
nrow(counts), " grupos (nivel ", n_levels + 1L, ")")
invisible(sdf)
}
# Helper: aplica value_labels a um valor cru
.flow_value_label <- function(raw, value_labels) {
if (is.null(value_labels)) return(as.character(raw))
key <- as.character(raw)
if (key %in% names(value_labels)) value_labels[[key]] else key
}
# =============================================================================
# track_outcomes() - empilha desfechos nas folhas correntes
# =============================================================================
#' Attach outcome counts to the current leaves (CONSORT flowchart)
#'
#' Adds one or more outcome variables, counted within each current leaf
#' (combination of all splits so far). Outcomes are stacked, not branched.
#' Each outcome is treated as binary: counts how many individuals have
#' value == 1 (or TRUE), plus the percentage within the leaf.
#'
#' @param sdf A Spark DataFrame or local data frame.
#' @param vars Character vector of outcome column names (binary 0/1 or
#' logical).
#' @param labels Optional named list (var -> label).
#'
#' @return `sdf` unchanged (for piping).
#' @export
track_outcomes <- function(sdf, vars, labels = NULL) {
if (is.null(.cb_env$flow)) .flow_init()
cols <- if (inherits(sdf, "tbl_spark")) colnames(sdf) else names(sdf)
miss <- setdiff(vars, cols)
if (length(miss) > 0) {
stop("[autocodebook] Colunas de desfecho nao encontradas: ",
paste(miss, collapse = ", "), call. = FALSE)
}
# Colunas de split acumuladas (define as folhas)
split_cols <- if (length(.cb_env$flow$levels) > 0) {
.cb_env$flow$levels[[length(.cb_env$flow$levels)]]$cols
} else character()
# N da raiz se ainda nao setado (caso outcomes sem split)
if (is.na(.cb_env$flow$n_root)) {
.cb_env$flow$n_root <- .flow_count_ids(sdf)
}
for (v in vars) {
cnt <- .flow_count_outcome(sdf, v, split_cols = split_cols)
lbl <- if (!is.null(labels) && !is.null(labels[[v]])) labels[[v]] else v
.cb_env$flow$outcomes[[length(.cb_env$flow$outcomes) + 1L]] <- list(
var = v,
label = lbl,
counts = cnt, # tibble: split_cols + n_total + n_pos
cols = split_cols
)
.cb_msg("[autocodebook] desfecho '", v, "' contado em ",
max(1L, nrow(cnt)), " folha(s)")
}
invisible(sdf)
}
# =============================================================================
# flow_table() - converte a arvore numa tabela tidy (para export editavel)
# =============================================================================
#' Flow tree as a tidy table
#'
#' Flattens the CONSORT flow tree into a publication-friendly data frame.
#' One row per leaf x outcome. Split levels become named columns (using their
#' labels), values are mapped through value_labels, and percentages are
#' formatted as readable strings.
#'
#' @return A tibble.
#' @export
flow_table <- function() {
fl <- flow_get()
if (is.na(fl$n_root)) {
return(tibble::tibble())
}
n_levels <- length(fl$levels)
# Funcao: dado um data frame com as colunas de split cruas, devolve
# um data frame com colunas nomeadas pelos labels dos niveis e valores
# mapeados pelos value_labels.
.label_split_cols <- function(df) {
if (n_levels == 0) return(df[, character(0), drop = FALSE])
out <- list()
for (lv in seq_len(n_levels)) {
lvl <- fl$levels[[lv]]
raw <- as.character(df[[lvl$by]])
mapped <- vapply(raw, .flow_value_label, character(1),
value_labels = lvl$value_labels)
out[[lvl$label]] <- mapped
}
as.data.frame(out, check.names = FALSE, stringsAsFactors = FALSE)
}
# Caso sem splits e sem outcomes: so a raiz
if (n_levels == 0 && length(fl$outcomes) == 0) {
return(tibble::tibble(Group = "Cohort", N = fl$n_root))
}
# Caso com splits mas sem outcomes: tabela de N por folha
if (length(fl$outcomes) == 0) {
last <- fl$levels[[n_levels]]
labeled <- .label_split_cols(last$counts)
labeled$N <- last$counts$n_ids
return(tibble::as_tibble(labeled))
}
# Caso com outcomes: uma linha por folha x desfecho
rows <- list()
for (oc in fl$outcomes) {
cnt <- oc$counts
labeled <- if (length(oc$cols) > 0) {
.label_split_cols(cnt)
} else {
data.frame(Group = rep("Cohort", nrow(cnt)),
check.names = FALSE, stringsAsFactors = FALSE)
}
labeled[["Outcome"]] <- oc$label
labeled[["N total"]] <- cnt$n_total
labeled[["N events"]] <- cnt$n_pos
pct <- ifelse(cnt$n_total > 0, cnt$n_pos / cnt$n_total, NA_real_)
labeled[["%"]] <- vapply(pct, .pct_str, character(1))
rows[[length(rows) + 1L]] <- labeled
}
out <- dplyr::bind_rows(rows)
tibble::as_tibble(out)
}
# =============================================================================
# flow_diagram() - desenha a arvore de fluxo como um diagrama CONSORT
# =============================================================================
# Helper local: formata inteiro com separador de milhar.
.flow_fmt_int <- function(x) {
format(as.numeric(x), big.mark = ",", scientific = FALSE, trim = TRUE)
}
# Helper local: quebra um rotulo em varias linhas para caber na caixa.
.flow_wrap <- function(s, width = 28L) {
vapply(s, function(z) paste(strwrap(z, width = width), collapse = "\n"),
character(1), USE.NAMES = FALSE)
}
#' Draw the eligibility flow as a CONSORT-style flowchart
#'
#' Renders a publication-ready CONSORT-style diagram (a 'ggplot' object) from
#' the information captured during the pipeline. The layout is computed
#' automatically, so no manual positioning is needed:
#'
#' - A vertical trunk built from the linear eligibility steps (recorded by
#' [auto_filter()] / [track_step()]): the cohort baseline at the top, a
#' single side box listing every exclusion and its count, and the eligible
#' cohort below.
#' - One column of boxes per subgroup leaf of the flow tree (built with
#' [track_split()]), shown side by side under the eligible cohort.
#' - Outcome boxes (added with [track_outcomes()]) stacked beneath each
#' subgroup column, one row per outcome, with the event count and percent.
#'
#' Either part is optional: with only linear steps you get the trunk; with
#' only a split tree you get the cohort box and its subgroup columns. The
#' function reads the current session state and writes nothing to disk.
#'
#' Requires the 'ggplot2' package (a soft dependency).
#'
#' @param title Plot title. Default: "Eligibility flowchart".
#' @param show_exclusions Logical. Show the side box listing the linear
#' exclusion steps? Default: TRUE.
#' @param box_fill Fill colour of the boxes. Default: "white".
#' @param border_col Border colour of the boxes. Default: "grey25".
#' @param text_size Base text size for the box labels. Default: 3.5.
#'
#' @return A 'ggplot' object, or invisible NULL if there is nothing to draw.
#' @export
#'
#' @examples
#' if (requireNamespace("ggplot2", quietly = TRUE)) {
#' cb_init(id_col = "id_indiv")
#' df <- data.frame(
#' id_indiv = sprintf("ID%04d", 1:400),
#' sgm = sample(c(0L, 1L), 400, replace = TRUE, prob = c(0.95, 0.05)),
#' self_harm = sample(c(0L, 1L), 400, replace = TRUE, prob = c(0.97, 0.03))
#' )
#' df <- track_split(df, by = "sgm", label = "SGM status",
#' value_labels = c("0" = "Non-SGM", "1" = "SGM"))
#' track_outcomes(df, vars = "self_harm",
#' labels = list(self_harm = "Self-harm"))
#' flow_diagram()
#' }
flow_diagram <- function(title = "Eligibility flowchart",
show_exclusions = TRUE,
box_fill = "white",
border_col = "grey25",
text_size = 3.5) {
if (!requireNamespace("ggplot2", quietly = TRUE)) {
stop("[autocodebook] Package 'ggplot2' is required for flow_diagram(). ",
"Install it with: install.packages('ggplot2')", call. = FALSE)
}
fl <- flow_get()
tr <- .cb_env$tracking
has_tracking <- !is.null(tr) && nrow(tr) > 0
n_levels <- length(fl$levels)
has_tree <- !is.na(fl$n_root) && n_levels > 0
if (!has_tracking && is.na(fl$n_root)) {
message("[autocodebook] Flow is empty - nothing to draw.")
return(invisible(NULL))
}
# Geometria (unidades arbitrarias; coord_equal preserva a proporcao)
W <- 3.4; H <- 1.25; COLP <- 4.0; ROWP <- 2.0
boxes <- list(); segs <- list()
add_box <- function(xc, yc, w, h, text, face = "plain", hj = 0.5) {
tx <- if (hj == 0) xc - w / 2 + 0.18 else xc
boxes[[length(boxes) + 1L]] <<- data.frame(
xc = xc, yc = yc, xmin = xc - w / 2, xmax = xc + w / 2,
ymin = yc - h / 2, ymax = yc + h / 2,
face = face, hj = hj, tx = tx, text = text, stringsAsFactors = FALSE)
}
add_seg <- function(x, y, xend, yend, arr = FALSE) {
segs[[length(segs) + 1L]] <<- data.frame(x = x, y = y, xend = xend,
yend = yend, arr = arr)
}
# --- Folhas / colunas de subgrupo -----------------------------------------
if (has_tree) {
last <- fl$levels[[n_levels]]; cnt <- last$counts; cc <- last$cols
ord <- do.call(order, lapply(cc, function(k) as.character(cnt[[k]])))
cnt <- cnt[ord, , drop = FALSE]
keymat <- as.matrix(cnt[, cc, drop = FALSE])
leaf_keys <- apply(keymat, 1, function(r) paste(as.character(r),
collapse = ""))
leaf_lab <- apply(keymat, 1, function(r) {
labs <- vapply(seq_len(n_levels), function(k)
.flow_value_label(as.character(r[[k]]), fl$levels[[k]]$value_labels),
character(1))
paste(labs, collapse = " / ")
})
leaf_lab <- .flow_wrap(leaf_lab, 30L)
leaf_n <- cnt$n_ids
C <- nrow(cnt)
col_x <- (seq_len(C) - (C + 1) / 2) * COLP
} else {
C <- 0L; col_x <- 0
}
trunk_x <- 0
y_base <- 0; y_elig <- -ROWP * 1.25; y_head <- y_elig - ROWP * 1.25
# --- Tronco linear (baseline -> exclusoes -> elegiveis) --------------------
if (has_tracking) {
elig_n <- tr$n_ids[nrow(tr)]
tot_rm <- sum(tr$n_removed, na.rm = TRUE)
base_n <- elig_n + tot_rm
add_box(trunk_x, y_base, W * 1.7, H,
paste0("Individuals in the cohort baseline\nn = ",
.flow_fmt_int(base_n)), face = "bold")
add_box(trunk_x, y_elig, W * 1.7, H,
paste0("Eligible individuals included\nin the analyses\nn = ",
.flow_fmt_int(elig_n)), face = "bold")
add_seg(trunk_x, y_base - H / 2, trunk_x, y_elig + H / 2, arr = TRUE)
if (isTRUE(show_exclusions) && tot_rm > 0) {
b <- tr[tr$n_removed > 0, , drop = FALSE]
blines <- paste0("\u2022 ", b$description, " (n = ",
vapply(b$n_removed, .flow_fmt_int, character(1)), ")")
excl_w <- W * 1.65
# Mantem a caixa de exclusoes a direita do tronco e das colunas, sem
# sobrepor (importante quando nao ha split: C = 0).
min_excl_x <- trunk_x + (W * 1.7) / 2 + 0.6 + excl_w / 2
right_cols <- if (C > 0) max(col_x) + COLP * 1.15 else -Inf
excl_x <- max(min_excl_x, right_cols)
excl_h <- max(H, 0.55 + 0.40 * (length(blines) + 1))
excl_y <- (y_base + y_elig) / 2
add_box(excl_x, excl_y, excl_w, excl_h,
paste0("Exclusions (n = ", .flow_fmt_int(tot_rm), "):\n",
paste(blines, collapse = "\n")), hj = 0)
add_seg(trunk_x, excl_y, excl_x - excl_w / 2, excl_y, arr = TRUE)
}
split_from <- y_elig - H / 2
} else {
add_box(trunk_x, y_elig, W * 1.7, H,
paste0("Cohort\nn = ", .flow_fmt_int(fl$n_root)), face = "bold")
split_from <- y_elig - H / 2
}
# --- Colunas de subgrupo + desfechos empilhados ---------------------------
if (C > 0) {
bus_y <- (split_from + (y_head + H / 2)) / 2
add_seg(trunk_x, split_from, trunk_x, bus_y)
if (C > 1) add_seg(min(col_x), bus_y, max(col_x), bus_y)
for (j in seq_len(C)) {
add_seg(col_x[j], bus_y, col_x[j], y_head + (H * 1.25) / 2)
add_box(col_x[j], y_head, W, H * 1.25,
paste0(leaf_lab[j], "\nn = ", .flow_fmt_int(leaf_n[j])),
face = "bold")
}
if (length(fl$outcomes) > 0) {
header_half <- (H * 1.25) / 2; gap_oc <- 0.28
for (oi in seq_along(fl$outcomes)) {
oc <- fl$outcomes[[oi]]; ocnt <- oc$counts
okeys <- if (length(oc$cols) > 0) {
apply(as.matrix(ocnt[, oc$cols, drop = FALSE]), 1,
function(r) paste(as.character(r), collapse = ""))
} else rep("", nrow(ocnt))
y_oc <- y_head - header_half - gap_oc - (oi - 1) * (H + gap_oc) - H / 2
olab <- .flow_wrap(oc$label, 26L)
for (j in seq_len(C)) {
ridx <- which(okeys == leaf_keys[j])
if (length(ridx) == 1L) {
nt <- ocnt$n_total[ridx]; np <- ocnt$n_pos[ridx]
pct <- if (nt > 0) .pct_str(np / nt) else "-"
txt <- paste0(olab, "\nn = ", .flow_fmt_int(np), " (", pct, ")")
} else {
txt <- paste0(olab, "\n-")
}
add_box(col_x[j], y_oc, W, H, txt)
}
}
}
}
# --- Monta o ggplot --------------------------------------------------------
bx <- do.call(rbind, boxes)
sg <- if (length(segs)) {
do.call(rbind, segs)
} else {
data.frame(x = numeric(0), y = numeric(0), xend = numeric(0),
yend = numeric(0), arr = logical(0))
}
ggplot2::ggplot() +
ggplot2::geom_segment(
data = sg[!sg$arr, , drop = FALSE],
ggplot2::aes(x = x, y = y, xend = xend, yend = yend),
colour = "grey45", linewidth = 0.4) +
ggplot2::geom_segment(
data = sg[sg$arr, , drop = FALSE],
ggplot2::aes(x = x, y = y, xend = xend, yend = yend),
colour = "grey25", linewidth = 0.45,
arrow = grid::arrow(length = grid::unit(0.18, "cm"), type = "closed")) +
ggplot2::geom_rect(
data = bx,
ggplot2::aes(xmin = xmin, xmax = xmax, ymin = ymin, ymax = ymax),
fill = box_fill, colour = border_col, linewidth = 0.4) +
ggplot2::geom_text(
data = bx[bx$face == "bold", , drop = FALSE],
ggplot2::aes(x = tx, y = yc, label = text, hjust = hj),
fontface = "bold", size = text_size, lineheight = 0.95) +
ggplot2::geom_text(
data = bx[bx$face == "plain", , drop = FALSE],
ggplot2::aes(x = tx, y = yc, label = text, hjust = hj),
size = text_size, lineheight = 0.95) +
ggplot2::coord_equal(clip = "off") +
ggplot2::labs(title = title) +
ggplot2::theme_void(base_size = 11) +
ggplot2::theme(
plot.title = ggplot2::element_text(face = "bold", hjust = 0.5,
size = 13),
plot.margin = ggplot2::margin(12, 16, 12, 16))
}
# =============================================================================
# flow_diagram_export() — salva o diagrama (raster, vetorial ou editavel)
# =============================================================================
#' Save the eligibility flowchart to a file
#'
#' Renders [flow_diagram()] and writes it to disk. The output format is taken
#' from the file extension, so the same call can produce a raster image, a
#' vector image, or an editable office document:
#'
#' - `.png`, `.jpg`/`.jpeg`, `.tiff`, `.bmp` - raster image.
#' - `.pdf`, `.svg`, `.eps` - vector image (editable in Inkscape / Illustrator;
#' `.svg` needs the 'svglite' package).
#' - `.emf` - Windows vector metafile, editable in Word once inserted
#' (needs the 'devEMF' package).
#' - `.docx` - a Word document with the flowchart embedded as a vector
#' image (editable after ungrouping; needs the 'officer' package, plus
#' 'devEMF' for the vector version, otherwise a raster image is used).
#' - `.pptx` - a PowerPoint slide where every box and label is a native,
#' fully editable shape (needs the 'rvg' and 'officer' packages).
#'
#' There is no default path: the destination must be supplied explicitly
#' (e.g. a file under [tempdir()] or a directory chosen by the user).
#'
#' @param path File path. The extension determines the format.
#' @param width,height Plot size in inches. If NULL (default), sensible
#' values are derived from the number of subgroups and outcomes.
#' @param dpi Resolution for raster formats. Default: 300.
#' @param ... Further arguments passed to [flow_diagram()] (e.g. `title`,
#' `box_fill`, `text_size`).
#'
#' @return Invisible path, or invisible NULL if there is nothing to draw.
#' @export
#'
#' @examples
#' \donttest{
#' # Wrapped in \donttest because writing the image invokes a graphics
#' # device and may take more than 5 seconds; it writes only to tempdir().
#' if (requireNamespace("ggplot2", quietly = TRUE)) {
#' cb_init(id_col = "id_indiv")
#' df <- data.frame(
#' id_indiv = sprintf("ID%03d", 1:100),
#' g = sample(c(0L, 1L), 100, replace = TRUE),
#' y = sample(c(0L, 1L), 100, replace = TRUE)
#' )
#' df <- track_split(df, by = "g", value_labels = c("0" = "A", "1" = "B"))
#' track_outcomes(df, vars = "y", labels = list(y = "Outcome"))
#' # Raster (no extra packages needed); written to tempdir() and cleaned up:
#' out <- file.path(tempdir(), "flow.png")
#' flow_diagram_export(out)
#' unlink(out)
#' }
#' }
flow_diagram_export <- function(path, width = NULL, height = NULL,
dpi = 300, ...) {
if (!requireNamespace("ggplot2", quietly = TRUE)) {
stop("[autocodebook] Package 'ggplot2' is required for ",
"flow_diagram_export(). Install it with: install.packages('ggplot2')",
call. = FALSE)
}
g <- flow_diagram(...)
if (is.null(g)) {
message("[autocodebook] Flow is empty - nothing to export.")
return(invisible(NULL))
}
# Dimensoes padrao escalam com o numero de subgrupos e desfechos.
if (is.null(width) || is.null(height)) {
fl <- flow_get()
nlv <- length(fl$levels)
nleaf <- if (nlv > 0) nrow(fl$levels[[nlv]]$counts) else 1L
noc <- length(fl$outcomes)
if (is.null(width)) width <- max(9, 4 + nleaf * 3.6)
if (is.null(height)) height <- max(6, 5 + noc * 1.6)
}
ext <- tolower(tools::file_ext(path))
if (ext == "pptx") {
.require_pkg("rvg", "export PPTX")
.require_pkg("officer", "export PPTX")
doc <- officer::read_pptx()
doc <- officer::add_slide(doc, layout = "Blank", master = "Office Theme")
doc <- officer::ph_with(doc, rvg::dml(ggobj = g),
location = officer::ph_location_fullsize())
print(doc, target = path)
} else if (ext == "docx") {
.require_pkg("officer", "export DOCX")
# Embute o diagrama como EMF (vetor editavel no Word) se devEMF estiver
# disponivel; senao, como PNG. A figura e exibida ajustada a largura util
# da pagina, preservando a proporcao.
if (requireNamespace("devEMF", quietly = TRUE)) {
img <- tempfile(fileext = ".emf")
ggplot2::ggsave(img, g, device = devEMF::emf,
width = width, height = height)
} else {
img <- tempfile(fileext = ".png")
ggplot2::ggsave(img, g, width = width, height = height, dpi = dpi)
}
disp_w <- min(width, 6.5)
disp_h <- height * (disp_w / width)
doc <- officer::read_docx()
doc <- officer::body_add_img(doc, src = img,
width = disp_w, height = disp_h)
print(doc, target = path)
} else if (ext == "emf") {
.require_pkg("devEMF", "export EMF")
ggplot2::ggsave(path, g, device = devEMF::emf,
width = width, height = height)
} else {
# png/jpg/jpeg/tiff/bmp/pdf/svg/eps: ggsave infere o device pela extensao.
ggplot2::ggsave(path, g, width = width, height = height, dpi = dpi)
}
message("[autocodebook] Flowchart saved to: ", path)
invisible(path)
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.