Nothing
# sd2R Shiny GUI — text-to-image generation
# Launch via sd2R::sd_app() or sd2R::sd_app(model_dir = "/path/to/models")
library(shiny)
# Null-coalescing operator: yields `a` unless it is NULL, in which case `b`.
# Defined here because shiny does not always export it. `b` is only evaluated
# when `a` is NULL.
`%||%` <- function(a, b) {
  if (!is.null(a)) a else b
}
# ---------- Model presets by architecture ----------
# One entry per supported architecture, keyed by the value of the
# "Architecture" dropdown. Each preset holds a human-readable label, the
# default (square) size, default steps / CFG / sampler / scheduler, the prompt
# length in characters past which the counter is highlighted, and the
# resolution choices offered for that architecture.
MODEL_PRESETS <- local({
  # Every architecture currently offers the same resolution choices.
  shared_resolutions <- c("512x512", "768x768", "1024x1024")

  # Build one preset; `side` is used for both width and height.
  make_preset <- function(label, side, steps, cfg, sampler, scheduler,
                          max_chars) {
    list(
      label = label,
      width = side, height = side,
      steps = steps, cfg = cfg,
      sampler = sampler, scheduler = scheduler,
      max_chars = max_chars,
      resolutions = shared_resolutions
    )
  }

  list(
    sd1 = make_preset("SD 1.x", 512L, 20L, 7.0, "EULER_A", "KARRAS", 350),
    sd2 = make_preset("SD 2.x", 768L, 20L, 7.0, "EULER_A", "KARRAS", 350),
    sdxl = make_preset("SDXL", 1024L, 25L, 5.0, "EULER", "KARRAS", 700),
    flux = make_preset("Flux", 1024L, 20L, 1.0, "EULER", "SIMPLE", 2000),
    flux2 = make_preset("FLUX.2 (Klein)", 1024L, 4L, 1.0, "EULER", "SIMPLE",
                        2000),
    sd3 = make_preset("SD 3", 1024L, 28L, 5.0, "EULER", "SGM_UNIFORM", 700)
  )
})
# Choices for the Sampler / Scheduler dropdowns: the element names of the
# sd2R::SAMPLE_METHOD and sd2R::SCHEDULER lookup objects. The selected name is
# later used to fetch the matching value with `[[` when a generation is planned.
sampler_names <- names(sd2R::SAMPLE_METHOD)
scheduler_names <- names(sd2R::SCHEDULER)
# ---------- Classify files by role (based on filename) ----------
# Sorts model file names into the dropdowns they can plausibly populate, using
# case-insensitive filename heuristics only (file contents are never read).
#
# files: character vector of file names.
#
# Returns a named list of character vectors: $main, $diffusion, $vae, $clip_l,
# $t5xxl, $llm (each possibly empty). A file can still appear under more than
# one encoder/diffusion role when its name mentions several components.
classify_files <- function(files) {
  if (length(files) == 0) {
    return(list(main = character(), diffusion = character(),
                vae = character(), clip_l = character(),
                t5xxl = character(), llm = character()))
  }
  fl <- tolower(files)

  # "vae" after a non-letter (or at the start), or a standalone "ae" word.
  is_vae <- grepl("(^|[^a-z])(vae|\\bae\\b)", fl)
  # CLIP text encoders; CLIP-vision models are auxiliary, not text encoders.
  is_clip <- grepl("clip", fl) & !grepl("clip_vision|clip-vision", fl)
  is_t5 <- grepl("t5", fl)
  # LLM text encoder for FLUX.2 (Qwen3 / Mistral-Small) and other DiT LLMs.
  is_llm <- grepl("qwen|mistral", fl)
  is_aux_only <- grepl(
    paste0("upscaler|esrgan|taesd|lora|controlnet|control_net|photo_maker|",
           "clip_vision|clip-vision"),
    fl
  )
  # Diffusion weights: architecture-named files that are not themselves an LLM
  # encoder, a VAE (e.g. "flux2-vae") or an auxiliary-only file (e.g. a
  # "flux_lora"). Those share the architecture keyword but cannot be loaded as
  # the diffusion model, so they must not be offered in that dropdown.
  is_diff <- grepl("flux|sd3|dit|unet", fl) &
    !is_llm & !is_vae & !is_aux_only
  # Main checkpoint = anything that isn't a recognized auxiliary or
  # diffusion-only file.
  is_main <- !is_vae & !is_clip & !is_t5 & !is_llm & !is_diff & !is_aux_only

  list(
    main = files[is_main],
    diffusion = files[is_diff],
    vae = files[is_vae],
    clip_l = files[is_clip],
    t5xxl = files[is_t5],
    llm = files[is_llm]
  )
}
# ---------- Auto-assign model roles by filename ----------
# Scans `dir_path` (non-recursively) for .safetensors / .gguf / .ckpt files
# and guesses which file fills which role. When several files qualify for a
# role, the largest one on disk wins, and a file is never given two roles.
#
# dir_path:      folder to scan.
# arch_override: if non-NULL, respect the user-selected architecture instead
#                of auto-detecting it from filenames. Files are then matched
#                only against that architecture (e.g. "flux" excludes flux2
#                diffusion files, and vice versa).
#
# Returns a list with $arch plus one file name per role ($model, $diffusion,
# $vae, $clip_l, $t5xxl, $llm); "" means nothing was assigned. The shape is
# the same on every path, including an empty folder.
auto_assign_roles <- function(dir_path, arch_override = NULL) {
  roles <- list(
    arch = if (is.null(arch_override)) "sd1" else arch_override,
    model = "", diffusion = "", vae = "",
    clip_l = "", t5xxl = "", llm = ""
  )
  files <- list.files(dir_path, pattern = "\\.(safetensors|gguf|ckpt)$",
                      full.names = FALSE, ignore.case = TRUE)
  if (length(files) == 0) {
    return(roles)
  }

  # file.size() yields NA for entries it cannot stat; rank those last instead
  # of letting which.max() return nothing.
  sizes <- file.size(file.path(dir_path, files))
  sizes[is.na(sizes)] <- 0
  fl <- tolower(files)

  # Filename masks shared by the steps below.
  is_flux2 <- grepl("flux[._-]?2|flux2", fl)
  is_flux <- grepl("flux", fl) # also TRUE for flux2 names
  is_sd3 <- grepl("sd3", fl)
  is_sdxl <- grepl("sdxl|sd_xl", fl)
  is_vae <- grepl("(^|[^a-z])(vae|\\bae\\b)", fl)
  # CLIP-vision models are not text encoders, so they never fill CLIP-L.
  is_clip <- grepl("clip", fl) & !grepl("clip_vision|clip-vision", fl)
  is_t5 <- grepl("t5", fl)
  is_llm <- grepl("qwen|mistral", fl)
  is_aux <- grepl(
    paste0("upscaler|esrgan|taesd|lora|controlnet|control_net|photo_maker|",
           "clip_vision|clip-vision"),
    fl
  )

  # Step 1: detect architecture from filenames unless the user chose one.
  # flux2 must be checked before flux (flux2 filenames also contain "flux").
  # SD2 has no filename marker here, so it is only reachable via arch_override.
  if (is.null(arch_override)) {
    roles$arch <- if (any(is_flux2)) {
      "flux2"
    } else if (any(is_flux)) {
      "flux"
    } else if (any(is_sd3)) {
      "sd3"
    } else if (any(is_sdxl)) {
      "sdxl"
    } else {
      "sd1"
    }
  }
  arch <- roles$arch
  is_multipart <- arch %in% c("flux", "flux2", "sd3")
  none <- integer(0)

  # Step 2: candidates for the auxiliary roles.
  # VAE: SD1/SD2/SDXL bundle the VAE inside the checkpoint, so they need no
  # external file. flux and flux2 use different VAEs (e.g. ae.safetensors vs
  # flux2-vae.safetensors) that must not be swapped.
  vae_idx <- if (arch %in% c("sd1", "sd2", "sdxl")) {
    none
  } else if (identical(arch, "flux")) {
    which(is_vae & !is_flux2)
  } else if (identical(arch, "flux2") && any(is_vae & is_flux2)) {
    # Prefer an explicit flux2 VAE; fall back to any VAE only if none is named.
    which(is_vae & is_flux2)
  } else {
    which(is_vae)
  }

  # CLIP-L / T5-XXL are external encoders only for FLUX.1 and SD3. SD1/SD2/SDXL
  # ship them inside the single checkpoint, and FLUX.2 uses an LLM encoder
  # instead. Assigning standalone encoders there would feed sd.cpp
  # incompatible paths.
  uses_clip_t5 <- arch %in% c("flux", "sd3")

  # Step 3: diffusion model. Only the multipart architectures use it, and each
  # one must pick its own kind of file (never another arch's). Files that are
  # recognisably a VAE, an LLM encoder or an auxiliary add-on (LoRA,
  # ControlNet, ...) are never diffusion weights, even if they carry the
  # architecture keyword.
  not_diffusion <- is_vae | is_llm | is_aux
  diffusion_idx <- if (identical(arch, "flux")) {
    which(is_flux & !is_flux2 & !not_diffusion)
  } else if (identical(arch, "flux2")) {
    # Requires an actual flux2 file; never fall back to FLUX.1.
    which(is_flux2 & !not_diffusion)
  } else if (is_multipart) {
    # sd3: sd3-named files plus arch-neutral diffusion naming.
    which((is_sd3 | grepl("dit|unet", fl)) & !not_diffusion)
  } else {
    none
  }

  # Step 4: main model, only for single-file architectures (SD1/SD2/SDXL).
  # Exclude obvious component files (encoders/VAE/diffusion/aux) so we never
  # hand an encoder or VAE to the "Model" slot when no real checkpoint exists.
  model_idx <- none
  if (!is_multipart) {
    is_component <- grepl(paste0("(^|[^a-z])(vae|\\bae\\b)|clip|t5|qwen|mistral|",
                                 "flux|sd3|dit|unet|upscaler|esrgan|taesd|lora|",
                                 "controlnet|control_net|photo_maker|clip_vision|clip-vision"),
                          fl)
    model_idx <- which(!is_component)
  }

  # Resolve roles in priority order. Each role takes the largest candidate
  # that an earlier role has not already claimed.
  candidates <- list(
    vae = vae_idx,
    clip_l = if (uses_clip_t5) which(is_clip) else none,
    t5xxl = if (uses_clip_t5) which(is_t5) else none,
    # LLM text encoder: Qwen3 (FLUX.2 Klein) / Mistral-Small (full FLUX.2).
    llm = if (identical(arch, "flux2")) which(is_llm) else none,
    diffusion = diffusion_idx,
    model = model_idx
  )
  assigned <- rep(FALSE, length(files))
  for (role in names(candidates)) {
    free <- setdiff(candidates[[role]], which(assigned))
    if (length(free) > 0) {
      pick <- free[which.max(sizes[free])]
      roles[[role]] <- files[pick]
      assigned[pick] <- TRUE
    }
  }
  roles
}
# Initial models folder, read from the "sd2R.model_dir" option set by sd_app().
# Falls back to "" (no folder) rather than a machine-specific path, so the
# folder field starts empty and the startup auto-scan only runs when a folder
# was actually supplied. Anything that is not a single non-NA string is also
# treated as "no folder".
init_model_dir <- getOption("sd2R.model_dir", default = "")
if (!is.character(init_model_dir) || length(init_model_dir) != 1L ||
    is.na(init_model_dir)) {
  init_model_dir <- ""
}
# ---------- UI ----------
# Page layout: a dark-theme stylesheet, a title, and a sidebar layout. The
# sidebar (width 4) holds GPU info, model selection and generation parameters;
# the main panel (width 8) holds the progress area and the result image.
# The dropdowns created with `choices = NULL` start empty and are filled from
# the server with updateSelectInput().
ui <- fluidPage(
# Inline stylesheet (dark page, light input fields for readability).
tags$head(tags$style(HTML("
body { background: #1a1a2e; color: #e0e0e0; font-family: 'Segoe UI', sans-serif; }
.well { background: #16213e; border: 1px solid #2a3a5c; }
.btn-primary { background: #0f3460; border-color: #1a5276; color: #fff; }
.btn-primary:hover { background: #1a5276; }
.btn-danger { background: #c0392b; border-color: #a93226; }
/* Input fields: light background, black text for readability */
.form-control,
.selectize-input,
.selectize-input input {
background: #eef1f5 !important;
color: #111 !important;
border-color: #2a3a5c;
font-weight: 500;
}
.form-control:focus,
.selectize-input.focus {
background: #fff !important;
color: #000 !important;
border-color: #e94560;
}
textarea.form-control {
background: #eef1f5 !important;
color: #111 !important;
}
/* Dropdowns */
.selectize-dropdown {
background: #eef1f5;
color: #111;
}
.selectize-dropdown-content .option {
color: #111;
}
.selectize-dropdown-content .option.active {
background: #1a5276;
color: #fff;
}
/* Labels */
.control-label {
color: #ccc;
font-weight: 600;
}
h3, h4 { color: #e94560; }
.progress { background: #0f3460; }
.progress-bar { background: #e94560; }
#gpu_info { font-family: monospace; font-size: 0.85em; white-space: pre-wrap;
background: #0f3460; padding: 8px; border-radius: 4px; margin-bottom: 10px;
color: #e0e0e0; }
#char_counter { font-size: 0.85em; margin-top: -8px; margin-bottom: 8px; }
.img-container { text-align: center; padding: 10px; }
.img-container img { max-width: 100%; border: 2px solid #2a3a5c; border-radius: 4px; }
#status_text { font-style: italic; color: #aaa; }
/* Numeric inputs */
input[type='number'] {
background: #eef1f5 !important;
color: #111 !important;
}
"))),
# Page title (styled) plus the plain-text browser window title.
titlePanel(
div(
span("sd2R", style = "color:#e94560; font-weight:bold;"),
span(" Image Generator", style = "color:#e0e0e0;")
),
windowTitle = "sd2R Image Generator"
),
sidebarLayout(
sidebarPanel(
width = 4,
# GPU info
h4("GPU"),
uiOutput("gpu_info"),
actionButton("gpu_caps", "GPU caps", class = "btn-default btn-sm",
style = "margin: 6px 0 4px 0; width: 100%;"),
# Model
h4("Model"),
# Architecture choices are the MODEL_PRESETS keys.
selectInput("model_type", "Architecture", names(MODEL_PRESETS),
selected = "sd1"),
# Models folder
fluidRow(
column(9, textInput("model_dir", "Models folder", value = init_model_dir)),
column(3, actionButton("scan_dir", "Scan", class = "btn-primary btn-sm",
style = "margin-top: 25px; width: 100%;"))
),
# Auto-assigned dropdowns — visibility depends on architecture
# Single-file architectures: one "Model" checkpoint.
conditionalPanel(
condition = "input.model_type != 'flux' && input.model_type != 'flux2' && input.model_type != 'sd3'",
selectInput("sel_model", "Model", choices = NULL)
),
# Multipart architectures (flux / flux2 / sd3): diffusion model + encoders.
conditionalPanel(
condition = "input.model_type == 'flux' || input.model_type == 'flux2' || input.model_type == 'sd3'",
selectInput("sel_diffusion", "Diffusion model", choices = NULL),
selectInput("sel_clip_l", "CLIP-L (optional)", choices = NULL),
selectInput("sel_t5xxl", "T5-XXL (optional)", choices = NULL)
),
# LLM text encoder — FLUX.2 only (Qwen3 / Mistral-Small)
conditionalPanel(
condition = "input.model_type == 'flux2'",
selectInput("sel_llm", "LLM encoder (Qwen3/Mistral)", choices = NULL)
),
# The VAE dropdown is shown for every architecture.
selectInput("sel_vae", "VAE (optional)", choices = NULL),
actionButton("load_model", "Load Model", class = "btn-primary btn-block",
style = "width: 100%; margin-bottom: 15px;"),
hr(),
# Generation params
h4("Generation"),
textAreaInput("prompt", "Prompt", rows = 4,
value = "A fox and a bear walking through a misty autumn forest, golden sunlight filtering through the trees, detailed fur, photorealistic"),
uiOutput("char_counter"),
textAreaInput("neg_prompt", "Negative prompt", rows = 2,
value = "bad quality, blurry, ugly"),
selectInput("resolution", "Resolution", choices = NULL),
fluidRow(
column(6, selectInput("sampler", "Sampler", sampler_names, selected = "EULER_A")),
column(6, selectInput("scheduler", "Scheduler", scheduler_names, selected = "KARRAS"))
),
fluidRow(
column(4, numericInput("steps", "Steps", 20, min = 1, max = 100)),
column(4, numericInput("cfg", "CFG", 7.0, min = 0, max = 30, step = 0.5)),
column(4, numericInput("seed", "Seed", 42, min = -1))
),
fluidRow(
column(12,
checkboxInput("live_preview", "Live preview (fast latent projection)",
value = TRUE)
)
),
# Opt-in diagnostic log; the download control is rendered by the server.
fluidRow(
column(8,
checkboxInput("gen_log", "Write generation log (diagnostics)",
value = FALSE)
),
column(4, uiOutput("download_log_ui"))
),
hr(),
fluidRow(
column(6,
actionButton("generate", "Generate", class = "btn-primary btn-block",
style = "width: 100%;")
),
column(6,
downloadButton("save_btn", "Save PNG", class = "btn-block",
style = "width: 100%;")
)
)
),
# Right-hand side: progress/status area above the result image.
mainPanel(
width = 8,
uiOutput("progress_ui"),
div(class = "img-container", uiOutput("result_image"))
)
)
)
# ---------- Server ----------
server <- function(input, output, session) {
rv <- reactiveValues(
generating = FALSE,
loading_model = FALSE,
status_msg = "",
progress_trigger = NULL,
image_trigger = NULL,
show_caps = FALSE, # toggle: GPU caps text replaces the image pane
caps_text = "", # captured output of the Vulkan caps inspector
log_ready = NULL # bumped when a generation log is ready to download
)
# Non-reactive state for use in later() callbacks
# IMPORTANT: ctx stored here (not in rv) to avoid Shiny reactive wrapping
# of XPtr, which can cause GC issues with async C++ threads
local_state <- new.env(parent = emptyenv())
local_state$load_t0 <- 0
local_state$model_type <- "sd1"
local_state$gen_seed <- 42L
local_state$ctx <- NULL
local_state$last_image <- NULL
local_state$gen_log_on <- FALSE
# GPU info at startup
# Shows one line per Vulkan device (index, name, free / total memory in GB),
# a fixed notice when no device is reported, or the error text if the query
# itself fails.
output$gpu_info <- renderUI({
  describe_device <- function(d) {
    sprintf("[%d] %s (%.1f / %.1f GB)",
            d$index, d$name,
            d$free_memory / 1e9, d$total_memory / 1e9)
  }
  info <- tryCatch(
    {
      no_gpu <- !sd2R::sd_vulkan_device_count()
      if (no_gpu) {
        "No Vulkan GPU detected"
      } else {
        devs <- ggmlR::ggml_vulkan_list_devices()
        paste(vapply(devs, describe_device, character(1)), collapse = "\n")
      }
    },
    error = function(e) paste("GPU info error:", e$message)
  )
  div(id = "gpu_info", info)
})
# --- Scan folder: list files, auto-assign roles, populate dropdowns ---
# Reads the folder from the "Models folder" field, refills every role dropdown
# with the files that fit it (plus a "(none)" entry), preselects the
# auto-assigned file for each role, and reports the outcome as a notification.
scan_model_dir <- function() {
  dir_path <- trimws(input$model_dir)
  folder_ok <- nzchar(dir_path) && dir.exists(dir_path)
  if (!folder_ok) {
    showNotification("Folder not found", type = "error")
    return()
  }
  all_files <- list.files(dir_path,
                          pattern = "\\.(safetensors|gguf|ckpt)$",
                          full.names = FALSE, ignore.case = TRUE)
  if (length(all_files) == 0) {
    showNotification("No model files found in folder", type = "warning")
    return()
  }

  by_role <- classify_files(all_files)
  # Respect the architecture the user picked: match files to it instead of
  # auto-switching the dropdown.
  roles <- auto_assign_roles(dir_path, arch_override = input$model_type)

  # dropdown id -> c(key in classify_files() result, key in auto-assigned roles)
  dropdowns <- list(
    sel_model = c("main", "model"),
    sel_diffusion = c("diffusion", "diffusion"),
    sel_vae = c("vae", "vae"),
    sel_clip_l = c("clip_l", "clip_l"),
    sel_t5xxl = c("t5xxl", "t5xxl"),
    sel_llm = c("llm", "llm")
  )
  for (id in names(dropdowns)) {
    keys <- dropdowns[[id]]
    options_for_role <- by_role[[keys[1]]]
    updateSelectInput(
      session, id,
      choices = c(c("(none)" = ""),
                  setNames(options_for_role, options_for_role)),
      selected = roles[[keys[2]]]
    )
  }

  # Warn if the primary file for the chosen architecture is missing, instead
  # of silently leaving a (none) the user might not notice.
  multipart <- roles$arch %in% c("flux", "flux2", "sd3")
  primary_file <- if (multipart) roles$diffusion else roles$model
  primary_lbl <- if (multipart) "diffusion model" else "model checkpoint"
  if (nzchar(primary_file)) {
    showNotification(sprintf("Found %d files, matched for %s",
                             length(all_files), toupper(roles$arch)),
                     type = "message")
  } else {
    showNotification(sprintf("No %s found for %s in this folder",
                             primary_lbl, toupper(roles$arch)),
                     type = "warning", duration = 8)
  }
}
# Scan on button click
observeEvent(input$scan_dir, scan_model_dir())
# Auto-scan once at session start, but only when the initial models folder
# (init_model_dir) is non-empty and exists on disk. `once = TRUE` destroys the
# observer after its first run.
if (nzchar(init_model_dir) && dir.exists(init_model_dir)) {
observeEvent(TRUE, scan_model_dir(), once = TRUE, ignoreInit = FALSE)
}
# --- Resolve model paths ---
# Turns the current dropdown selections into full paths under the models
# folder. Returns an empty list when no folder is set; otherwise a named list
# (model_path, diffusion_model_path, vae_path, clip_l_path, t5xxl_path,
# llm_path) in which an unset or "(none)" selection is NULL.
get_model_paths <- function() {
  dir_path <- trimws(input$model_dir)
  if (!nzchar(dir_path)) {
    return(list())
  }
  # result name -> id of the dropdown that supplies the file name
  selections <- c(
    model_path = "sel_model",
    diffusion_model_path = "sel_diffusion",
    vae_path = "sel_vae",
    clip_l_path = "sel_clip_l",
    t5xxl_path = "sel_t5xxl",
    llm_path = "sel_llm"
  )
  lapply(selections, function(id) {
    chosen <- input[[id]]
    if (is.null(chosen) || !nzchar(chosen)) {
      NULL
    } else {
      file.path(dir_path, chosen)
    }
  })
}
# Update controls when preset changes
# Applies the selected architecture's preset to the generation controls, then
# blanks the role dropdowns that do not belong to that architecture.
observeEvent(input$model_type, {
  arch <- input$model_type
  preset <- MODEL_PRESETS[[arch]]
  default_res <- paste0(preset[["width"]], "x", preset[["height"]])
  updateSelectInput(session, "resolution", choices = preset[["resolutions"]],
                    selected = default_res)
  updateSelectInput(session, "sampler", selected = preset[["sampler"]])
  updateSelectInput(session, "scheduler", selected = preset[["scheduler"]])
  updateNumericInput(session, "steps", value = preset[["steps"]])
  updateNumericInput(session, "cfg", value = preset[["cfg"]])

  # Clear stale role selections from the other branch to avoid sending
  # incompatible path combinations to sd.cpp
  stale <- if (arch %in% c("flux", "flux2", "sd3")) {
    "sel_model"
  } else {
    c("sel_diffusion", "sel_clip_l", "sel_t5xxl")
  }
  # LLM encoder applies to flux2 only
  if (!identical(arch, "flux2")) {
    stale <- c(stale, "sel_llm")
  }
  for (id in stale) {
    updateSelectInput(session, id, selected = "")
  }
})
# Char counter
# Shows "<n> / <max> characters" under the prompt, highlighted once the prompt
# is longer than the selected architecture's max_chars.
output$char_counter <- renderUI({
  limit <- MODEL_PRESETS[[input$model_type]][["max_chars"]]
  n <- nchar(input$prompt %||% "")
  color <- "#888"
  if (n > limit) {
    color <- "#e94560"
  }
  counter_text <- sprintf("%d / %d characters", n, limit)
  div(id = "char_counter",
      span(counter_text, style = paste0("color:", color)))
})
# --- Progress file for async generation ---
progress_file <- tempfile("sd_progress_", fileext = ".json")
# --- Live preview file (single PPM, updated atomically by the C callback) ---
preview_file <- tempfile("sd_preview_", fileext = ".ppm")
preview_active <- FALSE # whether preview is wired up for the current run
# Read progress from temp file written by C++ callback.
# Parses the first line of progress_file as JSON. Returns NULL when the file
# is missing, empty, or cannot be read/parsed.
read_progress <- function() {
  if (!file.exists(progress_file)) {
    return(NULL)
  }
  tryCatch(
    {
      json <- readLines(progress_file, warn = FALSE)
      has_payload <- length(json) > 0 && nzchar(json[1])
      if (has_payload) jsonlite::fromJSON(json[1]) else NULL
    },
    error = function(e) NULL
  )
}
# Progress UI (updated by polling)
# While generating: a "Step i / n — ETA" headline with a progress bar, or a
# "Starting generation..." note until progress data exists. While loading a
# model: the status message, with a bar once tensor progress is available.
# Otherwise: just the status message.
output$progress_ui <- renderUI({
  rv$progress_trigger # dependency for reactivity
  p <- read_progress()

  note_style <- "color: #aaa; font-style: italic; margin-bottom: 10px;"
  headline_style <- "margin-bottom: 8px; color: #e0e0e0;"
  track_style <- "background: #0f3460; border-radius: 4px; height: 20px; margin-bottom: 10px;"

  # Only evaluated in the branches that need it, like the nested checks it
  # replaces.
  has_progress <- function() !is.null(p) && p$steps > 0
  note <- function(text) div(style = note_style, text)
  # `fill_fmt` is the sprintf() template for the bar's inner style; its single
  # %d receives the completion percentage.
  with_bar <- function(headline, fill_fmt) {
    tagList(
      div(style = headline_style, headline),
      div(style = track_style, div(style = sprintf(fill_fmt, p$pct)))
    )
  }

  if (rv$generating) {
    if (has_progress()) {
      with_bar(
        sprintf("Step %d / %d — ETA: %.1f sec",
                p$step, p$steps, round(p$eta_sec, 1)),
        "background: #e94560; height: 100%%; border-radius: 4px; width: %d%%; transition: width 0.3s;"
      )
    } else {
      note("Starting generation...")
    }
  } else if (rv$loading_model && has_progress()) {
    with_bar(
      rv$status_msg,
      "background: #3498db; height: 100%%; border-radius: 4px; width: %d%%; transition: width 0.3s;"
    )
  } else {
    note(rv$status_msg)
  }
})
# --- Log file for async loading status ---
log_file <- tempfile("sd_log_", fileext = ".txt")
# Returns the last line of log_file, or "" when the file is missing, empty, or
# unreadable.
read_log <- function() {
  if (!file.exists(log_file)) {
    return("")
  }
  tryCatch(
    {
      log_lines <- readLines(log_file, warn = FALSE)
      n <- length(log_lines)
      if (n == 0) "" else log_lines[n]
    },
    error = function(e) ""
  )
}
# Load model (async via std::thread)
# Validates the selection, frees any previously loaded context, builds the
# context parameters from the selected files and starts the asynchronous load;
# poll_loading() then tracks it to completion.
observeEvent(input$load_model, {
  paths <- get_model_paths()
  if (is.null(paths$model_path) && is.null(paths$diffusion_model_path)) {
    showNotification("Select a model or diffusion model file", type = "error")
    return()
  }
  if (rv$loading_model || rv$generating) {
    showNotification("Busy", type = "warning")
    return()
  }
  rv$loading_model <- TRUE
  local_state$load_t0 <- as.numeric(Sys.time())
  local_state$model_type <- input$model_type
  rv$status_msg <- "Loading model..."
  # Free the previously loaded context BEFORE creating the new one. Without
  # this, loading a second model keeps the first in VRAM (the XPtr finalizer
  # is non-deterministic and may not run for a long time), so two ~11 GB
  # models pile up — on a 24 GB card the GPU ends up nearly full and the next
  # Vulkan createDevice (load or even the GPU-caps probe) throws
  # vk::InitializationFailed and terminates the app. Releasing first means the
  # VRAM peak is one model, not two.
  if (!is.null(local_state$ctx)) {
    tryCatch(sd2R::sd_destroy_context(local_state$ctx),
             error = function(e) NULL)
    local_state$ctx <- NULL
    gc()
  }
  # Build params for C++ sd_create_context_async
  ctx_params <- list(
    vae_decode_only = TRUE,
    free_params_immediately = FALSE,
    diffusion_flash_attn = TRUE,
    # sd_ctx_params_init() in C++ leaves vae_conv_direct/diffusion_conv_direct
    # uninitialized, so they MUST be passed explicitly — otherwise the VAE
    # convolution path reads garbage and the decode crashes ("vae not start").
    # Match sd_ctx()'s defaults: VAE on (×24 faster CONV_2D), diffusion off.
    vae_conv_direct = TRUE,
    diffusion_conv_direct = FALSE,
    rng_type = as.integer(sd2R::RNG_TYPE$CUDA),
    wtype = as.integer(sd2R::SD_TYPE$COUNT),
    n_threads = 0L,
    flow_shift = 0.0,
    lora_apply_mode = as.integer(sd2R::LORA_APPLY_MODE$AUTO)
  )
  # Forward only the paths that are actually selected (NULL = not selected).
  path_keys <- c("model_path", "diffusion_model_path", "vae_path",
                 "clip_l_path", "t5xxl_path", "llm_path")
  for (key in path_keys) {
    if (!is.null(paths[[key]])) {
      ctx_params[[key]] <- paths[[key]]
    }
  }
  # FLUX.2: request the meta backend when this build supports it (ggmlR has
  # ggml_backend_meta_device). It only actually engages with >= 2 GPUs (C++
  # falls back to the normal single-backend path on 1 GPU or older builds).
  meta_ok <- isTRUE(tryCatch(sd2R:::sd_meta_backend_available(),
                             error = function(e) FALSE))
  if (identical(input$model_type, "flux2") && meta_ok) {
    ctx_params$meta_backend <- TRUE
  }
  # Set log + progress files and launch async
  sd2R:::sd_set_log_file(log_file)
  sd2R:::sd_set_progress_file(progress_file)
  sd2R:::sd_set_verbose(TRUE)
  tryCatch({
    sd2R:::sd_create_context_async(ctx_params)
    poll_loading()
  }, error = function(e) {
    rv$loading_model <- FALSE
    rv$status_msg <- paste("Load error:", e$message)
    # Undo both registrations made just above, mirroring the cleanup that
    # poll_loading() performs when a load finishes.
    sd2R:::sd_clear_log_file()
    sd2R:::sd_clear_progress_file()
  })
})
# Poll loading status every 500ms
# Each tick refreshes the status message (tensor progress if available, else
# the last log line, else just the elapsed time) and bumps progress_trigger so
# the progress UI re-renders. When the loader reports done, the context is
# fetched and stored in local_state$ctx, the busy flag is dropped and the
# log/progress files are cleared; otherwise the function reschedules itself.
poll_loading <- function() {
later::later(function() {
status <- sd2R:::sd_create_context_poll()
elapsed <- round(as.numeric(Sys.time()) - local_state$load_t0, 1)
# Check tensor loading progress (uses same progress_file as generation)
p <- read_progress()
msg <- read_log()
if (!is.null(p) && p$steps > 0) {
# Tensor loading in progress — show progress bar style
rv$status_msg <- sprintf("Loading tensors %d/%d (%.0fs)... %s",
p$step, p$steps, elapsed, msg)
} else if (nzchar(msg)) {
rv$status_msg <- sprintf("Loading (%.0fs)... %s", elapsed, msg)
} else {
rv$status_msg <- sprintf("Loading model... %.0fs", elapsed)
}
rv$progress_trigger <- Sys.time()
if (status$done) {
# Fetching the result can fail (a failed load); report it as the status.
tryCatch({
ctx <- sd2R:::sd_create_context_result()
# Tag the context with the architecture chosen at load time.
attr(ctx, "model_type") <- local_state$model_type
attr(ctx, "vae_decode_only") <- TRUE
local_state$ctx <- ctx
rv$status_msg <- sprintf("Model loaded in %.1f sec.", elapsed)
}, error = function(e) {
rv$status_msg <- paste("Load error:", e$message)
})
# Runs on success and failure alike: loading is over either way.
rv$loading_model <- FALSE
sd2R:::sd_clear_log_file()
sd2R:::sd_clear_progress_file()
} else {
poll_loading()
}
}, delay = 0.5)
}
# Generate (async via std::thread)
# Validates preconditions, records run state, optionally starts the diagnostic
# log, builds the step plan and kicks off the step state machine. Every
# failure after the run has started goes through finish_generation(), so the
# profiler / debug logging started for the diagnostic log are always stopped
# and the log is finalized even when planning or the first step fails.
observeEvent(input$generate, {
  if (is.null(local_state$ctx)) {
    showNotification("Load a model first", type = "error")
    return()
  }
  if (!nzchar(input$prompt %||% "")) {
    showNotification("Enter a prompt", type = "error")
    return()
  }
  if (rv$generating || rv$loading_model) {
    showNotification("Busy — wait for current operation", type = "warning")
    return()
  }
  dims <- as.integer(strsplit(input$resolution, "x")[[1]])
  rv$generating <- TRUE
  local_state$gen_dims <- dims
  local_state$gen_seed <- as.integer(input$seed)
  local_state$gen_t0 <- as.numeric(Sys.time())
  rv$status_msg <- "Starting generation..."
  # Set progress file path in C++
  sd2R:::sd_set_progress_file(progress_file)
  # Generation diagnostic log (opt-in). Writes inputs + the device/backend
  # actually selected + per-stage timings to log_file so we can tell whether
  # diffusion ran on the discrete GPU or the integrated one.
  local_state$gen_log_on <- isTRUE(input$gen_log)
  if (local_state$gen_log_on) {
    sd2R:::sd_set_log_file(log_file) # truncates the file
    sd2R:::sd_set_log_debug(TRUE) # include Vulkan device list (DEBUG)
    sd2R:::sd_set_verbose(TRUE)
    sd2R::sd_profile_start()
    hdr <- c(
      "=== Generation ===",
      sprintf("time: %s", format(Sys.time())),
      sprintf("model_type: %s", input$model_type %||% "?"),
      sprintf("prompt: %s", input$prompt %||% ""),
      sprintf("negative: %s", input$neg_prompt %||% ""),
      sprintf("resolution: %dx%d", dims[1], dims[2]),
      sprintf("steps: %s", input$steps),
      sprintf("sampler: %s", input$sampler),
      sprintf("scheduler: %s", input$scheduler),
      sprintf("cfg: %s", input$cfg),
      sprintf("seed: %s", input$seed),
      gen_device_line(local_state$ctx),
      "",
      "--- sd.cpp log ---")
    cat(hdr, file = log_file, sep = "\n", append = TRUE)
  }
  # Build the executable step plan. This mirrors sd_generate()'s routing:
  # cfg auto-1.0 for Flux/Flux.2 (the root cause of the VAE crash with cfg=7),
  # strategy selection (direct / tiled / highres-fix) and VRAM-aware VAE
  # tiling — none of which the old direct-async path inherited. Highres-fix
  # expands into base -> upscale -> refine steps run by the state machine.
  plan <- tryCatch(
    sd2R:::.sd_generate_plan(
      local_state$ctx,
      prompt = input$prompt,
      negative_prompt = input$neg_prompt %||% "",
      width = dims[1], height = dims[2],
      sample_method = sd2R::SAMPLE_METHOD[[input$sampler]],
      sample_steps = as.integer(input$steps),
      cfg_scale = as.numeric(input$cfg),
      seed = as.integer(input$seed),
      scheduler = sd2R::SCHEDULER[[input$scheduler]],
      batch_count = 1L,
      vae_mode = "auto",
      vae_auto_threshold = 768L * 768L),
    error = function(e) e)
  if (inherits(plan, "error")) {
    # finish_generation() resets the busy flag, clears the progress file and,
    # when the diagnostic log is on, stops the profiler, turns debug logging
    # back off and finalizes the log. Then restore the planning-specific
    # status text.
    finish_generation(conditionMessage(plan))
    rv$status_msg <- paste("Plan error:", conditionMessage(plan))
    return()
  }
  local_state$plan <- plan
  local_state$step_idx <- 0L # index of the step about to run
  local_state$step_image <- NULL # image carried between steps
  # Live preview: write the latest in-progress frame to preview_file. proj
  # mode is cheap and needs no VAE/taesd, so it is always safe to enable.
  preview_active <<- isTRUE(input$live_preview)
  if (preview_active) {
    if (file.exists(preview_file)) unlink(preview_file)
    local_state$preview_image <- NULL
    sd2R::sd_preview_start(preview_file, mode = sd2R::PREVIEW$PROJ, interval = 1L)
  }
  # Kick off the state machine (runs steps in order, async gen + sync upscale).
  tryCatch({
    run_next_step()
  }, error = function(e) {
    # Same teardown as any other failed run (busy flag, progress file,
    # preview, "Error: ..." status), plus diagnostic-log finalization.
    finish_generation(conditionMessage(e))
  })
})
# Finish the whole run: release preview, report timing, reset state.
# err: NULL on success, otherwise an error message string. With an error the
# status line becomes "Error: <err>"; on success it reports the size, seed and
# elapsed time recorded when the run started.
finish_generation <- function(err = NULL) {
rv$generating <- FALSE
sd2R:::sd_clear_progress_file()
if (preview_active) { sd2R::sd_preview_stop(); preview_active <<- FALSE }
# Finalize the diagnostic log: stop profiling and append per-stage timings,
# then a distilled summary (device / flash-attn / stage wall times).
if (isTRUE(local_state$gen_log_on)) {
# Each part is best-effort: a failure is written into the log instead of
# aborting the rest of the teardown.
tryCatch({
sd2R::sd_profile_stop()
prof <- utils::capture.output(
print(sd2R::sd_profile_summary(sd2R::sd_profile_get())))
cat(c("", "--- Stage timings (profiler) ---", prof),
file = log_file, sep = "\n", append = TRUE)
}, error = function(e) {
cat(c("", paste("[log] profile error:", conditionMessage(e))),
file = log_file, sep = "\n", append = TRUE)
})
if (!is.null(err)) {
cat(c("", paste("[log] generation error:", err)),
file = log_file, sep = "\n", append = TRUE)
}
tryCatch({
cat(c("", summarize_gen_log(
log_file,
dev_idx = attr(local_state$ctx, "vram_device") %||% 0L)),
file = log_file, sep = "\n", append = TRUE)
}, error = function(e) {
cat(c("", paste("[log] summary error:", conditionMessage(e))),
file = log_file, sep = "\n", append = TRUE)
})
sd2R:::sd_set_log_debug(FALSE)
rv$log_ready <- Sys.time() # reveal the download button
}
if (!is.null(err)) {
rv$status_msg <- paste("Error:", err)
return(invisible())
}
elapsed <- round(as.numeric(Sys.time()) - local_state$gen_t0, 1)
rv$status_msg <- sprintf("Done. %dx%d, seed=%d, %.1fs",
local_state$gen_dims[1], local_state$gen_dims[2],
local_state$gen_seed, elapsed)
}
# State machine driver: advance to and execute the next plan step. Synchronous
# "upscale" steps run inline (fast) and fall through to the next step; async
# "gen" steps launch the C++ worker and hand off to poll_step().
# Reads and advances local_state$step_idx / local_state$plan; the image
# carried between steps lives in local_state$step_image. Any failure ends the
# run through finish_generation(<message>).
run_next_step <- function() {
repeat {
local_state$step_idx <- local_state$step_idx + 1L
if (local_state$step_idx > length(local_state$plan)) {
# No final gen step produced an image — shouldn't happen, but be safe.
finish_generation()
return(invisible())
}
step <- local_state$plan[[local_state$step_idx]]
if (identical(step$type, "upscale")) {
rv$status_msg <- step$label
res <- tryCatch({
base_img <- local_state$step_image
# Use the upscaler model only when the step names one that exists on disk;
# otherwise keep the image and rely on the resize below.
up <- if (!is.null(step$upscaler) && nzchar(step$upscaler) &&
file.exists(step$upscaler)) {
sd2R:::sd_upscale_image(step$upscaler, base_img,
upscale_factor = step$upscale_factor)
} else {
base_img
}
# Bring the image to the exact size this step asks for.
if (up$width != step$width || up$height != step$height) {
up <- sd2R:::.resize_sd_image(up, step$width, step$height)
}
up
}, error = function(e) e)
if (inherits(res, "error")) {
finish_generation(conditionMessage(res)); return(invisible())
}
local_state$step_image <- res
next # fall through to the next step in the same tick
}
# gen step: launch async, optionally feeding the previous image as init.
rv$status_msg <- step$label
params <- step$params
if (isTRUE(step$uses_init) && !is.null(local_state$step_image)) {
params$init_image <- local_state$step_image
}
ok <- tryCatch({
sd2R:::sd_generate_async(local_state$ctx, params)
TRUE
}, error = function(e) { finish_generation(conditionMessage(e)); FALSE })
if (!ok) return(invisible())
# The worker is running: leave the loop and let poll_step() take over.
poll_step(step)
return(invisible())
}
}
# Poll the currently running gen step every 500ms; on completion store its
# image and either finish (final step) or advance the machine.
# step: the plan step being polled; its `final` flag decides whether the run
# ends here or continues with run_next_step().
poll_step <- function(step) {
later::later(function() {
status <- sd2R:::sd_generate_poll()
rv$progress_trigger <- Sys.time()
# Pull the latest preview frame (if enabled) so the result pane shows the
# image taking shape. sd_read_preview() returns NULL until a frame exists.
if (preview_active) {
pv <- tryCatch(sd2R::sd_read_preview(preview_file), error = function(e) NULL)
if (!is.null(pv)) {
local_state$preview_image <- pv
rv$image_trigger <- Sys.time()
}
}
if (status$done) {
res <- tryCatch(sd2R:::sd_generate_result(), error = function(e) e)
if (inherits(res, "error")) {
finish_generation(conditionMessage(res)); return()
}
# Only the first returned image is kept.
local_state$step_image <- res[[1]]
if (isTRUE(step$final)) {
local_state$last_image <- res[[1]]
local_state$preview_image <- NULL # final replaces preview
rv$image_trigger <- Sys.time()
finish_generation()
} else {
# Show the intermediate result while the next step runs.
local_state$preview_image <- NULL
local_state$last_image <- res[[1]]
rv$image_trigger <- Sys.time()
run_next_step()
}
} else {
# Not done yet: schedule the next poll.
poll_step(step)
}
}, delay = 0.5)
}
# Result display (implemented by output$result_image further below): while
# generating with live preview on, show the latest preview frame (small
# latent-projection image, scaled up with pixelation so it reads as a draft);
# once done, the final image replaces it.
# --- Device line for the generation log -------------------------------------
# The R-side view of which Vulkan device this context targets (index + name +
# free/total VRAM). The authoritative C++ pick is the "Selected main device:"
# line that sd.cpp logs once at context init; this line makes the device
# visible in every generation log even when that init line isn't re-emitted.
gen_device_line <- function(ctx) {
  # Formats one log line "device: [<idx>] <name>[ [free/total GB free]]" for
  # the device index stored in the "vram_device" attribute of `ctx`
  # (index 0 when the attribute is absent). Every lookup is guarded on its
  # own, so a failing query degrades to a placeholder instead of raising.
  guarded <- function(expr, fallback) {
    tryCatch(expr, error = function(e) fallback)
  }

  idx <- guarded(attr(ctx, "vram_device") %||% 0L, 0L)
  name <- guarded(ggmlR::ggml_vulkan_device_description(idx), "?")
  mem <- guarded(ggmlR::ggml_vulkan_device_memory(idx), NULL)

  # The memory suffix is only added when the memory query succeeded.
  memstr <- ""
  if (!is.null(mem)) {
    free_gb <- mem$free / 1e9
    total_gb <- mem$total / 1e9
    memstr <- sprintf(" [%.1f/%.1f GB free]", free_gb, total_gb)
  }

  sprintf("device: [%d] %s%s", idx, name, memstr)
}
# --- Generation-log summary -------------------------------------------------
# Distills the raw sd.cpp INFO log (already accumulated in log_file) into the
# signals that matter for "why is this slow": which device was picked, whether
# the flash-attention fast path engaged, and the per-stage wall times that
# sd.cpp prints as "<stage> completed, taking X.XXs". This is the per-section
# view of test_sampling_profile.R, built from the stage timings sd.cpp already
# emits (per-op Vulkan timings need GGML_VK_PERF_LOGGER, which writes to the R
# console, not this file, and is unsafe from the async worker thread).
summarize_gen_log <- function(path, dev_idx = 0L) {
  # Condenses the log file at `path` into a short character vector:
  #   "=== Summary ===", the selected device (if logged), a flash-attention
  #   status line, the text-encoder backend (if logged), every
  #   "<stage> ..., taking X.XXs" timing in file order, and the last
  #   "generate_image completed in X.XXs" total.
  # path:    log file to read; a missing file yields character(0).
  # dev_idx: device index passed to ggmlR::ggml_vulkan_device_caps().
  if (!file.exists(path)) return(character(0))
  lines <- readLines(path, warn = FALSE)
  out <- c("=== Summary ===")

  dev <- grep("Selected main device:", lines, value = TRUE)
  if (length(dev) > 0) {
    out <- c(out, sub(".*Selected main device:", "device:", dev[1]))
  }

  # Flash-attention status. sd.cpp does not reliably print a "Using flash
  # attention" line for every architecture (e.g. flux2), so query the device
  # capability directly (coopmat1_fa_support) as the source of truth, and use
  # any sd.cpp "flash attention" line only as secondary confirmation.
  fa_cap <- tryCatch(
    isTRUE(ggmlR::ggml_vulkan_device_caps(dev_idx)$coopmat1_fa_support),
    error = function(e) NA)
  # Only lines that do not negate flash attention count as confirmation;
  # a line such as "flash attention ... is currently unsupported!" must not
  # be reported as "ON".
  fa_lines <- grep("flash attention", lines, value = TRUE, ignore.case = TRUE)
  fa_negated <- grepl(
    "unsupported|not supported|not available|disabled|fall(s|ing)? ?back",
    fa_lines, ignore.case = TRUE)
  fa_log <- any(!fa_negated)
  fa <- if (isTRUE(fa_cap) || fa_log) {
    sprintf("flash-attn: ON (coopmat path%s)",
            if (fa_log) ", confirmed in log" else " per device caps")
  } else if (is.na(fa_cap)) {
    "flash-attn: unknown (could not query device caps)"
  } else {
    "flash-attn: not available on this device"
  }
  out <- c(out, fa)

  # Text-encoder backend (CPU vs GPU). sd.cpp logs the encoder compute buffer
  # as "<name> compute buffer size: N MB(RAM)" or "...MB(VRAM)" — RAM means the
  # encoder ran on CPU (keep_clip_on_cpu, or the platform default), VRAM means
  # it ran on the GPU. Only the first matching line is used.
  te_line <- grep("(qwen|qwen3|mistral|t5|clip|llm).*compute buffer size:.*MB\\((RAM|VRAM)\\)",
                  lines, value = TRUE, ignore.case = TRUE)
  if (length(te_line) > 0) {
    where <- sub(".*MB\\((RAM|VRAM)\\).*", "\\1", te_line[1])
    out <- c(out, sprintf("text encoder backend: %s",
                          if (identical(where, "RAM")) "CPU (RAM)" else "GPU (VRAM)"))
  }

  # All "<label> completed/decoded, taking X.XXs" stage timings, in order.
  # regexec() yields c(full match, label, seconds) for matching lines and an
  # empty vector otherwise, hence the length-3 filter.
  pat <- "(.+?)(?: completed| decoded)?, taking ([0-9.]+)s"
  hits <- regmatches(lines, regexec(pat, lines))
  rows <- Filter(function(m) length(m) == 3, hits)
  if (length(rows) > 0) {
    labels <- vapply(rows, function(m) trimws(m[[2]]), character(1))
    # Strip the leading "file.cpp:NNN - " source location sd.cpp prepends.
    labels <- sub("^[A-Za-z0-9_.-]+:[0-9]+\\s*-\\s*", "", labels)
    secs <- as.numeric(vapply(rows, function(m) m[[3]], character(1)))
    # Format all rows in one vectorised call instead of growing `out` per row.
    out <- c(out, "", "stage timings (from sd.cpp):",
             sprintf(" %-40s %8.2fs", labels, secs))
  }

  # Overall wall time: the last "generate_image completed in" line wins.
  total <- grep("generate_image completed in", lines, value = TRUE)
  if (length(total) > 0) {
    tt <- sub(".*completed in ([0-9.]+)s.*", "\\1", total[length(total)])
    out <- c(out, "", sprintf(" %-46s %8.2fs", "TOTAL generate_image", as.numeric(tt)))
  }
  out
}
# --- Vulkan capabilities inspector (ported from ggmlR vulkan_caps.R) -------
# Prints the same report into a string. Used by the "GPU caps" toggle to show
# coopmat / flash-attention / bf16 support — the key signals for diffusion
# speed (a missing coopmat1_fa_support means flash-attn silently falls back).
collect_vulkan_caps <- function() {
  # Builds the "GPU caps" report and returns it as a character vector, one
  # element per printed line: everything cat()'d inside the block below is
  # collected by capture.output(type = "output").
  # Sections: CPU / build capabilities, then for every Vulkan device its
  # memory, capability flags, a FLASH_ATTN_EXT supports_op probe and a verdict.
  # Calls that are not wrapped in tryCatch() (e.g. ggml_vulkan_available())
  # can still raise; such errors propagate to the caller.
  yes_no <- function(x) if (x) "YES" else "NO"

  capture.output({
    # --- CPU / build capabilities ----------------------------------------
    # These come from the linked libggml.a (ggmlR), NOT from sd2R's own
    # compile flags — the CPU math kernels (incl. the text encoder when it
    # runs on CPU) live there. A Windows build of ggmlR missing AVX2/FMA or
    # OPENMP makes CPU text_encode collapse to scalar/single-thread and can
    # take minutes (observed: 584s on a strong CPU). This block is the first
    # thing to check when text_encode is slow with the encoder on CPU.
    cat("=== CPU / Build Capabilities (from libggml.a) ===\n\n")
    si <- tryCatch(sd2R::sd_system_info(), error = function(e) NULL)
    if (!is.null(si)) {
      cat("sd2R version :", si$sd2R_version, "\n")
      cat("sd.cpp build :", si$sd_cpp_version, "\n")
      cat("CPU cores :", si$num_cores, "\n")
      cat("ggml string :", trimws(si$system_info), "\n")
    }
    cf <- tryCatch(ggmlR::ggml_cpu_features(), error = function(e) NULL)
    if (!is.null(cf)) {
      cat("ggml version :", tryCatch(ggmlR::ggml_version(),
                                     error = function(e) "?"), "\n")
      flag <- function(x) if (isTRUE(cf[[x]])) "YES" else "no"
      # The four that actually move the needle for CPU text_encode speed.
      cat("\n --- key CPU flags ---\n")
      # OPENMP is read from the ggml system-info string, not from `cf`.
      cat(sprintf(" OPENMP : %s (multi-thread matmul; OFF => single-thread)\n",
                  if (grepl("OPENMP = 1", si$system_info %||% "")) "YES" else "no"))
      cat(sprintf(" AVX2 : %s (vectorized matmul; OFF => scalar, ~slow)\n", flag("avx2")))
      cat(sprintf(" FMA : %s\n", flag("fma")))
      cat(sprintf(" F16C : %s (fast f16<->f32 for quantized weights)\n", flag("f16c")))
      cat(sprintf(" AVX512 : %s\n", flag("avx512")))
      cat("\n")
    }

    cat("=== Vulkan Device Capabilities ===\n\n")
    if (!ggmlR::ggml_vulkan_available()) {
      cat("Vulkan: NOT COMPILED\n")
      cat(" Reinstall ggmlR with libvulkan-dev + glslc.\n")
      return(invisible())
    }
    cat("Vulkan: compiled OK\n")
    n <- ggmlR::ggml_vulkan_device_count()
    cat("Devices found:", n, "\n\n")
    if (n == 0) {
      cat("No Vulkan devices. Check driver installation.\n")
      return(invisible())
    }

    for (i in seq_len(n)) {
      idx <- i - 1L  # device indices passed to ggmlR are zero-based
      desc <- tryCatch(ggmlR::ggml_vulkan_device_description(idx),
                       error = function(e) sprintf("<device %d>", idx))
      mem <- tryCatch(ggmlR::ggml_vulkan_device_memory(idx),
                      error = function(e) NULL)
      # ggml_vulkan_device_caps() spins up a temporary Vulkan logical device
      # (createDevice). On a near-full GPU that throws vk::InitializationFailed
      # error — which, uncaught, terminates the whole R/Shiny process. Catch it
      # so the diagnostic (whose job is to *help*) never kills the app, and
      # report low VRAM as the likely cause.
      caps <- tryCatch(ggmlR::ggml_vulkan_device_caps(idx),
                       error = function(e) NULL)

      cat(sprintf("Device [%d]: %s\n", idx, desc))
      if (!is.null(mem)) {
        cat(sprintf(" Memory : %.2f GB free / %.2f GB total\n",
                    mem$free / 1e9, mem$total / 1e9))
      } else {
        cat(" Memory : <unavailable>\n")
      }

      if (is.null(caps)) {
        cat("\n --- Capabilities ---\n")
        cat(" could not query device caps (createDevice failed).\n")
        if (!is.null(mem) && mem$free < 2e9) {
          cat(sprintf(" Likely cause: only %.2f GB VRAM free — release loaded\n",
                      mem$free / 1e9))
          cat(" models (rm(ctx); gc()) and retry.\n")
        }
        cat("\n")
        next
      }

      cat("\n --- Capabilities ---\n")
      cat(sprintf(" arch : %s\n", caps$arch))
      cat(sprintf(" fp16 : %s (fast inference)\n", yes_no(caps$fp16)))
      cat(sprintf(" bf16 : %s (Flux/SD3 native BF16)\n", yes_no(caps$bf16)))
      cat(sprintf(" integer_dot_product: %s (Q4/Q8 GEMM)\n",
                  yes_no(caps$integer_dot_product)))
      cat(sprintf(" coopmat_support : %s (fast GEMM kernels)\n",
                  yes_no(caps$coopmat_support)))
      cat(sprintf(" coopmat1_fa_support: %s (flash-attention path)\n",
                  yes_no(caps$coopmat1_fa_support)))
      cat(sprintf(" subgroup_size : %d\n", caps$subgroup_size))
      if (caps$coopmat_support && caps$coopmat_m > 0) {
        cat(sprintf(" coopmat tile : M=%d N=%d K=%d\n",
                    caps$coopmat_m, caps$coopmat_n, caps$coopmat_k))
      }

      # --- Direct FLASH_ATTN_EXT support probe -----------------------------
      # caps$coopmat1_fa_support only reflects the coopmat-v1 FA path. On
      # Ampere+/Blackwell NVIDIA the FA path is coopmat2, which that flag
      # does NOT capture. The only honest signal is to build a real
      # flash_attn_ext node and ask the backend the exact question sd2R asks
      # per attention layer (ggml_extend.hpp: ggml_backend_supports_op). If
      # this says NO, diffusion_flash_attn silently falls back to F32 attn.
      cat("\n --- Flash-attention op probe (supports_op) ---\n")
      tryCatch({
        # Locate the matching backend device by description.
        dev <- NULL
        ndev <- ggmlR::ggml_backend_dev_count()
        for (d in seq_len(ndev) - 1L) {
          dd <- ggmlR::ggml_backend_dev_get(d)
          if (identical(ggmlR::ggml_backend_dev_description(dd), desc)) {
            dev <- dd
            break
          }
        }
        if (is.null(dev)) {
          cat(" device handle not found via backend registry — skipped\n")
        } else {
          # Probe the two head dims that actually occur in diffusion models:
          # 64 (SD1.x/SDXL/Flux DiT) and 128 (some Flux/SD3 blocks).
          #
          # The tensor types MUST mirror what build_kqv() in ggml_extend.hpp
          # actually feeds to ggml_flash_attn_ext at runtime, otherwise this
          # probe lies. The ggmlR Vulkan FA kernel requires Q in F32 and
          # K/V in F16 (ggml-vulkan supports_op + shader assert q->type==F32).
          # Building Q as F16 here is what previously made the probe always
          # report NOT SUPPORTED even though FA was in fact available.
          for (hd in c(64L, 128L)) {
            pctx <- ggmlR::ggml_init(16L * 1024L * 1024L, no_alloc = TRUE)
            # Free this iteration's context before the next one is created.
            # A deferred on.exit(ggml_free(pctx)) would look `pctx` up only
            # when it finally runs, so every registered handler would free
            # the last context and the earlier ones would never be released.
            tryCatch({
              n_head <- 8L
              n_tokens <- 256L
              q <- ggmlR::ggml_new_tensor_4d(pctx, ggmlR::GGML_TYPE_F32,
                                             hd, n_head, n_tokens, 1L)
              k <- ggmlR::ggml_new_tensor_4d(pctx, ggmlR::GGML_TYPE_F16,
                                             hd, n_head, n_tokens, 1L)
              v <- ggmlR::ggml_new_tensor_4d(pctx, ggmlR::GGML_TYPE_F16,
                                             hd, n_head, n_tokens, 1L)
              fa_node <- ggmlR::ggml_flash_attn_ext(pctx, q, k, v, NULL,
                                                    1.0 / sqrt(hd), 0.0, 0.0)
              ok <- ggmlR::ggml_backend_dev_supports_op(dev, fa_node)
              cat(sprintf(" FLASH_ATTN_EXT head_dim=%-3d : %s\n",
                          hd, if (isTRUE(ok)) "SUPPORTED" else "NOT SUPPORTED (fallback)"))
            }, finally = ggmlR::ggml_free(pctx))
          }
        }
      }, error = function(e) {
        # A failing probe is reported in the text and never aborts the report.
        cat(sprintf(" probe error: %s\n", conditionMessage(e)))
      })

      # The verdict is derived from the capability flags only, not from the
      # supports_op probe above.
      cat("\n --- Verdict ---\n")
      if (caps$fp16 && caps$coopmat1_fa_support) {
        cat(" BEST: coopmat flash-attention path active (fastest)\n")
      } else if (caps$fp16 && caps$coopmat_support) {
        cat(" GOOD: coopmat GEMM path, NO flash-attention\n")
        cat(" -> diffusion attention falls back to F32 (slow).\n")
      } else if (caps$fp16) {
        cat(" OK: FP16 active, no coopmat (scalar/subgroup shaders)\n")
      } else {
        cat(" WARN: FP32 only - slow, check driver/device support\n")
      }
      cat("\n")
    }
  }, type = "output")
}
observeEvent(input$gpu_caps, {
  # The "GPU caps" button toggles the result pane between the image and the
  # capabilities report; the button label always names the next action.
  if (!isTRUE(rv$show_caps)) {
    # Switching to the report: build it now; a failure becomes the pane text
    # instead of an error.
    report <- tryCatch(
      paste(collect_vulkan_caps(), collapse = "\n"),
      error = function(e) paste("GPU caps error:", conditionMessage(e)))
    rv$caps_text <- report
    rv$show_caps <- TRUE
    updateActionButton(session, "gpu_caps", label = "Hide caps")
  } else {
    # Report currently shown: toggle back to the image.
    rv$show_caps <- FALSE
    updateActionButton(session, "gpu_caps", label = "GPU caps")
  }
})
output$result_image <- renderUI({
  # Renders the result pane as one of:
  #   * the GPU capabilities report (when rv$show_caps is TRUE),
  #   * the live preview frame (while generating and a frame is available),
  #   * the last finished image,
  #   * a placeholder text when there is no image at all.
  # Images are embedded inline as base64-encoded PNG data URIs.
  rv$image_trigger # reactive dependency to re-render on new image

  # Caps toggle takes over the whole pane (either caps OR image).
  if (isTRUE(rv$show_caps)) {
    return(tags$pre(
      style = paste("text-align:left; white-space:pre-wrap;",
                    "background:#1a1a2e; color:#e0e0e0; padding:14px;",
                    "border-radius:6px; font-size:0.85em; overflow:auto;"),
      rv$caps_text))
  }

  final <- local_state$last_image
  showing_preview <- rv$generating && !is.null(local_state$preview_image)
  img <- if (showing_preview) local_state$preview_image else final

  if (is.null(img)) {
    div(style = "color:#555; padding: 100px 0; font-size: 1.3em;",
        "Generated image will appear here")
  } else {
    # The PNG is only a staging file for the base64 encoder. Remove it as
    # soon as it has been read (or if writing/encoding fails); otherwise each
    # re-render — every preview tick during generation — leaves another
    # file behind in the session temp directory.
    tmp <- tempfile(fileext = ".png")
    b64 <- tryCatch({
      sd2R::sd_save_image(img, tmp)
      base64enc::base64encode(tmp)
    }, finally = unlink(tmp))

    style <- "max-width: 100%;"
    if (showing_preview) {
      # nearest-neighbour upscale so a 32x32 draft fills the pane crisply
      style <- paste0(style, " image-rendering: pixelated; width: 100%;",
                      " opacity: 0.92;")
    }
    tagList(
      tags$img(src = paste0("data:image/png;base64,", b64), style = style),
      if (showing_preview)
        div(style = "color:#e94560; font-size:0.85em; margin-top:4px;",
            "live preview…")
    )
  }
})
# Download
output$save_btn <- downloadHandler(
  # Download name carries a timestamp: sd2R_<YYYYmmdd_HHMMSS>.png
  filename = function() {
    stamp <- format(Sys.time(), "%Y%m%d_%H%M%S")
    paste0("sd2R_", stamp, ".png")
  },
  # Writes local_state$last_image to the path supplied by Shiny; when no
  # image is stored, nothing is written.
  content = function(file) {
    img <- local_state$last_image
    if (is.null(img)) {
      return(invisible(NULL))
    }
    sd2R::sd_save_image(img, file)
  }
)
# Show the "download log" button only when logging is on and a log exists.
output$download_log_ui <- renderUI({
  # Emits the "Download log" button only once rv$log_ready has been set and
  # log_file exists with non-zero size; otherwise renders nothing.
  # Reading rv$log_ready also makes this output re-render when it changes.
  ready <- rv$log_ready
  has_log <- !is.null(ready) &&
    file.exists(log_file) &&
    file.size(log_file) > 0
  if (!has_log) {
    return(NULL)
  }
  downloadButton("download_log", "Download log", class = "btn-block",
                 style = "width: 100%;")
})
output$download_log <- downloadHandler(
  # Download name carries a timestamp: sd2R_gen_log_<YYYYmmdd_HHMMSS>.txt
  filename = function() {
    stamp <- format(Sys.time(), "%Y%m%d_%H%M%S")
    paste0("sd2R_gen_log_", stamp, ".txt")
  },
  # Copies log_file to the path supplied by Shiny; when log_file does not
  # exist, nothing is copied.
  content = function(file) {
    if (!file.exists(log_file)) {
      return(invisible(NULL))
    }
    file.copy(log_file, file, overwrite = TRUE)
  }
)
}
# Create the Shiny app object from the `ui` and `server` objects.
shinyApp(ui, server)
# Any scripts or data that you put into this service are public.
# Add the following code to your website.
# For more information on customizing the embed code, read Embedding Snippets.