R/rxp_gc.R

Defines functions rxp_gc

Documented in rxp_gc

#' Garbage Collect Rixpress Build Artifacts and Logs
#'
#' This function performs garbage collection on Nix store paths and build log files
#' generated by rixpress. It can operate in two modes: full garbage collection
#' (when \code{keep_since = NULL}) or targeted deletion based on log file age.
#'
#' @param keep_since Date or character string (YYYY-MM-DD format). If provided,
#'   only build logs older than this date will be targeted for deletion, along
#'   with their associated Nix store paths. If \code{NULL}, performs a full
#'   Nix garbage collection. Default is \code{NULL}.
#' @param project_path Character string specifying the path to the project
#'   directory containing the \code{_rixpress} folder with build logs.
#'   Default is \code{"."} (current directory).
#' @param dry_run Logical. If \code{TRUE}, shows what would be deleted without
#'   actually performing any deletions. Useful for previewing the cleanup
#'   operation. Default is \code{FALSE}.
#' @param timeout_sec Numeric. Timeout in seconds for individual Nix commands.
#'   Also used for concurrency lock expiration. Default is 300 seconds.
#' @param verbose Logical. If \code{TRUE}, provides detailed output including
#'   full paths, command outputs, and diagnostic information about references
#'   preventing deletion. Default is \code{FALSE}.
#' @param ask Logical. If \code{TRUE}, ask for user confirmation before
#'   performing deleting artifacts. Default is \code{TRUE}.
#'
#' @return Invisibly returns a list with cleanup summary information:
#' \itemize{
#'   \item \code{kept}: Vector of build log filenames that were kept
#'   \item \code{deleted}: Vector of build log filenames targeted for deletion
#'   \item \code{protected}: Number of store paths protected via GC roots (date-based mode)
#'   \item \code{deleted_count}: Number of store paths successfully deleted
#'   \item \code{failed_count}: Number of store paths that failed to delete
#'   \item \code{referenced_count}: Number of store paths skipped due to references
#'   \item \code{log_files_deleted}: Number of build log files successfully deleted
#'   \item \code{log_files_failed}: Number of build log files that failed to delete
#'   \item \code{dry_run_details}: List of detailed information when dry_run = TRUE
#' }
#'
#' @details
#' The function operates in two modes:
#'
#' \strong{Full Garbage Collection Mode} (\code{keep_since = NULL}):
#' \itemize{
#'   \item Runs \code{nix-store --gc} to delete all unreferenced store paths
#'   \item Does not delete any build log files
#'   \item Suitable for complete cleanup of unused Nix store paths
#' }
#'
#' \strong{Targeted Deletion Mode} (\code{keep_since} specified):
#' \itemize{
#'   \item Identifies build logs older than the specified date
#'   \item Extracts store paths from old logs using \code{rxp_inspect()}
#'   \item Protects recent store paths by creating temporary GC roots
#'   \item Attempts to delete old store paths individually using \code{nix-store --delete}
#'   \item Deletes the corresponding build log \code{.json} files from \code{_rixpress/}
#'   \item Handles referenced paths gracefully (paths that cannot be deleted due to dependencies)
#' }
#'
#' \strong{Concurrency Safety:}
#' The function uses a lock file mechanism to prevent multiple instances from
#' running simultaneously, which could interfere with each other's GC root management.
#'
#' \strong{Reference Handling:}
#' Some store paths may not be deletable because they are still referenced by:
#' \itemize{
#'   \item User or system profile generations
#'   \item Active Nix shell environments
#'   \item Result symlinks in project directories
#'   \item Other store paths that depend on them
#' }
#' These paths are reported but not considered errors.
#'
#' @examples
#' \dontrun{
#' # Preview what would be deleted (dry run)
#' rxp_gc(keep_since = "2025-08-01", dry_run = TRUE, verbose = TRUE)
#'
#' # Delete artifacts from builds older than August 1st, 2025
#' rxp_gc(keep_since = "2025-08-01")
#'
#' # Full garbage collection of all unreferenced store paths
#' rxp_gc()
#'
#' # Clean up artifacts older than 30 days ago
#' rxp_gc(keep_since = Sys.Date() - 30)
#' }
#'
#' @seealso
#' \code{\link{rxp_list_logs}}, \code{\link{rxp_inspect}}
#'
#' @family utilities
#' @export
rxp_gc <- function(
  keep_since = NULL,
  project_path = ".",
  dry_run = FALSE,
  timeout_sec = 300,
  verbose = FALSE,
  ask = TRUE
) {
  nix_bin <- Sys.which("nix-store")
  if (!nzchar(nix_bin)) {
    stop("nix-store not found on PATH. Install Nix or adjust PATH.")
  }

  safe_system2 <- function(command, args, timeout = timeout_sec) {
    out <- tryCatch(
      system2(command, args, stdout = TRUE, stderr = TRUE, timeout = timeout),
      error = function(e) {
        if (grepl("timeout", conditionMessage(e), ignore.case = TRUE)) {
          stop("Command '", command, "' timed out after ", timeout, " seconds.")
        }
        stop("Command '", command, "' failed: ", conditionMessage(e))
      }
    )
    status <- attr(out, "status")
    if (!is.null(status) && status != 0) {
      stop(
        "Command '",
        command,
        " ",
        paste(args, collapse = " "),
        "' failed (exit ",
        status,
        ").\n",
        paste(out, collapse = "\n")
      )
    }
    out
  }

  validate_store_paths <- function(paths) {
    if (length(paths) == 0) {
      return(character(0))
    }
    paths <- unique(paths[nzchar(paths)])
    paths <- paths[grepl("^/nix/store/[a-z0-9]{32}-", paths)]
    # Only return paths that actually exist on the filesystem
    paths[file.exists(paths) | dir.exists(paths)]
  }

  extract_which_log <- function(filename) {
    # Expect: build_log_YYYYMMDD_HHMMSS_*.rds -> returns HHMMSS
    m <- regexec("build_log_[0-9]{8}_([0-9]{6})_", filename)
    r <- regmatches(filename, m)[[1]]
    if (length(r) >= 2) r[2] else NA_character_
  }

  get_paths_from_logs <- function(filenames) {
    # Returns named list: filename -> character() of store paths
    out <- setNames(vector("list", length(filenames)), filenames)
    for (i in seq_along(filenames)) {
      fn <- filenames[i]
      wl <- extract_which_log(fn)
      if (is.na(wl)) {
        warning("Could not parse which_log from filename: ", fn)
        out[[fn]] <- character(0)
        next
      }
      insp <- tryCatch(
        suppressMessages(rxp_inspect(which_log = wl)),
        error = function(e) NULL
      )
      if (is.null(insp) || !is.data.frame(insp)) {
        warning("rxp_inspect failed for ", fn)
        out[[fn]] <- character(0)
        next
      }
      if (!("path" %in% names(insp))) {
        warning("rxp_inspect returned no 'path' column for ", fn)
        out[[fn]] <- character(0)
        next
      }
      out[[fn]] <- validate_store_paths(insp$path)
    }
    out
  }

  # ---- Concurrency lock -----------------------------------------------------
  lock_file <- file.path(tempdir(), "rixpress_gc.lock")
  if (file.exists(lock_file)) {
    # try to detect live process on Unix
    ok_to_remove <- TRUE
    pid <- NA_integer_
    tstamp <- NA_character_
    info <- try(readLines(lock_file, warn = FALSE), silent = TRUE)
    if (!inherits(info, "try-error") && length(info) >= 2) {
      pid <- suppressWarnings(as.integer(info[1]))
      tstamp <- info[2]
      alive <- FALSE
      if (.Platform$OS.type == "unix" && !is.na(pid)) {
        ps_out <- try(
          system2("ps", c("-p", pid), stdout = TRUE, stderr = FALSE),
          silent = TRUE
        )
        if (!inherits(ps_out, "try-error")) {
          alive <- any(grepl(paste0("\\b", pid, "\\b"), ps_out))
        }
      }
      if (alive) {
        stop(
          "Another rxp_gc process appears to be running (PID: ",
          pid,
          "). ",
          "If not, remove the lock: ",
          lock_file
        )
      }
      # Remove if stale (older than timeout)
      ts <- suppressWarnings(as.POSIXct(tstamp))
      if (
        !is.na(ts) && difftime(Sys.time(), ts, units = "secs") <= timeout_sec
      ) {
        ok_to_remove <- FALSE
      }
    }
    if (ok_to_remove) {
      unlink(lock_file)
      message("Removed stale lock file.")
    } else {
      stop("Lock file exists and is recent: ", lock_file)
    }
  }
  writeLines(c(as.character(Sys.getpid()), as.character(Sys.time())), lock_file)
  on.exit(if (file.exists(lock_file)) unlink(lock_file), add = TRUE)

  if (!is.null(keep_since)) {
    if (inherits(keep_since, "character")) {
      keep_since <- tryCatch(as.Date(keep_since), error = function(e) NA)
    }
    if (!inherits(keep_since, "Date") || is.na(keep_since)) {
      stop("Invalid 'keep_since'. Use a Date or 'YYYY-MM-DD' string.")
    }
  }

  project_path <- normalizePath(project_path, mustWork = TRUE)
  all_logs <- rxp_list_logs(project_path)
  if (
    !is.data.frame(all_logs) ||
      !all(c("filename", "modification_time") %in% names(all_logs))
  ) {
    stop(
      "rxp_list_logs() must return a data.frame with columns 'filename' and 'modification_time'."
    )
  }
  if (nrow(all_logs) == 0) {
    message("No build logs found. Nothing to do.")
    return(invisible(NULL))
  }

  # Partition logs
  if (is.null(keep_since)) {
    logs_to_keep <- all_logs
    logs_to_delete <- all_logs[0, ]
  } else {
    logs_to_keep <- all_logs[
      as.Date(all_logs$modification_time) >= keep_since,
      ,
      drop = FALSE
    ]
    logs_to_delete <- all_logs[
      as.Date(all_logs$modification_time) < keep_since,
      ,
      drop = FALSE
    ]
  }

  # Gather paths for keep/delete (via rxp_inspect)
  keep_paths_by_log <- if (nrow(logs_to_keep)) {
    get_paths_from_logs(logs_to_keep$filename)
  } else {
    list()
  }
  delete_paths_by_log <- if (nrow(logs_to_delete)) {
    get_paths_from_logs(logs_to_delete$filename)
  } else {
    list()
  }

  keep_paths_all <- validate_store_paths(unique(unlist(
    keep_paths_by_log,
    use.names = FALSE
  )))
  delete_paths_all <- validate_store_paths(unique(unlist(
    delete_paths_by_log,
    use.names = FALSE
  )))

  summary_info <- list(
    kept = logs_to_keep$filename,
    deleted = logs_to_delete$filename,
    protected = 0L,
    dry_run_details = NULL
  )

  if (!is.null(keep_since) && dry_run) {
    message("--- DRY RUN --- No changes will be made. ---")

    # 1) List logs that would be deleted
    message("Logs that would be deleted (", nrow(logs_to_delete), "):")
    if (nrow(logs_to_delete)) {
      cat("  ", logs_to_delete$filename, sep = "\n  ", "\n")
    } else {
      cat("  (none)\n")
    }

    # 2) For each such log, print its paths (via rxp_inspect) as 'path  output'
    details <- list()
    if (length(delete_paths_by_log)) {
      message("\nArtifacts per log (from rxp_inspect):")
      for (fn in names(delete_paths_by_log)) {
        wl <- extract_which_log(fn)
        insp <- tryCatch(
          suppressMessages(rxp_inspect(which_log = wl)),
          error = function(e) NULL
        )
        cat("\n== ", fn, " ==\n", sep = "")
        if (is.null(insp) || !is.data.frame(insp)) {
          cat("  (rxp_inspect unavailable)\n")
          details[[fn]] <- data.frame(
            path = character(0),
            output = character(0)
          )
          next
        }
        cols <- intersect(c("path", "output"), names(insp))
        df <- unique(insp[cols])
        if (!nrow(df)) {
          cat("  (no paths)\n")
        } else {
          # Print two columns: path and output
          for (j in seq_len(nrow(df))) {
            p <- if ("path" %in% cols) df$path[j] else ""
            o <- if (
              "output" %in% cols && !is.na(df$output[j]) && nzchar(df$output[j])
            ) {
              paste0("  ", df$output[j])
            } else {
              ""
            }
            cat("  ", p, o, "\n", sep = "")
          }
        }
        # keep a clean DF with both columns for return
        if (!("path" %in% names(df))) {
          df$path <- character(nrow(df))
        }
        if (!("output" %in% names(df))) {
          df$output <- character(nrow(df))
        }
        details[[fn]] <- df[, c("path", "output"), drop = FALSE]
      }
    } else {
      message("\n(no artifacts found under logs_to_delete)")
    }

    # 3) Aggregate candidate store paths to delete
    if (length(delete_paths_all)) {
      # Filter to only existing paths for more accurate reporting
      existing_delete_paths <- delete_paths_all[
        file.exists(delete_paths_all) | dir.exists(delete_paths_all)
      ]
      missing_paths <- setdiff(delete_paths_all, existing_delete_paths)

      message(
        "\nAggregate store paths targeted for deletion (deduplicated): ",
        length(delete_paths_all),
        " total, ",
        length(existing_delete_paths),
        " existing, ",
        length(missing_paths),
        " already missing"
      )

      if (length(existing_delete_paths) > 0) {
        message("\nExisting paths that would be deleted:")
        cat("  ", existing_delete_paths, sep = "\n  ", "\n")
      }

      if (length(missing_paths) > 0) {
        message("\nPaths already missing (will be skipped):")
        cat("  ", missing_paths, sep = "\n  ", "\n")
      }

      if (verbose && length(existing_delete_paths) > 0) {
        message("The paths listed above would be deleted in a real run.")
      }
    } else {
      message("\nNo valid Nix store paths were found under the logs to delete.")
    }

    summary_info$dry_run_details <- details

    # Also show which log files would be deleted
    if (nrow(logs_to_delete) > 0) {
      message("\nBuild log files that would be deleted:")
      for (i in seq_len(nrow(logs_to_delete))) {
        log_file <- logs_to_delete$filename[i]
        log_path <- file.path(project_path, "_rixpress", log_file)
        exists_indicator <- if (file.exists(log_path)) "[OK]" else "[X]"
        message("  ", exists_indicator, " ", log_file)
      }
    }

    return(invisible(summary_info))
  }

  if (is.null(keep_since) && dry_run) {
    message(
      "--- DRY RUN --- Would run 'nix-store --gc' (delete all unreferenced store paths). ---"
    )
    if (verbose) {
      message(
        "(Tip: for an approximate preview, run 'nix-collect-garbage -n' from a shell.)"
      )
    }
    return(invisible(summary_info))
  }

  if (is.null(keep_since)) {
    # Full GC mode
    if (
      ask &&
        !utils::askYesNo(
          "Run full Nix garbage collection (delete all unreferenced artifacts)?",
          default = FALSE
        )
    ) {
      message("Operation cancelled.")
      return(invisible(NULL))
    }

    # Run full GC
    message("Running Nix garbage collector...")
    gc_out <- safe_system2(nix_bin, "--gc")
    if (length(gc_out)) {
      if (verbose) {
        cat(gc_out, sep = "\n")
      } else {
        rel <- gc_out[grepl(
          "freed|removing|deleting",
          gc_out,
          ignore.case = TRUE
        )]
        if (length(rel)) {
          message("GC summary (last up to 10 lines):")
          cat(utils::tail(rel, 10), sep = "\n")
        }
      }
    }
    message("Garbage collection complete.")
    return(invisible(summary_info))
  }

  # Targeted deletion mode
  if (nrow(logs_to_delete) == 0) {
    message(
      "No build logs older than ",
      format(keep_since, "%Y-%m-%d"),
      " found. Nothing to do."
    )
    return(invisible(NULL))
  }

  if (length(delete_paths_all) == 0) {
    message(
      "No valid store paths found in logs older than ",
      format(keep_since, "%Y-%m-%d"),
      ". Nothing to delete."
    )
    return(invisible(NULL))
  }

  prompt <- paste0(
    "This will permanently delete ",
    length(delete_paths_all),
    " store paths from ",
    nrow(logs_to_delete),
    " build(s) older than ",
    format(keep_since, "%Y-%m-%d"),
    ". Continue?"
  )
  if (ask && !isTRUE(utils::askYesNo(prompt, default = FALSE))) {
    message("Operation cancelled.")
    return(invisible(NULL))
  }

  # Protect recent artifacts (date-based mode only)
  temp_gcroots_dir <- NULL
  if (length(keep_paths_all)) {
    temp_gcroots_dir <- tempfile("rixpress-gc-")
    dir.create(temp_gcroots_dir)
    on.exit(
      if (!is.null(temp_gcroots_dir)) {
        unlink(temp_gcroots_dir, recursive = TRUE, force = TRUE)
      },
      add = TRUE
    )

    message(
      "Protecting ",
      length(keep_paths_all),
      " recent artifacts via GC roots..."
    )
    protected <- 0L
    for (i in seq_along(keep_paths_all)) {
      link_path <- file.path(temp_gcroots_dir, paste0("root-", i))
      # Create/register an indirect root so Nix tracks it under gcroots/auto
      tryCatch(
        {
          safe_system2(
            nix_bin,
            c("--add-root", link_path, "--indirect", keep_paths_all[i])
          )
          protected <- protected + 1L
        },
        error = function(e) {
          warning(
            "Failed to add GC root for ",
            keep_paths_all[i],
            ": ",
            conditionMessage(e)
          )
        }
      )
    }
    if (protected == 0L) {
      stop("Failed to protect any store paths. Aborting.")
    }
    summary_info$protected <- protected
  }

  # Delete specific store paths
  message("Deleting ", length(delete_paths_all), " targeted store paths...")

  # Filter to only existing paths
  existing_paths <- delete_paths_all[
    file.exists(delete_paths_all) | dir.exists(delete_paths_all)
  ]
  missing_paths <- setdiff(delete_paths_all, existing_paths)

  if (length(missing_paths) > 0) {
    message("Skipping ", length(missing_paths), " paths that no longer exist.")
    if (verbose) {
      message("Missing paths:")
      cat("  ", missing_paths, sep = "\n  ", "\n")
    }
  }

  if (length(existing_paths) == 0) {
    message("No existing paths to delete. All targeted paths are already gone.")
    return(invisible(summary_info))
  }

  message("Proceeding to delete ", length(existing_paths), " existing paths...")

  # Try to delete paths individually to handle references gracefully
  total_deleted <- 0L
  failed_paths <- character(0)
  referenced_paths <- character(0)

  for (i in seq_along(existing_paths)) {
    path <- existing_paths[i]

    # Double-check path still exists
    if (!file.exists(path) && !dir.exists(path)) {
      message(
        "  [",
        i,
        "/",
        length(existing_paths),
        "] Skipping ",
        basename(path),
        " (already gone)"
      )
      next
    }

    message(
      "  [",
      i,
      "/",
      length(existing_paths),
      "] Attempting to delete ",
      basename(path),
      "..."
    )

    tryCatch(
      {
        delete_out <- system2(
          nix_bin,
          c("--delete", path),
          stdout = TRUE,
          stderr = TRUE
        )

        status <- attr(delete_out, "status")
        if (is.null(status) || status == 0) {
          total_deleted <- total_deleted + 1L
          message("    [OK] Successfully deleted")
          if (verbose && length(delete_out)) {
            cat("    ", delete_out, sep = "\n    ", "\n")
          }
        } else {
          # Check if it's a "still alive" error (has references)
          if (
            any(grepl(
              "still alive|Cannot delete",
              delete_out,
              ignore.case = TRUE
            ))
          ) {
            referenced_paths <- c(referenced_paths, path)
            message("    [!] Skipped (still referenced)")
            if (verbose) {
              message("    Details: ", paste(delete_out, collapse = " "))
            }
          } else {
            failed_paths <- c(failed_paths, path)
            message("    [X] Failed to delete")
            if (verbose) {
              cat("    ", delete_out, sep = "\n    ", "\n")
            }
          }
        }
      },
      error = function(e) {
        failed_paths <<- c(failed_paths, path)
        message("    [X] Error: ", conditionMessage(e))
      }
    )
  }

  # Summary reporting
  message("\nDeletion summary:")
  message("  Successfully deleted: ", total_deleted, " paths")
  message("  Skipped (still referenced): ", length(referenced_paths), " paths")
  message("  Failed (other errors): ", length(failed_paths), " paths")

  if (length(referenced_paths) > 0 && verbose) {
    message("\nReferenced paths (cannot delete):")
    for (path in referenced_paths) {
      message("  ", basename(path))

      # Show GC roots
      tryCatch(
        {
          roots <- system2(
            nix_bin,
            c("--query", "--roots", path),
            stdout = TRUE,
            stderr = FALSE
          )
          if (length(roots) > 0) {
            message("    GC roots: ", paste(roots, collapse = ", "))
          } else {
            message("    GC roots: (none found)")
          }
        },
        error = function(e) {
          message("    GC roots: (query failed)")
        }
      )

      # Show referrers (what depends on this path)
      tryCatch(
        {
          refs <- system2(
            nix_bin,
            c("--query", "--referrers", path),
            stdout = TRUE,
            stderr = FALSE
          )
          if (length(refs) > 0) {
            message(
              "    Referenced by: ",
              paste(basename(refs), collapse = ", ")
            )
          } else {
            message("    Referenced by: (none)")
          }
        },
        error = function(e) {
          message("    Referenced by: (query failed)")
        }
      )
    }

    message("\nTip: To investigate further, you can run:")
    message(
      "  nix-store --query --roots <path>  # Show what GC roots reference this path"
    )
    message(
      "  nix-store --query --referrers <path>  # Show what store paths depend on this"
    )
    message("  ls -la /nix/var/nix/gcroots/  # Browse all GC roots")
  }

  if (length(failed_paths) > 0 && verbose) {
    message("\nFailed paths:")
    cat("  ", basename(failed_paths), sep = "\n  ", "\n")
  }

  message(
    "\nStore path deletion complete. Successfully deleted ",
    total_deleted,
    " of ",
    length(delete_paths_all),
    " targeted paths."
  )
  summary_info$deleted_count <- total_deleted
  summary_info$failed_count <- length(failed_paths)
  summary_info$referenced_count <- length(referenced_paths)

  # Delete old build log files
  if (nrow(logs_to_delete) > 0) {
    message("\nDeleting old build log files...")
    log_files_deleted <- 0L
    log_files_failed <- character(0)

    for (i in seq_len(nrow(logs_to_delete))) {
      log_file <- logs_to_delete$filename[i]
      log_path <- file.path(project_path, "_rixpress", log_file)

      message(
        "  [",
        i,
        "/",
        nrow(logs_to_delete),
        "] Deleting ",
        log_file,
        "..."
      )

      if (!file.exists(log_path)) {
        message("    [!] File not found (already deleted?)")
        next
      }

      tryCatch(
        {
          unlink(log_path)
          if (!file.exists(log_path)) {
            log_files_deleted <- log_files_deleted + 1L
            message("    [OK] Successfully deleted")
          } else {
            log_files_failed <- c(log_files_failed, log_file)
            message("    [X] Failed to delete (file still exists)")
          }
        },
        error = function(e) {
          log_files_failed <<- c(log_files_failed, log_file)
          message("    [X] Error: ", conditionMessage(e))
        }
      )
    }

    message("\nBuild log deletion summary:")
    message("  Successfully deleted: ", log_files_deleted, " files")
    message("  Failed: ", length(log_files_failed), " files")

    if (length(log_files_failed) > 0 && verbose) {
      message("\nFailed to delete log files:")
      cat("  ", log_files_failed, sep = "\n  ", "\n")
    }

    summary_info$log_files_deleted <- log_files_deleted
    summary_info$log_files_failed <- length(log_files_failed)
  }

  message("\nCleanup complete!")
  invisible(summary_info)
}

Try the rixpress package in your browser

Any scripts or data that you put into this service are public.

rixpress documentation built on Feb. 19, 2026, 9:06 a.m.