R/repository-impl.R

all_commits <- function (store) {
  guard()
  query <- list(rlang::quo(class == 'commit'))
  ids   <- storage::os_find(store, query)
  map(ids, function(id) commit(store, id))
}

# --- private API: update ------------------------------------------------------


repository_updater <- function (repo, env, plot, expr) {
  guard()
  stopifnot(is_repository(repo), is.environment(env))

  u <- repo$proto()
  u$env  <- env
  u$plot <- plot
  u$expr <- expr
  u$png  <- NULL
  u$plot_id <- character()

  u$process_objects <- function (.) {
    guard("u$process_objects")

    .$objects <- lapply(as.list(env), strip_object)
    .$ids <- lapply(.$objects, storage::compute_id)
    .$new <- Filter(function (id) !storage::os_exists(.$store, id), .$ids)

    dbg("newly created: ", names(.$new))

    .$tags <- lapply(names(.$new), function (name) auto_tags(.$objects[[name]]))
    .$tags <- imap(with_names(.$tags, names(.$new)), function (tags, name) {
      names <- extract_parents(env, expr)
      dbg(name, " parents: ", paste(names, collapse = ", "))

      names2 <- intersect(names, names(.$last_commit$objects))
      if (!setequal(names, names2)) {
        warning("parents identified but not present in the previous commit ",
                name, ": ", paste(setdiff(names, names2), collapse = ", "),
                call. = FALSE)
      }

      tags$parents <- .$last_commit$objects[names2]
      tags
    })
  }

  u$process_plot <- function (.) {
    guard("u$process_plot")

    if (is.null(.$plot)) {
      return()
    }

    # rawplot wraps all plotting logic
    .$plot <- rawplot(.$plot)
    .$png <- as_png(.$plot, 150, 150)

    # if the current plot looks the same as the last one, do not update at all
    if (png_equal(.$png, .$last_png)) {
      return()
    }

    # prepare the rawplot wrapper for storing
    .$plot <- for_store(.$plot)
    .$plot_id <- storage::compute_id(.$plot)

    # no need to store, just remember the id
    if (storage::os_exists(.$store, .$plot_id)) {
      dbg("plot already present")
      return()
    }

    .$plot_tags <- auto_tags(.$plot, class = base::union(class(.$plot), 'plot'))
    names <- extract_parents(env, expr)
    .$plot_tags$parents <- .$last_commit$objects[names]
  }

  u$introduced_changes <- function (.) {
    guard("u$introduced_changes")

    ia <- .$ids
    ib <- .$last_commit$objects

    sorted_names <- function(x) if (is.null(names(x))) character() else sort(names(x))
    an <- sorted_names(ia)
    bn <- sorted_names(ib)

    return(!identical(ia[an], ib[bn]) || (!is.null(.$plot$png) && !png_equal(.$plot$png, .$last_png)))
  }

  u$write <- function (.) {
    guard("u$write")

    # store list of object pointers + basic 'history' tags
    data <- list(expr = .$expr, objects = .$ids, plot = .$plot_id)
    tags <- list(class = 'commit', parent = .$last_commit$id, time = current_time())
    cid  <- storage::compute_id(list(data, tags))

    # this should never happen because hash is computed from both objects
    # and parent id; if it does happen, something is SERIOUSLY broken
    if (storage::os_exists(.$store, cid)) {
      stop("commit already exists, aborting")
    }

    # write the commit meta-data
    storage::os_write(.$store, data, tags, id = cid)

    # write objects, append the parent commit id to tags
    imap(.$new, function (id, name) {
      dbg("artifact `", name, "` not present, storing [", id, "]")
      storage::os_write(.$store, .$objects[[name]], id = id,
                        tags = c(.$tags[[name]], list(parent_commit = cid, names = name)))
    })

    if (length(.$plot_id)) {
      dbg("storing new plot [", .$plot_id, "] with parents: ", paste(.$plot_tags$parents, collapse = ", "))
      storage::os_write(.$store, .$plot, id = .$plot_id,
                        tags = c(.$plot_tags, list(parent_commit = cid)))
    }

    .$last_commit_id <- cid
    invisible(cid)
  }

  u$sync_repo <- function (.) {
    guard("u$sync_repo")

    .$.super$last_commit <- list(id = .$last_commit_id, objects = .$ids)
    .$.super$last_png    <- .$png
  }

  u
}


extract_parents <- function (env, expr)
{
  # add the "parent objects" tag using "defer"
  fn <- function(){}; body(fn) <- expr

  df <- defer::defer_(fn, .caller_env = env, .extract = TRUE)
  ev <- defer::extract_variables(df)
  ef <- defer::extract_functions(df)

  ef$entry <- NULL
  c(names(ev), names(ef))
}


#' Removes references to environments.
#'
#' Some objects (e.g. formula, lm) store references to environments
#' in which they were created. This function replaces each such reference
#' with a reference to `emptyenv()`.
#'
#' As much as possible, this function tries not to make any copies of
#' the original data. This is because the address of the object might
#' be used to determine whether object's identifier needs to be computed
#' which might be a costly operation.
#'
#' @param obj Object to be processed.
#' @param attr Is `obj` an attribute of some other object?
#'
#' @return `obj` with environment references replaced by `emptyenv()`
#'
#' @rdname strip_object
#'
strip_object <- function (obj)
{
  guard()

  if (is.symbol(obj)) return(obj)
  if (inherits(obj, 'recordedplot')) return(obj)

  strip_object_impl(obj)
}


#' @rdname strip_object
strip_object_impl <- function (obj, attr = FALSE)
{
  # TODO should we disregard any environment?
  if (is.environment(obj) && isTRUE(attr)) return(emptyenv())

  attrs <- if (!is.null(attributes(obj))) lapply(attributes(obj), strip_object_impl, attr = TRUE)

  if (is.list(obj)) {
    obj_tmp <- lapply(unclass(obj), strip_object_impl, attr = FALSE)
    # use stripped object only if stripping actually changed something
    obj_lst <- lapply(unclass(obj), function(x)x)
    if (!identical(obj_tmp, obj_lst)) {
      obj <- obj_tmp
    }
  }
  if (!identical(attributes(obj), attrs)) {
    attributes(obj) <- attrs
  }

  obj
}


# --- explain ----------------------------------------------------------

object_origin <- function (repo, ids, ancestors) {
  stopifnot(is.numeric(ancestors))

  black <- new_vector()
  grey  <- new_vector(data = lapply(ids, list, 0))

  while (grey$size()) {
    el <- grey$pop_front()
    lapply(storage::os_read_tags(repo$store, as_id(first(el)))$parents, function (id) {
      if (!black$find(id) && second(el) < ancestors) {
        grey$push_back(list(id, second(el)+1))
      }
    })
    black$push_back(first(el))
  }

  as.character(black$values)
}


#' Pretty-format an R expression.
#'
#' Internally calls [styler::style_text()] but adds line breaks after each
#' occurence of the pipe operator.
#'
#' @param expr Expression.
#' @param indent Indentation for all lines of the expression.
#'
#' @importFrom stringi stri_paste stri_replace_all_fixed stri_replace_all_regex
#' @export
format_expr <- function (expr, indent = '  ') {
  expr <- stri_replace_all_regex(stri_paste(deparse(expr), collapse = ''), '\\s', '')
  expr <- stri_replace_all_fixed(expr, '%>%', '%>%\n')
  lines <- styler::style_text(expr)
  stri_paste(indent, lines, collapse = '\n')
}


# --- deltas -----------------------------------------------------------

#' Transform a graph of commits into a graph of deltas.
#'
#' A _delta_ is an introduction of a new artifact (object, plot, printout)
#' in the R session. Graph of deltas is easier to read for a person than
#' a graph of commits becase only the relevant (new) information is shown
#' in each node of the graph. Thus, translating from commits to deltas is
#' the first step to present the history of changes in R session recorded
#' in commits.
#'
#' @description `history_to_deltas` is the main function which orchestrates
#' the transformation.
#'
#' @param hist List of commits.
#' @return Object of S3 class `deltas`.
#'
#' @rdname deltas
#' @importFrom utils head tail
#'
deprecated_history_to_deltas <- function (hist)
{
  store <- attr(hist, 'store')

  nodes <- new_map()
  convert <- function (commit_id, parent_delta) {
    commit  <- hist[[commit_id]]
    new_ids <- commit$objects[introduced(hist, commit_id)]

    mapply(new_ids, c(parent_delta, head(new_ids, -1)), FUN = function (child, parent) {
      delta <- storage::os_read_tags(store, child)
      delta$id <- child
      delta$parent <- parent
      nodes$assign(delta$id, delta)
    })

    parent_delta <- last(new_ids)
    lapply(commit$children, function (commit_id) convert(commit_id, parent_delta))
  }

  roots <- names(filter(hist, no_parent()))
  lapply(roots, function (id) convert(id, NA_character_))

  # return the final "steps" structure
  structure(nodes$values, class = 'deltas')
}


#' @description `is_deltas` verifies if the given object is a valid
#' `deltas` structure.
#'
#' @param x Object to be tested.
#'
#' @rdname deltas
#'
is_deltas <- function (x) inherits(x, 'deltas')
lbartnik/repository documentation built on May 28, 2019, 9:50 a.m.