R/pull.R

#' Pull remote changes to a Google Doc to the source Rmarkdown file
#'
#' @param file_name The path to the Rmarkdown file which you'd like to update
#'
#' @param find_and_replace Should a final find-and-replace pass happen?
#' @return \code{TRUE} (invisibly) if successfull, otherwise, an error.
#' @export
gd_pull <- function(file_name, find_and_replace = TRUE) {

  # For magirttr / R CMD CHECK
  . <- NULL

  gd_auth()

  # Convert the file to commonmark standard
  standardize_rmd(file_name)

  # Extact the doc's body and YAML front matter
  yaml_vars <- rmarkdown::yaml_front_matter(file_name)
  body      <- partition_yaml_front_matter(readLines(file_name))$body
  doc_id    <- yaml_vars$googdown$doc_id
  doc_title <- yaml_vars$title


  # Have there been any changes? If not, then go home early
  # You might want to come up with a more specific function here; e.g. one that
  # validates it was the last revision which came back from a push operation
  local_rev  <- latest_revision_from_local_metadata(doc_id, update = FALSE)
  remote_rev <- latest_revision_from_local_metadata(doc_id, update = TRUE)

  if (local_rev == remote_rev) {
    catif("No changes pulled: Local and remote documents already in sync")
    return(invisible(TRUE))
  }

  # Get cache files in order ---------------------------------------------------

  # And the previous local and remote versions, for comparison
  remote1_ast_path <- file_path(
    getOption("gd.cache"), doc_id, paste0(local_rev, "-remote.ast")
  )

  local1_ast_path <- file_path(
    getOption("gd.cache"), doc_id, paste0(local_rev, "-local.ast")
  )

  source1_ast_path <- file_path(
    getOption("gd.cache"), doc_id, paste0(local_rev, "-source.ast")
  )

  # Download the new file ------------------------------------------------------

  # If there are differences, pull the remote AST into the cache
  remote2_ast_path <- tempfile(fileext = ".ast")

  # Download the latest version of the doc using its id, as a docx file. Convert
  # to AST and replace image targets with the hashes of the file contents. If
  # new images have been added to the remote document, add them to the project's
  # top level in the folder specified as getOption("gd.new_image_path"), by
  # default, ./assets. Whether an image is new or not is defined by comparing it
  # to the AST of the local file generated by the previous push.
  doc_id_to_ast(
    doc_id                       = doc_id,
    output_file                  = remote2_ast_path,
    pull_in_new_images           = TRUE,
    image_export_comparison_file = local1_ast_path
  )

  catif("Downloading remote changes")

  # Fold the JSON of the ast files
  c(remote1_ast_path, remote2_ast_path, local1_ast_path, source1_ast_path) %>%
    mapply(fold_ast_json, ., .)

  # Merging changes ------------------------------------------------------------

  md_merged_ast     <- tempfile(fileext = ".ast")
  rmd_merged_ast    <- tempfile(fileext = ".ast")
  rmd_merged_body   <- tempfile(fileext = ".Rmd")
  final_merged_file <- tempfile(fileext = ".Rmd")

  catif("Attempting to merge remote and local markdown files")

  ## # Attempt merging remote and local markdown files
  ## markdown_merge_attempt <- try(
  ##   silent = TRUE,
  ##   remote_diff_to_local(
  ##     remote1     = remote1_ast_path,
  ##     local1      = local1_ast_path,
  ##     remote2     = remote2_ast_path,
  ##     output_file = md_merged_ast
  ##   )
  ## )

  local1_md_path <- ast_to_md(local1_ast_path)
  remote1_md_path <- ast_to_md(remote1_ast_path)
  remote2_md_path <- ast_to_md(remote2_ast_path)
  merged_md_path <- tempfile(fileext = ".md")
  # Attempt merging remote and local markdown files
  markdown_merge_attempt <- try(
    silent = TRUE,
    remote_diff_to_local(
      remote1     = remote1_md_path,
      local1      = local1_md_path,
      remote2     = remote2_md_path,
      output_file = merged_md_path
    )
  )

  if ("try-error" %in% class(markdown_merge_attempt)) {
    message("DID NOT WORK: LOCAL-REMOTE MARKDOWN MERGE FAILED")
    return(list(
      remote1     = remote1_ast_path,
      local1      = local1_ast_path,
      remote2     = remote2_ast_path,
      output_file = md_merged_ast
    ))
  }

  # Convert the merged markdown file to an AST, and remove any image captions
  # that Pandoc may have decided to add
  remove_ast_image_captions(
    md_to_ast(merged_md_path),
    output_ast = md_merged_ast
  )

  catif("Remote and local markdown files successfully merged")

  catif("Attempting to fold the AST of the newly merged markdown file")


  fold_markdown_ast_attempt <- try(
    silent = TRUE,
    fold_ast_json(md_merged_ast, md_merged_ast)
  )

  if ("try-error" %in% class(fold_markdown_ast_attempt)) {
    message("DID NOT WORK: MARKDOWN AST FOLDING FAILED")
    return(list(
      remote1     = remote1_ast_path,
      local1      = local1_ast_path,
      remote2     = remote2_ast_path,
      output_file = md_merged_ast
    ))
  }


  catif("Markdown AST successfully folded")

  catif("Attempting to 'unknit' merged local markdown ast")

  # Attempt the unknitting part
  unknit_attempt <- try(
    silent = TRUE,
    unknit_new_md(
      original_rmd_ast = source1_ast_path,
      original_md_ast = local1_ast_path,
      new_md_ast = md_merged_ast,
      output_file = rmd_merged_ast
    )
  )

  if ("try-error" %in% class(unknit_attempt)) {
    message("DID NOT WORK: UNKNITTING FAILED")
    return(list(
      original_rmd_ast = source1_ast_path,
      original_md_ast = local1_ast_path,
      new_md_ast = md_merged_ast,
      output_file = rmd_merged_ast
    ))
  }

  catif("Markdown AST successfully unknit to Rmd AST")
  catif("Attempting to convert the Rmd AST to Rmd")

  # Convert the AST back to Rmarkdown (and unescape to remove extra \'s brought
  # about by diffing)
  ast_to_rmd(rmd_merged_ast, rmd_merged_body, unescape = TRUE)

  # Perform a final find-and-replace pass, in case any dynamic output has not
  # been detected by diffing (e.g. if charts have become re-ordered)
  source_rmd <- file_path(
    getOption("gd.cache"), doc_id, paste0(local_rev, "-source.Rmd")
  )

  # Perform a final find-and-replace pass if desired
  if (find_and_replace) {

    found_anything <- final_find_and_replace_pass(
      merged_rmd_file = rmd_merged_body,
      local_md        = remote1_md_path,
      source_rmd      = source_rmd,
      output_file     = rmd_merged_body
    )

    if(found_anything) catif("Additional changes made during a final find and",
                             " replace pass")
  }

  catif("Success!")

  # Add the YAML back on
  writeLines(
    c("---", yaml::as.yaml(yaml_vars), "---", readLines(rmd_merged_body)),
    final_merged_file
  )


  # Write out, end -------------------------------------------------------------

  # Copy the new file to the original file path
  file.copy(final_merged_file, file_name, overwrite = TRUE)

  # Versioning  / Caching
  cache_version_files(
    doc_id = doc_id, doc_revision = remote_rev, source = file_name,
    rendered_md = ast_to_md(md_merged_ast), remote_ast = remote2_ast_path
  )

  catif("Remote changes merged in to ", file_name)
}

#' A function for caching information about document state
#'
#' @param doc_id The Google ID of the document
#' @param source The Rmd source file at the time of the run
#' @param rendered_md The rendered md of the Rmd source file at the time of the
#'   run
#' @param cache_dir The dir used for caching
#' @param doc_revision Optional. Google's version number for the remote
#'   doc. Will be determined with a call to
#'   \code{latest_revision_from_local_metadata} if NULL (the default)
#' @param remote_ast Optional. The filepath to the Pandoc AST of the remote
#'   Google doc at doc_version. Will be retrieved from the remote document if
#'   NULL (the default)
#' @return Nothing
#' @keywords internal
cache_version_files <- function(doc_id, source, rendered_md,
                                cache_dir = getOption("gd.cache"),
                                doc_revision = NULL, remote_ast = NULL) {

  cache_file <- function(file, new_name = NULL, version = TRUE) {

    # Create the directory (won't wipe anything if it already exists)
    doc_dir <- file_path(cache_dir, doc_id)
    dir.create(doc_dir, recursive = TRUE, showWarnings = FALSE)

    # gitignore it (if it isn't already gitignored)
    add_line(".gitignore", cache_dir)

    # Get a name for the file, either new_name, or the file's original name
    use_name <- if (is.null(new_name)) basename(file) else basename(new_name)

    # Some files you want versioned, some files you don't
    out_file <- if (version) {
      file_path(doc_dir, paste0(doc_revision, "-", use_name))
    } else {
      file_path(doc_dir, use_name)
    }

    # Prefix the filename with the version number, and add to the cache
    # directory
    file.copy(file, out_file, overwrite = TRUE)
  }


  cache_run_status <- function() {
    run_status_file <- file_path(cache_dir, doc_id, "runs.csv")

    # Read in existing json (or init an empty list if it doesn't)
    if (!file.exists(run_status_file)) {
      csv <- data.frame()
    } else {
      csv <- utils::read.csv(run_status_file, stringsAsFactors = FALSE)
    }

    # openssl has it's own classes, which are tricky to coerce
    source_hash <- openssl::md5(read_txt(source))
    class(source_hash) <- "character"


    run_info <- data.frame(
      operation    = "push",
      doc_id       = doc_id,
      time         = Sys.time(),
      doc_revision = doc_revision,
      source_md5   = source_hash,
      stringsAsFactors = FALSE
    )

    # Append the latest run information, and write out the file
    utils::write.csv(row.names = FALSE, rbind(csv, run_info), run_status_file)

    return(invisible(TRUE))
  }

  # Do the caching -------------------------------------------------------------

  # You probably won't need all of these once the versioning / unknitting
  # process settles down

  # Extract the doc version
  if (is.null(doc_revision)) {
    doc_revision <- latest_revision_from_local_metadata(doc_id, update = TRUE)
  }

  # Save the AST of the remote file
  if (is.null(remote_ast)) {
    remote_ast <- doc_id_to_ast(doc_id)
  }

  # Save the Rmd of the original file
  cache_file(source, "source.Rmd")

  # Save the AST of the original file
  cache_file(rmd_to_ast(source), "source.ast")

  # Save the MD of the knitted file
  cache_file(rendered_md, "local.md")

  # Save the AST of the knitted file
  #
  # Here we're performing the 'image hashing' technique on the AST prior to
  # saving; when the remote AST is downloaded, image targets are replaced with
  # the hashes of the images' contents. Here we're replacing the image targets
  # in the *local* file, with the equivalent hashes in the remote file.
  rendered_md %>%
    md_to_ast() %>%
    imagehash_local_ast_with_equivalent_remote_ast(remote_ast) %>%
    cache_file("local.ast")

  # Save the MD of the remote file
  cache_file(ast_to_md(remote_ast), "remote.md")

  # Save the AST of the remote file
  cache_file(remote_ast, "remote.ast")

  # Write out some general information about the run
  cache_run_status()
}


#' @keywords internal
revision_list_from_local_metadata <- function(
  doc_id, cache_dir = getOption("gd.cache"), update = FALSE
  ) {

  # If the dir already exists, nothing will happen
  dir.create(
    file_path(cache_dir, doc_id), recursive = TRUE, showWarnings = FALSE
  )

  revisions_file <- file_path(cache_dir, doc_id, "revisions.json")

  # Download if asked to (or if the file doesn't exist yet)
  if (!file.exists(revisions_file) | update) {
    writeLines(
      jsonlite::toJSON(gd_revisions_ls(doc_id)),
      revisions_file
    )
  }

  jsonlite::fromJSON(read_txt(revisions_file))
}

#' @keywords internal
latest_revision_from_local_metadata <- function(...) {
  max(as.numeric(unlist(revision_list_from_local_metadata(...)$items$id)))
}
brendan-r/googdown documentation built on May 13, 2019, 5:09 a.m.