R/render_lab_answers.R

#' Render and Validate Lab Answers
#'
#' Renders and validates lab answers generated by and downloaded from a lab tutorial.
#'
#' @param filename path to the lab answer file.
#' @param output_dir the output directory for the rendered answer file. If `NULL`, the output directory depends on the
#'  type of the file. If it is a markdown file, the output will be saved in the directory of the input.
#'  If it is a ZIP archive, all the files in the ZIP file will be rendered in the current working directory.
#' @param zip_archive is the given file a ZIP archive? If yes, process all files in the archive.
#'  By default, files ending in ".zip" are treated as ZIP archives.
#' @export
#'
#' @importFrom rmarkdown render html_document
#' @importFrom stringr str_sub str_remove
#' @importFrom rlang inform warn
#' @importFrom utils unzip
render_lab_answers <- function (filename, output_dir = NULL, zip_archive) {
  if (missing(zip_archive)) {
    zip_archive <- grepl('\\.zip$', filename)
  }

  rendered_files <- if (!isTRUE(zip_archive)) {
    if (is.null(output_dir)) {
      output_dir <- dirname(filename)
    }

    tryCatch({
      render_res <- .render_lab_answer_file(filename, output_dir)
      data.frame(input = normalizePath(filename), output = render_res$output, valid = render_res$valid,
                 message = render_res$message, stringsAsFactors = FALSE)
    }, error = function (e) {
      error_file <- file.path(output_dir, basename(filename))
      if (filename != error_file) {
        file.copy(filename, error_file)
      }
      warn(sprintf("Cannot render file %s", error_file))
      data.frame(input = normalizePath(filename), output = error_file, valid = NA, message = 'rendering error',
                 stringsAsFactors = FALSE)
    })
  } else {
    if (is.null(output_dir)) {
      output_dir <- getwd()
    }
    unpacked_path <- tempfile()
    dir.create(unpacked_path, mode = '0700')
    on.exit(unlink(unpacked_path), add = TRUE)

    unzip(filename, exdir = unpacked_path, junkpaths = TRUE)
    all_files <- list.files(unpacked_path, full.names = FALSE)
    do.call(rbind, lapply(all_files, function (fname) {
      fname_path <- file.path(unpacked_path, fname)
      tryCatch({
        render_res <- .render_lab_answer_file(fname_path, output_dir)
        inform(sprintf("Rendered file %s", render_res$output))
        data.frame(input = normalizePath(fname_path), output = render_res$output, valid = render_res$valid,
                   message = render_res$message, stringsAsFactors = FALSE)
      }, error = function (e) {
        error_file <- file.path(output_dir, fname)
        file.rename(fname_path, error_file)
        warn(sprintf("Cannot render file %s", error_file))
        data.frame(input = normalizePath(fname_path), output = error_file, valid = NA,
                   message = sprintf('rendering error (%s)', e), stringsAsFactors = FALSE)
      })
    }))
  }

  return(invisible(rendered_files))
}

.render_lab_answer_file <- function (filename, output_dir = NULL) {
  rendering_result <- list(output = filename, valid = FALSE, message = NA_character_)
  validation_result <- tryCatch({
    validation_result <- list(valid = .validate_lab_answers(filename), message = NA_character_)
    if (!all(validation_result$valid)) {
      validation_result$message <- if (all(!validation_result$valid)) {
        "The metadata (name, student number, ...) and the answers do not match the signature."
      } else if (!isTRUE(validation_result$valid[['metadata']])) {
        "The metadata (name, student number, ...) do not match the signature. The answers are valid."
      } else if (!isTRUE(validation_result$valid[['content']])) {
        "The answers do not match the signature. The metadata (name, student number, ...) are valid."
      }
    }
    validation_result
  }, error = function (e) {
    list(valid = c(FALSE, FALSE), message = as.character(e))
  })

  rendering_result$valid <- all(validation_result$valid)
  rendering_result$message <- validation_result$message

  new_md <- tempfile(pattern = 'lab_answers', fileext = '.md')
  on.exit(unlink(new_md), add = TRUE)
  new_md_fh <- file(new_md, open = 'w')
  on.exit(close(new_md_fh), add = TRUE, after = FALSE)

  in_fh <- file(filename, open = 'r')
  on.exit(close(in_fh), add = TRUE, after = FALSE)

  ## Read metadata
  metadata <- .read_lab_answers_metadata(in_fh)

  cat('---\ntitle: ', sprintf("Lab Answers for *%s* (%s)", metadata$student_name, metadata$student_id),
      '\nauthor: ', basename(filename), ' from ', metadata$url, '\n---\n', file = new_md_fh, sep = '')

  if (!is.na(validation_result$message)) {
    cat(sprintf('<div class="alert alert-danger text-center"><p class="lead">Validation failed!</p><p>%s</p></div>',
                validation_result$message), file = new_md_fh)
  }

  repeat {
    line <- .read_line(in_fh, n = 1L)
    if (length(line) == 0L) {
      break
    }
    cat(line, '\n', file = new_md_fh)
  }


  if (is.null(output_dir)) {
    output_dir <- dirname(filename)
  }

  rendering_result$output <- render(new_md, output_file = str_remove(basename(filename), '\\.[^\\s\\.]+$'),
                                    output_format = html_document(), output_dir = output_dir, quiet = TRUE)
  return(rendering_result)
}

#' @importFrom stringr str_sub str_remove
#' @importFrom openssl signature_verify base64_decode
#' @importFrom digest digest hmac
#' @importFrom rlang inform
.validate_lab_answers <- function (filename, blocksize = 8192L) {
  valid <- c(metadata = FALSE, content = FALSE)
  fh <- file(filename, open = 'rt', encoding = 'UTF-8')
  on.exit(close(fh), add = TRUE)

  ## Read metadata
  metadata <- .read_lab_answers_metadata(fh)

  ## Add metadata info to message (we need to keep track of the previous 4 hashes to backtrack at the end of the file)
  metadata_hash <- .create_metadata_hash(metadata)

  running_hash <- if (length(metadata$signature) == 2L) {
    # Check metadata separately
    valid[['metadata']] <- tryCatch(signature_verify(metadata_hash, hash = NULL, sig = metadata$signature$meta,
                                                     pubkey = metadata$pubkey),
                                    error = function (e) FALSE)
    raw(0L)
  } else {
    inform("Old signature detected. Verifying entire file content at once.")
    metadata_hash
  }

  # line 1 (after metadata): skip
  .read_line(fh, n = 1L, ok = FALSE)

  # all next lines: read in blocks, but discard all white space.
  buffer <- ''
  repeat {
    line <- .read_line(fh, n = 1L)

    if (length(line) == 0L) {
      break
    }

    buffer <- if (length(metadata$signature) == 2L) {
      paste(buffer, str_remove_all(line, '\\s'), sep = '')
    } else {
      paste(buffer, line, '\n', sep = '')
    }
    buffer_chunks <- nchar(buffer) %/% blocksize
    bytes_left <- nchar(buffer) %% blocksize

    for (i in seq_len(buffer_chunks)) {
      chunk <- str_sub(buffer, start = (i - 1L) * blocksize + 1L, end = i * blocksize)
      running_hash <- hmac(running_hash, chunk, serialize = FALSE, raw = TRUE, algo = 'sha256')
    }
    buffer <- str_sub(buffer, start = -bytes_left)
  }
  if (nchar(buffer) > 0L) {
    running_hash <- hmac(running_hash, buffer, serialize = FALSE, raw = TRUE, algo = 'sha256')
  }

  # Verify the signature (throws an error if the validation fails)
  if (length(metadata$signature) == 2L) {
    # Check metadata separately
    valid[['content']] <- tryCatch(signature_verify(running_hash, hash = NULL, sig = metadata$signature$content,
                                                    pubkey = metadata$pubkey),
                                   error = function (e) FALSE)
  } else {
    valid[['content']] <- tryCatch(signature_verify(running_hash, hash = NULL, sig = metadata$signature$old,
                                                    pubkey = metadata$pubkey),
                                   error = function (e) FALSE)
    valid[['metadata']] <- valid[['content']]
  }
  return(valid)
}

.read_line <- function (...) {
  mc <- match.call(expand.dots = TRUE)
  mc[[1L]] <- quote(readLines)
  with_abort(eval.parent(mc))
}

.read_chars <- function (...) {
  mc <- match.call(expand.dots = TRUE)
  mc[[1L]] <- quote(readChar)
  with_abort(eval.parent(mc))
}

#' @importFrom openssl read_pubkey base64_decode
#' @importFrom stringr str_sub str_trim str_split fixed
#' @importFrom rlang abort warn
.read_lab_answers_metadata <- function (fh) {
  seek(fh, 0L, rw = 'read', origin = 'start')

  info <- list(student_name = NULL, student_id = NULL, url = NULL, pubkey = NULL)
  ## Read metadata
  # line 1: ----(skip)
  .read_line(fh, n = 1L, ok = FALSE)
  # line 2: student name -- add to message
  info$student_name <- str_sub(.read_line(fh, n = 1L, ok = FALSE), start = 10L)
  # line 3: student nr -- add to message
  info$student_id <- str_sub(.read_line(fh, n = 1L, ok = FALSE), start = 13L)
  # line 4: url -- add to message
  info$url <- str_sub(.read_line(fh, n = 1L, ok = FALSE), start = 6L)
  # line 5: public key
  info$pubkey <- tryCatch(
    read_pubkey(base64_decode(str_sub(.read_line(fh, n = 1L, ok = FALSE), start = 9L))),
    error = function (e) {
      abort("Public key cannot be read.")
    })
  # line 6: signature
  signature_line <- .read_line(fh, n = 1L, ok = FALSE)
  if (str_sub(signature_line, end = 11L) != 'signature: ') {
    abort("No signature found in file.")
  }
  signatures <- str_split(str_trim(str_sub(signature_line, start = 12L), side = 'right'), pattern = fixed(';'))[[1L]]
  info$signature <- if (length(signatures) != 2L) {
    list(old = base64_decode(signatures[[1L]]))
  } else {
    list(meta = base64_decode(signatures[[1L]]), content = base64_decode(signatures[[2L]]))
  }

  # line 6: ----- (skip)
  .read_line(fh, n = 1L, ok = FALSE)
  return(info)
}

.create_metadata_hash <- function (metadata) {
  hash <- digest(paste('student:', metadata$student_name), algo = 'sha256', raw = TRUE, serialize = FALSE)
  hash <- hmac(hash, paste('student_id:', metadata$student_id), algo = 'sha256', raw = TRUE)
  hash <- hmac(hash, paste('url:', metadata$url), algo = 'sha256', raw = TRUE)
  return(hash)
}
dakep/stat305templates documentation built on Nov. 27, 2022, 8:23 a.m.