R/mitm-helpers.R

Defines functions mitm_help mitm_status stop_mitm start_mitm call_mitm

Documented in call_mitm mitm_help mitm_status start_mitm stop_mitm

#' Launch a mitmdump background process with caller-supplied arguments
#' 
#' The binary located by `find_mitm()` is started through
#' [sys::exec_background()] and the process id that call returns is handed
#' back unchanged. Use [tools::pskill()] to kill the process.
#' 
#' @md
#' @param args see [sys::exec_background()]
#' @return the process id returned by [sys::exec_background()]
#' @family mitm_helpers
#' @export
call_mitm <- function(args) {

  # Locate the executable first, then start it without waiting for it to exit.
  mitmdump_path <- find_mitm()

  background_pid <- sys::exec_background(cmd = mitmdump_path, args = args)

  background_pid

}

#' Start a mitmdump background process that captures traffic to a dump file
#' 
#' Launches `mitmdump` (via [call_mitm()]) with one of the dump scripts shipped
#' in the package's `mitm` directory and points it at a freshly named temporary
#' dump file. The returned `mitm_pid` object can be passed to [stop_mitm()] to
#' kill the mitmproxy process and retrieve the generated dump file, or to
#' [mitm_status()] to check on the status of the background process.
#' 
#' @md
#' @param collect how to collect `mitmproxy` responses; one of `"httr"` (the
#'        default) or `"har"`. `httr` will serialize each response as
#'        individual lines of `ndjson` `httr` `response` objects that can be
#'        read with [middlechild::read_httr()] from a file and that will be
#'        returned as a `list` of said objects. `har` uses the `har_dump.py`
#'        script and a `.har` dump file.
#' @param extra_args see `args` in [sys::exec_background()]; appended after the
#'        arguments this function builds
#' @return `mitm_pid` object: a list with elements `pid`, `dump_file` and
#'         `collect`
#' @family mitm_helpers
#' @export
start_mitm <- function(collect = c("httr", "har"), extra_args = NULL) {

  collect <- match.arg(collect[1], c("httr", "har"))

  # The result is not used here (call_mitm() performs its own lookup); the call
  # is kept so the lookup still happens before the dump file name is generated.
  find_mitm()

  # Per-mode settings: which bundled script to load and which extension the
  # temporary dump file gets.
  addon_names <- c(httr = "httr_dump.py", har = "har_dump.py")
  dump_extensions <- c(httr = ".json.gz", har = ".har")

  addon_path <- system.file("mitm", addon_names[[collect]], package = "middlechild")
  dump_path <- tempfile(fileext = dump_extensions[[collect]])

  # Option handed to the script, e.g. "hardump=/tmp/.../file.har"
  dump_option <- paste0(collect, "dump=", dump_path)

  # e.g. mitmdump -s ./har_dump.py --set hardump=./dump.har ...
  proc_id <- call_mitm(args = c("-s", addon_path, "--set", dump_option, extra_args))

  structure(
    list(pid = proc_id, dump_file = dump_path, collect = collect),
    class = "mitm_pid"
  )

}

#' Stop the mitmproxy background process and retrieve the generated dump file
#' 
#' Kills the process recorded in `pid_obj`, pauses for three seconds,
#' optionally copies the dump file to `save`, optionally reads it in, and
#' removes the temporary dump file when the function exits. If copying to
#' `save` fails, a warning is raised and the temporary dump file is left in
#' place so the capture is not lost.
#' 
#' @md
#' @param pid_obj `mitm_pid` object created with [start_mitm()]
#' @param read if `TRUE` (the default) read in the `httr` or HAR file. NOTE: you are 
#'        encouraged to set this to `FALSE` if you're using HAR format and
#'        reading a large number of pages or pages with heavy resources. [HARtools::readHAR()] 
#'        does not handle very large HAR files well.
#' @param save if not `NULL` then the `httr` or HAR file generated by `mitmproxy` will be
#'        sent to this filesystem location (**you need to use a path that includes a 
#'        filename**). This will be `path.expand()`ed. It is recommended that you use
#'        `.json.gz` as the file extension for the `httr` object type. An
#'        existing file at this location is not overwritten.
#' @return a `HARtools` object if `read` was `TRUE` and the collection was `har`;
#'         a list of (minimal) `httr` `response` objects if `read` was `TRUE`
#'         and the collection was `httr`; nothing if `read` was `FALSE`.
#' @family mitm_helpers
#' @export
stop_mitm <- function(pid_obj, read = TRUE, save = NULL) {
  
  tools::pskill(pid_obj$pid)
  
  # pause -- presumably so the killed process can finish writing the dump
  # file before it is copied or read (TODO confirm)
  Sys.sleep(3)
  
  # The temporary dump file is removed on exit unless the copy below fails;
  # the flag is evaluated when the exit handler runs, not when it is set up.
  remove_dump <- TRUE
  on.exit(if (remove_dump) unlink(pid_obj$dump_file), add = TRUE)
  
  if (!is.null(save)) {
    save <- path.expand(save)
    message("Moving mitmproxy dump file to ", save)
    copied <- file.copy(pid_obj$dump_file, save)
    if (!isTRUE(copied)) {
      # file.copy() reports failure (e.g. the destination already exists or
      # is not writable) only through its return value; without this check
      # the exit handler would delete the only copy of the capture.
      remove_dump <- FALSE
      warning(
        "Could not copy the mitmproxy dump file to ", save,
        "; it has been left at ", pid_obj$dump_file,
        call. = FALSE
      )
    }
  }
  
  if (read) {
    
    out <- switch(pid_obj$collect,
      "httr" = read_httr(pid_obj$dump_file),
      "har" = HARtools::readHAR(pid_obj$dump_file)
    )
    
    return(out)
    
  }
  
  invisible()
  
}

#' Check on the status of an mitmproxy process created with [start_mitm()]
#' 
#' @md
#' @param pid_obj `mitm_pid` object created with [start_mitm()]; must not be
#'        `NULL`
#' @return logical. `TRUE` if the background process is still running
#' @family mitm_helpers
#' @export
mitm_status <- function(pid_obj = NULL) {

  if (is.null(pid_obj)) stop("pid_obj cannot be NULL", call. = FALSE)
  
  # Queried without waiting; an NA status is what this function reports as
  # "still running".
  is.na(sys::exec_status(pid_obj$pid, wait = FALSE))
    
}

#' Display the help screen of the mitmdump binary
#' 
#' Runs the binary located by find_mitm() with the single argument --help,
#' sending its standard output to the R console.
#' 
#' @family mitm_helpers
#' @export
mitm_help <- function() {
  mitmdump_path <- find_mitm()
  system2(command = mitmdump_path, args = "--help", stdout = "")
}
ropenscilabs/middlechild documentation built on May 11, 2022, 9:11 a.m.