Nothing
# Binary file utils =============
# @TODO: remove deprecated functions
# Read binary file
# @note this does not work for very large files probably because of the 2^31-1 limit on vector size! think about ways to fix this...
# --> might have to acually read directly from the conection instead of the raw data buffer!
read_binary_file <- function(filepath) {
if (!file.exists(filepath) || file.info(filepath)$isdir == TRUE)
stop("file does not exist or is a directory: ", filepath, call. = TRUE)
bfile <-
structure(
list(
raw = raw(),
C_blocks = tibble(),
data = list(),
pos = 1L, # current position within the file
max_pos = NULL, # max position to consider during operations
error_prefix = "" # what prefix to append to errors
),
class = "binary_file"
)
# read
size <- file.info(filepath)$size
con <- file(filepath, "rb")
bfile$raw <- readBin(con, raw(), n = size)
bfile$max_pos <- length(bfile$raw)
close(con)
# find C_blocks
bfile$C_blocks <- find_C_blocks(bfile$raw)
return(bfile)
}
# get captured data from binary file
get_captured_data <- function(bfile, id) {
return(bfile$data[[id]])
}
# reset binary file navigation
reset_binary_file_navigation <- function(bfile) {
bfile$pos <- 1L
bfile$max_pos <- length(bfile$raw)
return(bfile)
}
# set an error prefix for the operations that fellow
set_binary_file_error_prefix <- function(bfile, prefix = "") {
bfile$error_prefix <- if (nchar(prefix) >0) str_c(prefix, " - ") else ""
return(bfile)
}
# throw binary file error if error mode is active
op_error <- function(bfile, msg) {
stop(sprintf("%s%s (pos %.0f)", bfile$error_prefix, msg, bfile$pos), call. = FALSE)
}
# Navigation ======
# skip nbyte number of bytes in the raw data stream
skip_pos <- function(bfile, nbyte) {
move_to_pos(bfile, bfile$pos + nbyte)
}
# move to position
move_to_pos <- function(bfile, pos, reset_cap = FALSE) {
if (reset_cap) bfile$max_pos <- length(bfile$raw)
if (pos > bfile$max_pos) {
op_error(
bfile, sprintf("cannot move to position %.0f as it exceeds position max set at %.0f",
pos, bfile$max_pos))
}
bfile$pos <- as.integer(pos)
return(bfile)
}
# cap at position
cap_at_pos <- function(bfile, pos) {
if(is.null(pos)) stop("cannot cap at position NULL", call. = FALSE)
if (pos < bfile$pos) {
op_error(
bfile, sprintf("cannot cap at position %.0f as it is smaller than current position %.0f",
pos, bfile$pos))
}
bfile$max_pos <- as.integer(pos)
return(bfile)
}
# fetch a specific C_block
# @param C_block name of the block
# @param min_pos minimum position where to find the block
# @param occurence which occurence to find? (use -1 for last occurence, use NULL for all)
# @FIXME: testing
fetch_C_block <- function(bfile, C_block, min_pos = 1, occurence = NULL, regexp_match = FALSE) {
# basic checks
if (!is(bfile, "binary_file")) stop("need binary file object", call. = FALSE)
if (nrow(bfile$C_blocks) == 0) stop("no C_blocks available", call. = FALSE)
if (missing(C_block) || !is.character(C_block)) stop("C_block name not provided", call. = FALSE)
# find C_blocks
if (regexp_match)
C_blocks <- filter(bfile$C_blocks, str_detect(.data$block, C_block), .data$start >= min_pos)
else
C_blocks <- filter(bfile$C_blocks, .data$block == C_block, .data$start >= min_pos)
if (nrow(C_blocks) == 0) {
op_error(bfile, sprintf("block '%s' not found after position %.0f", C_block, min_pos))
}
if (!is.null(occurence) && occurence > 1 && nrow(C_blocks) < occurence) {
op_error(
bfile, sprintf("occurence %.0f of block '%s' not found (only %.0f occurences) after position %.0f",
occurence, C_block, nrow(C_blocks), min_pos))
}
# return right occurence
if (!is.null(occurence) && occurence == -1)
return(tail(C_blocks, 1))
else if (!is.null(occurence))
return(C_blocks[occurence, ])
else
return(C_blocks)
}
# moves position to the end of a specific C_block
# @inheritParams fetch_C_block
# @param reset_cap whether to reset the cap
# @FIXME: testing
# move_to_C_block(my_test$binary, "NO")
# move_to_C_block(my_test$binary, "CData", occurence = 2)
move_to_C_block <- function(bfile, C_block, min_pos = 1, occurence = 1, move_to_end = TRUE, reset_cap = TRUE, regexp_match = FALSE) {
# fetch right C block
cblock <- fetch_C_block(bfile, C_block, min_pos = min_pos, occurence = occurence, regexp_match = regexp_match)
if (is.null(cblock)) {
op_error(bfile, sprintf("cannot move to C block '%s'", C_block))
}
# reset position cap
if (reset_cap) bfile$max_pos <- length(bfile$raw)
new_pos <- if(move_to_end) cblock$end[1] + 1L else cblock$start[1]
return(move_to_pos(bfile, new_pos))
}
# move to next C block (does not reet cap by default)
move_to_next_C_block <- function(bfile, C_block, reset_cap = FALSE, regexp_match = FALSE) {
move_to_C_block(bfile, C_block, min_pos = bfile$pos, occurence = 1, reset_cap = reset_cap, regexp_match = regexp_match)
}
# cap valid position at the beginning of a specific C_block
cap_at_C_block <- function(bfile, C_block, min_pos = 1, occurence = 1, regexp_match = FALSE) {
# fetch right C block
cblock <- fetch_C_block(bfile, C_block, min_pos = min_pos, occurence = occurence, regexp_match = regexp_match)
if (is.null(cblock)) {
op_error(bfile, sprintf("cannot cap at C block '%s'", C_block))
}
return(cap_at_pos(bfile, cblock$start[1]))
}
# cap at next C block
cap_at_next_C_block <- function(bfile, C_block, regexp_match = FALSE) {
cap_at_C_block(bfile, C_block, min_pos = bfile$pos, occurence = 1, regexp_match = regexp_match)
}
# move to C block range
# Note: if need to specific occurence, use move_to_C_block, cap_at_C_block
move_to_C_block_range <- function(bfile, from_C_block, to_C_block, min_pos = 1){
# range move safety check (a bit redundant because also checked in
# move_to_pos and cap_at_pos but errors there are out of context)
from <- fetch_C_block(bfile, from_C_block, min_pos = min_pos)
to <- fetch_C_block(bfile, to_C_block, min_pos = min_pos)
if (!is.null(from) && !is.null(to) && to$start[1] < from$end[1]) {
op_error(bfile, sprintf("cannot move to Cblock range, first occurence of block '%s' before first block '%s'",
from_C_block, to_C_block))
}
# move to blocks
bfile %>%
move_to_C_block(from_C_block, min_pos = min_pos, reset_cap = TRUE) %>%
cap_at_C_block(to_C_block, min_pos = min_pos)
}
# move to next C block range
move_to_next_C_block_range <- function(bfile, from_C_block, to_C_block){
move_to_C_block_range(bfile, from_C_block, to_C_block, min_pos = bfile$pos)
}
# regular expression for control block
re_block <- function(block_name) {
block <- get_ctrl_blocks_config()[[block_name]]
if (is.null(block)) stop("encountered unknown block: ", block_name, call. = FALSE)
structure(
list(
label = sprintf("<%s>", block_name),
regexp = block$regexp,
size = block$size
),
class = "binary_regexp")
}
# regular expression for null characters
re_null <- function(n) {
structure(
list(
label = sprintf("<%.0fx00>", n),
# NOTE: this works both with paste0 and str_c
regexp = paste0("\\x00{", n, "}"),
size = n
),
class = "binary_regexp")
}
# regular expression for anything but null characters
re_not_null <- function(n) {
structure(
list(
label = sprintf("<x01-xff{%.0f}>", n),
# NOTE: this does not work on windows if it's str_c!
regexp = paste0("[\x01-\xff]{", n, "}"),
size = n
),
class = "binary_regexp")
}
# regular expression for control sequences (00-0f)
re_control <- function(raw) {
# can't easily assemble so it's hard copy for now
hex <- c(`00` = "\\x00", `01` = "\x01", `02` = "\x02", `03` = "\x03", `04` = "\x04", `05` = "\x05", `06` = "\x06", `07` = "\x07", `08` = "\x08", `09` = "\x09", `0a` = "\x0a", `0b` = "\x0b", `0c` = "\x0c", `0d` = "\x0d", `0e` = "\x0e", `0f` = "\x0f")
# return hex regexps of the control sequence
ctrls <- as.character(raw)
structure(
list(
label = sprintf("{%s}", str_c(ctrls, collapse = " ")),
regexp = hex[ctrls] %>% str_c(collapse = ""),
size = length(raw)
),
class = "binary_regexp")
}
# regular expression for text (unicode) elements
re_text <- function(text) {
# can't easily assemble the \x5a structure so using a hard copy
hex <- c(`20` = "\x20", `21` = "\x21", `22` = "\x22", `23` = "\x23", `24` = "\x24", `25` = "\x25", `26` = "\x26", `27` = "\x27", `28` = "\x28", `29` = "\x29", `2a` = "\x2a", `2b` = "\x2b", `2c` = "\x2c", `2d` = "\x2d", `2e` = "\x2e", `2f` = "\x2f", `30` = "\x30", `31` = "\x31", `32` = "\x32", `33` = "\x33", `34` = "\x34", `35` = "\x35", `36` = "\x36", `37` = "\x37", `38` = "\x38", `39` = "\x39", `3a` = "\x3a", `3b` = "\x3b", `3c` = "\x3c", `3d` = "\x3d", `3e` = "\x3e", `3f` = "\x3f", `40` = "\x40", `41` = "\x41", `42` = "\x42", `43` = "\x43", `44` = "\x44", `45` = "\x45", `46` = "\x46", `47` = "\x47", `48` = "\x48", `49` = "\x49", `4a` = "\x4a", `4b` = "\x4b", `4c` = "\x4c", `4d` = "\x4d", `4e` = "\x4e", `4f` = "\x4f", `50` = "\x50", `51` = "\x51", `52` = "\x52", `53` = "\x53", `54` = "\x54", `55` = "\x55", `56` = "\x56", `57` = "\x57", `58` = "\x58", `59` = "\x59", `5a` = "\x5a", `5b` = "\x5b", `5c` = "\x5c", `5d` = "\x5d", `5e` = "\x5e", `5f` = "\x5f", `60` = "\x60", `61` = "\x61", `62` = "\x62", `63` = "\x63", `64` = "\x64", `65` = "\x65", `66` = "\x66", `67` = "\x67", `68` = "\x68", `69` = "\x69", `6a` = "\x6a", `6b` = "\x6b", `6c` = "\x6c", `6d` = "\x6d", `6e` = "\x6e", `6f` = "\x6f", `70` = "\x70", `71` = "\x71", `72` = "\x72", `73` = "\x73", `74` = "\x74", `75` = "\x75", `76` = "\x76", `77` = "\x77", `78` = "\x78", `79` = "\x79", `7a` = "\x7a", `7b` = "\x7b", `7c` = "\x7c", `7d` = "\x7d", `7e` = "\x7e")
# return hex regexps of the text
structure(
list(
label = sprintf("{%s}", text),
regexp = hex[as.character(charToRaw(text))] %>% str_c("\\x00") %>% str_c(collapse = ""),
size = 2*nchar(text)
),
class = "binary_regexp")
}
# plain regexp (default size is an estimate)
# @param label changing to proper name to avoid character errors
re_direct <- function(regexp, label = "re-direct", size = length(charToRaw(regexp))) {
structure(
list(
label = sprintf("[%s]", label),
regexp = regexp,
size = size
),
class = "binary_regexp")
}
# helper funtion to combine regexps
combine_regexps <- function(regexps, collapse, var = "regexp") {
# WARNING WARNING WARNING
# do not change this
# on all platforms: map_chr messes up the regexps
# on windows: str_c instead of paste leads to regexp fail in grepRaw
# on unix: paste instead of str_c breaks concatenating a few specific regexps (e.g. re_combine(re_not_null(2), re_block("fef-x")))
if (.Platform$OS.type == "windows")
paste(sapply(regexps, `[[`, var), collapse = collapse)
else
stringr::str_c(sapply(regexps, `[[`, var), collapse = collapse)
}
# or regexp
re_or <- function(..., size = estimate_size()) {
regexps <- list(...)
if(!all(sapply(regexps, is, "binary_regexp"))) stop("needs binary regexps for re_or", call. = FALSE)
estimate_size <- function() sum(map_dbl(regexps, "size"))
structure(
list(
label = sprintf("(%s)", str_c(map_chr(regexps, "label"), collapse = "|")),
# NOTE: on windows, the following command with str_c instead of paste or map_chr instead of sapply strangly leads to the regexp not getting recognized anymore in grepRaw
regexp = paste0("((", combine_regexps(regexps, collapse = ")|("), "))"),
size = size
),
class = "binary_regexp")
}
# combine regular expression patterns
re_combine <- function(...) {
regexps <- list(...)
if (!all(sapply(regexps, is, "binary_regexp")))
stop("can only combine binary regexps, generate with re_x() functions", call. = FALSE)
structure(
list(
label = str_c(map_chr(regexps, "label"), collapse = ""),
size = sum(map_dbl(regexps, "size")),
regexp = combine_regexps(regexps, collapse = "")
),
class = "binary_regexp")
}
# find next occurence of supplied regular expression pattern
find_next_pattern <- function(bfile, ..., max_gap = NULL, value = FALSE, all = FALSE) {
if (!is(bfile, "binary_file")) stop("need binary file object", call. = FALSE)
regexps <- re_combine(...)
pos <- grepRaw(regexps$regexp, bfile$raw, offset = bfile$pos, value = value, all = all)
if (length(pos) == 0) return(NULL) # return NULL if not found
else if (!is.null(max_gap) && !value && pos > bfile$pos + max_gap) return(NULL) # return NULL if outside max gap
else if (!value && all(pos > bfile$max_pos)) return (NULL) # return NULL if bigger than allowed
else return(pos)
}
# find the next occurences of a supplised regular expression pattern
find_next_patterns <- function(bfile, ...) {
positions <- find_next_pattern(bfile, ..., value = FALSE, all = TRUE)
positions <- positions[positions <= bfile$max_pos] # don't allow those that exceed the allowed positions
if (length(positions) == 0) return(NULL)
return(positions)
}
# move to next regular expression pattern
# @param ... construct with the re... functions
# @param max_gap maximum number of bytes until the pattern
# @param move_to_end whether to move to the end of the pattern (default is yes)
move_to_next_pattern <- function(bfile, ..., max_gap = NULL, move_to_end = TRUE) {
# safety check
if (!is(bfile, "binary_file")) stop("need binary file object", call. = FALSE)
if(!is.null(max_gap) && !is.numeric(max_gap)) stop("max gap must be a number", call. = FALSE)
# find pattern
pos <- find_next_pattern(bfile, ..., max_gap = max_gap)
# move to new position
if ( !is.null(pos) ) {
n <- if (move_to_end) length(find_next_pattern(bfile, ..., value = TRUE)) else 0
return(move_to_pos(bfile, pos + n))
}
# encountered a problem
regexps <- re_combine(...)
gap_text <- if (!is.null(max_gap)) sprintf(" after maximal gap of %.0f bytes", max_gap) else ""
op_error(
bfile,
sprintf("could not find '%s'%s in search interval %.0f to %.0f, found '%s...'",
regexps$label,
gap_text, bfile$pos, bfile$max_pos,
bfile %>%
map_binary_structure(length = regexps$size +
(if(!is.null(max_gap)) max_gap else 50) + 10) %>%
generate_binary_structure_map_printout()))
}
# cap at next regular expression pattern
# @param ... construct with the re... functions
# @param max_gap maximum number of bytes until the pattern
# @param move_to_end whether to move to the end of the pattern (default is yes)
cap_at_next_pattern <- function(bfile, ..., max_gap = NULL) {
# safety check
if (!is(bfile, "binary_file")) stop("need binary file object", call. = FALSE)
if(!is.null(max_gap) && !is.numeric(max_gap)) stop("max gap must be a number", call. = FALSE)
# find pattern
pos <- find_next_pattern(bfile, ..., max_gap = max_gap)
# cap at new position
if ( !is.null(pos) ) {
return(cap_at_pos(bfile, pos))
}
# encountered a problem
regexps <- re_combine(...)
gap_text <- if (!is.null(max_gap)) sprintf(" after maximal gap of %.0f bytes", max_gap) else ""
op_error(
bfile,
sprintf("could not find '%s'%s in search interval %.0f to %.0f, found '%s...'",
regexps$label,
gap_text, bfile$pos, bfile$max_pos,
bfile %>%
map_binary_structure(length = regexps$size +
(if(!is.null(max_gap)) max_gap else 50) + 10) %>%
generate_binary_structure_map_printout()))
}
# capture data block data in specified type
# uses parse_raw_data and therefore can handle multiple data types
# @inheritParams parse_raw_data
# note: consider renaming to capture_data_till_pattern
capture_data <- function(bfile, id, type, ..., data_bytes_min = 0, data_bytes_max = NULL,
move_past_dots = FALSE, re_size_exact = TRUE,
ignore_trailing_zeros = TRUE, exact_length = TRUE, sensible = NULL) {
# reset existing data in this field
bfile$data[[id]] <- NULL
# regexp
regexps <- re_combine(...)
# move to begining of target ... after the data
if (!is(bfile, "binary_file")) stop("need binary file object", call. = FALSE)
start <- bfile$pos
bfile$pos <- bfile$pos + data_bytes_min # offset until starting to look for next pattern
bfile <- move_to_next_pattern(bfile, regexps, max_gap = data_bytes_max, move_to_end = FALSE)
end <- bfile$pos - 1
# store data
if (end > start) {
if (length(type) == 1 && type[1] == "raw"){
# simplify for speed
bfile$data[[id]] <- bfile$raw[start:end]
} else {
id_text <- sprintf("'%s' capture failed at pos %d: ", id, start)
bfile$data[[id]] <-
parse_raw_data(bfile$raw[start:end], type,
ignore_trailing_zeros = ignore_trailing_zeros,
exact_length = exact_length, sensible = sensible,
errors = str_c(bfile$error_prefix, id_text))
}
}
# whether to move past the dots
if(move_past_dots) {
if (re_size_exact)
bfile <- move_to_pos(bfile, end + 1 + regexps$size)
else
bfile <- move_to_next_pattern(bfile, regexps, max_gap = 0)
}
return(bfile)
}
# capture specific number of data points following after the current position
# instead of capturing data until the next pattern, this captures a specific
# number of data points (i.e. primarily for numeric data), moves to after the data
# also uses parse_raw_data and therefore can handle multiple data types
# @inheritParams parse_raw_data
capture_n_data <- function(bfile, id, type, n, sensible = NULL) {
# reset existing data in this field
bfile$data[[id]] <- NULL
# find raw length
if (!is(bfile, "binary_file")) stop("need binary file object", call. = FALSE)
# data type safety check
if (length(missing <- setdiff(type, names(get_data_blocks_config()))) > 0)
stop("ecountered invalid data type(s): ", str_c(missing, collapse = ", "), call. = FALSE)
dbc <- get_data_blocks_config()[type]
size <- sum(map_int(dbc, "size"))
# store data
id_text <- sprintf("'%s' capture failed: ", id)
bfile$data[[id]] <-
parse_raw_data(bfile$raw[bfile$pos:(bfile$pos + size * n - 1)], type, n = n,
ignore_trailing_zeros = FALSE,
exact_length = FALSE, sensible = sensible,
errors = str_c(bfile$error_prefix, id_text))
# move to after the data
return(move_to_pos(bfile, bfile$pos + size * n))
}
# Keys ======
# find all isodat C_blocks in the binary (marked by the below regexp pattern followed by a string of ascii characters)
find_C_blocks <- function(raw) {
regexp <- paste0("\xff\xff(\\x00|[\x01-\x0f])\\x00.\\x00\x43[\x20-\x7e]+")
re_positions <- grepRaw(regexp, raw, all = TRUE, value = FALSE)
re_matches <- grepRaw(regexp, raw, all = TRUE, value = TRUE)
# values
lapply(re_matches, function(x) {
list(
id1 = as.character(readBin(x[3], "raw")),
id2 = as.character(readBin(x[5], "raw")),
block = str_c(readBin(x[7:length(x)], "character", n = length(x) - 7), collapse = "")
)
}) %>% bind_rows() %>%
# byte positions
mutate(
start = re_positions,
end = .data$start + nchar(.data$block) + 6 - 1
)
}
# Parse data =====
# configuration information for the control blocks
# good info at http://www.aboutmyip.com/AboutMyXApp/AsciiChart.jsp
# those with auto = TRUE are considered when making an automatic map_binary_structure
#
# @FIXME: testing
get_ctrl_blocks_config <- function() {
list(
# specific sequences
`del-nl` = list(size = 2L, auto = TRUE, regexp = "\x7f\x85"), # 7F 85 delete next line
`eop-nl` = list(size = 2L, auto = TRUE, regexp = "\xdc\x85"), # DC 85 end of proof?? next line
`0b-80` = list(size = 2L, auto = TRUE, regexp = "\x0b\x80"), # 0b 80 = vertical tab, divider in tables?
`e0-81` = list(size = 2L, auto = TRUE, regexp = "\xe0\x81"), # e0 81 = some sort of table divider?
`ce-80` = list(size = 2L, auto = TRUE, regexp = "\xce\x80"), # ce 80 = not sure what it means but common divider in tables
`ce-8a` = list(size = 2L, auto = TRUE, regexp = "\xce\x8a"), # ce 80 = not sure what it means but sometimes divider in tables
`ee-85` = list(size = 2L, auto = TRUE, regexp = "\xee\x85"), # ce 80 = not sure what it means but sometimes in arrays
`75-84` = list(size = 2L, auto = TRUE, regexp = "\x75\x84"), # 75 84 - no idea what it means but it's special somehow
`ff-80` = list(size = 2L, auto = TRUE, regexp = "\\x00\xff\x80\\x00"), # ff 80 - no idea what it means
`07-80-id` = list(size = 6L, auto = TRUE, regexp = "\x05\x80.\xff(\\x00|\x80|\xff){2}", # some sort of counter or id
replace = function(b) str_c("id-", str_c(readBin(b[c(3,5,6)], "raw", n=6), collapse="-"))),
`54-fc` = list(size = 4L, auto = TRUE, regexp = "\x54\xfc\\x00\\x00"), #another type of data start indicator
# FFF and EEE and combination blocks
nl = list(size = 4L, auto = TRUE, regexp = "\xff\xfe\xff\x0a"), # FF FE FF 0A new line
`fef-0` = list(size = 4L, auto = TRUE, regexp = "\xff\xfe\xff\\x00"), # meaning = ?
`fef-x` = list(size = 4L, auto = TRUE, regexp = "\xff\xfe\xff.", # meaning = ?
replace = function(b) str_c("fef-", readBin(b[4], "raw"))),
`eee-0` = list(size = 4L, auto = TRUE, regexp = "\xef\xef\xef\\x00."),
ffff = list(size = 4L, auto = TRUE, regexp = "\xff{4}"), # meaning = ?
# x000 blocks
stx = list(size = 4L, auto = TRUE, regexp = "\x02\\x00{3}"), # start of text
etx = list(size = 4L, auto = TRUE, regexp = "\x03\\x00{3}"), # end of text
`x-000` = list(size = 4L, auto = TRUE, regexp = "[\x01-\x1f]\\x00{3}", replace = # meaning = ? maybe 1-000 has special meaning?
function(b) str_c(str_replace(readBin(b[1], "raw"), "^0", ""), "-000")),
`f-000` = list(size = 4L, auto = TRUE, regexp = "\xff\\x00{3}"), # unclear
# c block (not auto processed because Cblocks are found separately)
`C-block` = list(size = 20L, auto = FALSE, regexp = "\xff\xff(\\x00|[\x01-\x0f])\\x00.\\x00\x43[\x20-\x7e]+"),
# text block (not auto processed) - note that this does NOT include non standard characters
latin = list(size = 20L, auto = FALSE, regexp = "([\x41-\x5a\x61-\x7a]\\x00)+"), #a-zA-Z
alpha = list(size = 20L, auto = FALSE, regexp = "([\x41-\x5a\x61-\x7a\x30-\x39]\\x00)+"), #a-zA-Z0-9
text = list(size = 20L, auto = FALSE, regexp = "([\x20-\x7e]\\x00)+"),
`text0` = list(size = 20L, auto = FALSE, regexp = "([\x20-\x7e]\\x00)*"), # allows no text
permil = list(size = 2L, auto = FALSE, regexp = "\x30\x20")
)
}
# helper function to get the control blokcs as a data frame
get_ctrl_blocks_config_df <- function() {
get_ctrl_blocks_config() %>%
{
tibble(
block = names(.),
regexp = map_chr(., "regexp"),
hexadecimal = map_chr(
.,
~.x$regexp %>% charToRaw() %>% as.character() %>% paste(collapse = " ")
)
)
}
}
# get configuration information for the data blocks
#
# @FIXME: testing
get_data_blocks_config <- function() {
list(
raw = list(type = "raw", auto = FALSE, size = 1L),
# extended unicode characters
text = list(type = "character", auto = TRUE, size = 2L, regexp = "[\x20-\xff]\\x00"),
# numeric data types
integer = list(type = "integer", auto = TRUE, size = 4L),
float = list(type = "numeric", auto = TRUE, size = 4L),
double = list(type = "numeric", auto = TRUE, size = 8L)
)
}
# parses binary data block
# can parse multiple type data blocks efficienctly, to do so provide multiple types (Details elow)
#
# @param raw the raw data, will be processed from position 1
# @param type data type(s), can be a single unit (e.g. "double") or a vector of sequential data types (e.g. c("float", "double")) see get_data_blocks_config() for details. Note that text cannot be combined with other data types.
# @param n how many instances to read (of EACH type if more than 1 supplied), default is to try to read the whole raw sequence
# @param ignore_trailing_zeros - whether to remove trailing zeros
# @param exact_length - whether to require exactly the right length
# @param sensible an expected value bracket that if not met will throw an error (will turn on error reporting if errors not already set)
# @param errors if set, triggers errors instead of returning NULL (use to set as custom error message used as error prefix)
# @return NULL if data problem encountered or throws error if sensible not met
# @FIXME: testing!!!
parse_raw_data <- function(raw, type, n = full_raw(), ignore_trailing_zeros = FALSE, exact_length = FALSE,
sensible = NULL, errors = NULL) {
# data type
if (length(missing <- setdiff(type, names(get_data_blocks_config()))) > 0)
stop("ecountered invalid data type(s): ", str_c(missing, collapse = ", "), call. = FALSE)
# check for text
if (any(type == "text") && length(type) > 1)
stop("data type 'text' cannot reliably be parsed jointly with other data types", call. = FALSE)
# errors
if (!is.null(sensible) && is.null(errors)) errors <- ""
error_prefix <- if(is.null(errors)) "" else errors
# total size
dbc <- get_data_blocks_config()[type]
size <- sum(map_int(dbc, "size"))
# take off trailing 00 (down to the size of the data)
raw_trim <- if (ignore_trailing_zeros) remove_trailing_zeros(raw, size) else raw
raw_trim_text <- str_c(c(as.character(head(raw_trim, 30)), if(length(raw_trim) > 30) "..."), collapse = " ")
# find full raw n
full_raw <- function() floor(length(raw_trim)/size)
# check that long enough for single read (other n = 0 and next errors don't make much sense)
if (length(raw_trim) < size) {
if (!is.null(errors))
stop(sprintf("%sraw data (%d bytes) not long enough for single read of requested data type (%.0f): %s",
error_prefix, length(raw_trim), size, raw_trim_text), call. = FALSE)
return(NULL)
}
# check that long enough to be possible fit
if (length(raw_trim) < size * n) {
if (!is.null(errors))
stop(sprintf("%sraw data (%d bytes) not long enough for requested byte read (%.0f): %s",
error_prefix, length(raw_trim), size*n, raw_trim_text), call. = FALSE)
return(NULL)
}
# if strict_length, make sure it is multiple of the data size
if (exact_length && length(raw_trim) != (size * n)) {
if (!is.null(errors))
stop(sprintf("%sraw data length (%d bytes) does not match the requested byte read (%.0f): %s",
error_prefix, length(raw_trim), size*n, raw_trim_text), call. = FALSE)
return(NULL)
}
# if text capture, check that it is truly all text
if (any(type == "text")) {
if (n == 0) {
stop(sprintf("%stext of length 0 does not make sense: %s", error_prefix, raw_trim_text), call. = FALSE)
} else if (n > 250) {
stop(sprintf("%stext with more than 250 characters (%.0f) is likely an error in finding the termination pattern: %s",
error_prefix, n, raw_trim_text), call. = FALSE)
}
regexp <- sprintf("(%s){%.0f}", dbc[[1]]$regexp, n)
if (length(grepRaw(regexp, raw_trim)) == 0) {
# something that is NOT text is in the raw_trim
non_text_pos <- grepRaw("([\x20-\xff][\x01-\xff])|(\\x00\\x00)", raw_trim)
actual_text <- intToUtf8(raw_trim[1:(non_text_pos - 1)])
if (!is.null(errors)) {
stop(
sprintf("%sexpected unicode data for %.0f bytes but found only %.0f ('%s'), non-text raw data afterwards: %s",
error_prefix, n*2, non_text_pos, actual_text,
raw_trim[(non_text_pos + 1):length(raw_trim)] %>% {
if (length(.) > 10) c(as.character(head(., 10)), sprintf("+%d more", length(.) - 10))
else as.character(.)
} %>% paste(collapse = " ")
),
call. = FALSE)
}
return(NULL)
}
}
# process data
type_bytes <- seq_along(dbc) %>% rep(times = map_int(dbc, "size")) %>% rep(times = n)
data <- list()
for (i in 1:length(dbc)) {
if (dbc[[i]]$type == "character") {
parsed_data <- raw[type_bytes == i] %>% intToUtf8()
} else {
parsed_data <- readBin(raw[type_bytes == i], what = dbc[[i]]$type, size = dbc[[i]]$size, n = n)
}
data <- c(data, list(parsed_data))
}
names(data) <- 1:length(dbc)
# sensible check
if (!is.null(errors) && !is.null(sensible)) {
# make sure supplied sensible information is in proper format
if (!is.list(sensible)) sensible <- list(sensible)
if (length(type) != length(sensible))
stop("for multiple data types, need to supply list of sensible data pairs", call. = FALSE)
# check all data
for (i in 1:length(type)) {
if (class(data[[i]]) != class(sensible[[i]]) &&
class(data[[i]]) != "integer" && class(sensible[[i]]) != "numeric" ) # allow integer to numeric comparison
stop(sprintf("%scannot compare data (%s) to expected values (%s), data type mismatch",
error_prefix, class(data[[i]]), class(sensible[[i]])), call. = FALSE)
if (is.character(sensible[[i]]) && !all(good_data <- str_detect(sensible[[i]], data[[i]])))
stop(sprintf("%sparsed data (%s, ...) does not match expected pattern (%s)",
error_prefix, str_c(head(data[[i]][!good_data]), collapse = ", "), sensible),
call. = FALSE)
else if (is.numeric(sensible[[i]]) && !all(good_data <- data[[i]] >= sensible[[i]][1] & data[[i]] <= sensible[[i]][2]))
stop(sprintf("%sparsed data (%s, ...) does not match expected value range (%s)",
error_prefix, str_c(signif(head(data[[i]][!good_data])), collapse = ", "),
str_c(sensible[[i]][1], " to ", sensible[[i]][2])),
call. = FALSE)
}
}
# return data
if (length(type) == 1) return(data[[1]]) # return single data
return(data) # return list
}
# @todo for testing
# take_off_trailing_00s(as.raw(c(2, 6, 2, 0, 3, 5, 0, 2, 1, 0, 0, 0, 0, 0, 0)), size = 1)
# take_off_trailing_00s(as.raw(c(2, 6, 2, 0, 3, 5, 0, 2, 1, 0, 0, 0, 0, 0, 0)), size = 2)
# take_off_trailing_00s(as.raw(c(2, 6, 2, 0, 3, 5, 0, 2, 1, 0, 0, 0, 0, 0, 0)), size = 4)
# take_off_trailing_00s(as.raw(c(2, 6, 2, 0, 3, 5, 0, 2, 1, 0, 0, 0, 0, 0, 0)), size = 8)
#
# removes trailing 00 but only up to making the residual data still a multiple of the data size (leave at least 1 multiple), if can't take enough off to make the data a multiple of the data size, don't cut anything at all
# @param raw the raw data
# @param size the size of a data unit
remove_trailing_zeros <- function(raw, size) {
# leave at least 1 multiple
if (length(raw) <= size) return(raw)
# find number of trailing 0s (i.e. everything after the highest byte that is not 00)
is_null_block <- raw == as.raw(0)
if (any(is_null_block == FALSE))
trailing_00s <- is_null_block %>% { length(.) - max(which(. == FALSE)) }
else
trailing_00s <- length(is_null_block)
# leave at least 1 data length
take_off_00s <- min(trailing_00s, length(raw) - size)
# size excess can always be taken off
size_excess <- length(raw) %% size
# see if enough 00s to take anything off
if (take_off_00s < size_excess)
take_off_00s <- 0
# how many data size lengths okay to take off?
if (take_off_00s > size_excess) {
n_size <- floor((take_off_00s - size_excess)/size)
take_off_00s <- n_size * size + size_excess
}
# return
raw[1:(length(raw) - take_off_00s)]
}
# Binary structure =======
# map binary structure starting at a specific Cblock (used mostly for error messages and debugging)
# @param C_block name of the Cblock to start the structure from
# @param length how many bytes to map
# @inheritParams fetch_C_block
# @inheritParams map_binary_structure
# @FIXME: testing
map_C_block_structure <- function(bfile, C_block, occurence = 1, length = 100, ctrl_blocks = get_ctrl_blocks_config()) {
cblock <- fetch_C_block(bfile, C_block, occurence = occurence)
map_binary_structure(bfile, length = length, start = cblock$start, ctrl_blocks = ctrl_blocks)
}
#' Map isodat file binary structure.
#'
#' Map out binary structure for easy visualization (used mostly for error messages and debugging). See the development vignette for details and example application.
#'
#' @param bfile the binary file, stored in each iso_file under \code{$binary} if (and only if) the file was read with \link{iso_turn_debug_on} activated before.
#' @param length how many bytes to map
#' @param start at which byte position to start mapping (index 1 based)
#' @param ctrl_blocks named list of block patterns with size, regexp and [optional] replace function
map_binary_structure <- function(bfile, length = 100, start = bfile$pos, ctrl_blocks = get_ctrl_blocks_config()) {
if(!is(bfile, "binary_file"))
stop("can only generate map for binary file object, passed in: ",
str_c(class(bfile), collapse = ", "), call. = FALSE)
# generate new block record
new_block <- function(start, length, type, rep_text = NA_character_) {
block <- list(
start = start, end = start + length - 1, type = type,
rep_text = rep_text, raw = bfile$raw[start:(start + length - 1)])
rlang::set_names(list(block), start)
}
# loop variables
blocks <- list()
data_buffer <- c()
pos <- last_block_end <- start
while (pos < (start+length)) {
# check for cblocks
if ( nrow(cblock <- filter(bfile$C_blocks, start == pos)) > 0 ) {
if (pos > last_block_end) {
blocks <- c(blocks, new_block(start = last_block_end, length = pos - last_block_end, type = "data"))
}
blocks <- c(blocks, new_block(start = pos, length = 0, type = "cblock", rep_text =
with(cblock, sprintf("C-%s-%s %s", id1, id2, block))))
pos <- last_block_end <- cblock$end + 1
next
}
# check for other control blocks
found_match <- FALSE
for (b in names(ctrl_blocks)) {
# lock for first regexp that matches exactly at the current position
if ( ctrl_blocks[[b]]$auto && length(b_pos <- grepRaw(ctrl_blocks[[b]]$regexp, bfile$raw, offset = pos)) > 0 && b_pos == pos) {
# data block
if (pos > last_block_end) {
blocks <- c(blocks, new_block(start = last_block_end, length = pos - last_block_end, type = "data"))
}
# ctrl block representative text replacement function
replace_text <- b
replace_function <- ctrl_blocks[[b]]$replace
if (!is.null(replace_function) && is.function(replace_function)) {
replace_text <- replace_function(bfile$raw[pos:(pos+ctrl_blocks[[b]]$size-1)])
}
# new control block
blocks <- c(blocks, new_block(start = pos, length = ctrl_blocks[[b]]$size, type = b, rep_text = replace_text))
# move position
pos <- last_block_end <- pos + ctrl_blocks[[b]]$size
found_match <- TRUE
break
}
}
# check match - if no match, advance by just 1
if (!found_match) pos <- pos + 1
}
# data blocks processing
data_block_configs <- get_data_blocks_config() %>% {.[map_lgl(., "auto")] }
data_block_types <- names(data_block_configs)
data_matches <- tibble(data_type = data_block_types, matches = FALSE,
trailing_zeros = 0, n_values = 0, rep_value = NA_character_,
min_value = NA_real_, max_value = NA_real_)
# loop through blocks to interpret data
if (length(blocks) == 0) # no structure blocks found
blocks <- new_block(start, length, "raw")
for (i in 1:length(blocks)) {
# only process if data blocks
bl <- blocks[[i]]
if (bl$type != "data") next
# check for all 0 blocks
if (all(bl$raw == as.raw(0))) {
# recode, this is not a data block
blocks[[i]]$type <- "null"
blocks[[i]]$rep_text <- str_c(length(bl$raw), "x00")
next
}
# set matches
blocks[[i]]$matches <- data_matches
blocks[[i]]$data <- list()
# search through data patterns
for (j in 1:length(data_block_types)) {
# parse data tblock
raw_trim <- remove_trailing_zeros(bl$raw, data_block_configs[[data_block_types[j]]]$size)
data <- parse_raw_data(raw_trim, data_block_types[j], exact_length = TRUE, errors = NULL)
if (is.null(data)) next
# store data
if (length(data) == 1) {
rep_value <- if(is.numeric(data)) str_c(signif(data, 8)) else data
} else {
rep_value <- str_c(signif(data[1:min(3, length(data))], 8), collapse = ",")
if (length(data) > 3) rep_value <- str_c(rep_value, ",... (", length(data), ")")
}
blocks[[i]]$data[[data_block_types[j]]] <- data
blocks[[i]]$matches[j, "matches"] <- TRUE
blocks[[i]]$matches[j, "n_values"] <- length(data)
blocks[[i]]$matches[j, "rep_value"] <- rep_value
blocks[[i]]$matches[j, "trailing_zeros"] <- length(bl$raw) - length(raw_trim)
}
}
return(structure(list(blocks = blocks), class="binary_structure_map"))
}
# generate binary structure map printout
# @inheritParams print.binary_structure_map
# @FIXME: testing
generate_binary_structure_map_printout <- function(bsm, data_as_raw = FALSE, line_break_blocks = c(), pos_info = FALSE) {
# indentation function
nl_indent <- function(nls, lvl, byte_start) {
mapply(function(nl, n, bs) {
n <- if(is.na(n)) 0 else n
if(!nl) ""
else
str_c(c(if (nl) "\n" else "",
if (pos_info) sprintf("%07d: ", bs),
rep(" ", n)), collapse = "")
}, nls, lvl, byte_start)
}
# get blocks
blocks <- lapply(bsm$blocks, function(block) {
block$raw <- str_c(block$raw, collapse = " ")
block[c("start", "type", "rep_text", "raw")]
}) %>% bind_rows()
# data overview
if (data_as_raw) {
# raw data
data_overview <-
lapply(bsm$blocks, function(block) {
if (block$type == "data")
tibble(
rep_value = sprintf("{%s}", str_c(as.character(block$raw), collapse = " ")),
start = block$start)
else NULL
}) %>% bind_rows()
} else {
# processed data
data_overview <-
lapply(bsm$blocks, function(block) {
if (block$type == "data") mutate(block$matches, start = block$start)
else NULL
}) %>% bind_rows() %>%
# only consider actual matches
filter(matches)
if (nrow(data_overview) > 0) {
data_overview <- data_overview %>%
# trailing zeros block
mutate(trailing00_block =
ifelse(.data$trailing_zeros > 0, str_c("<", .data$trailing_zeros, "x00>"), "")) %>%
group_by(.data$start) %>%
do({
# figure out what to do in case of text supplied together with other data types
data <- .
text_value <- if ("text" %in% data$data_type) filter(data, .data$data_type == "text") else NULL
if (!is.null(text_value) && nrow(data > 1) && nchar(text_value$rep_value) >= 4) {
# more than 4 chars in the text, chances are likely this is actually text
data <- text_value
}
# combine
data_text <- with(data, str_c("{", rep_value, "}", trailing00_block))
if (length(data_text) > 1) tibble(rep_value = str_c("{", str_c(data_text, collapse = "|"), "}"))
else tibble(rep_value = data_text)
})
}
}
# process blocks and block data for printing
if (nrow(data_overview) == 0) data_overview <- tibble(start = integer(0), rep_value = character(0))
all_blocks <- blocks %>% left_join(data_overview, by = "start")
# indentation
if (nrow(all_blocks) == 1)
all_blocks <- mutate(all_blocks, nl = TRUE, text_level = 0)
else
all_blocks <- all_blocks %>%
mutate(
nl = .data$start == min(.data$start) | .data$type %in% line_break_blocks | c("", .data$type[1:(n()-1)]) %in% line_break_blocks,
text_level = c(NA, (head(cumsum(.data$type == "stx"), -1) - tail(cumsum(.data$type == "etx"), -1))))
# calculate indentation level
all_blocks <- all_blocks %>%
mutate(indent = .data$text_level - min(.data$text_level, na.rm = TRUE))
# text blocks
all_blocks %>%
mutate(
rep_text = ifelse(is.na(.data$rep_text), str_c("<", .data$raw, ">"), str_c("<", .data$rep_text, ">")),
rep_value = ifelse(is.na(.data$rep_value), str_c("{", .data$raw, "}"), .data$rep_value),
block_text = ifelse(.data$type == "data", .data$rep_value, .data$rep_text),
indent_text = str_c(nl_indent(.data$nl, .data$indent, .data$start), .data$block_text)
) %>%
# combine text
{ str_c(.$indent_text, collapse = "") }
}
#' Print binary structure map
#' @param x object to show.
#' @param ... additional parameters passed to print.default
#' @param data_as_raw whether to show data as raw
#' @param line_break_blocks at which blocks to introduce a line break
#' @param pos_info whether to include position information
#' @export
print.binary_structure_map <- function(x, ..., data_as_raw = FALSE, line_break_blocks = c("cblock", "stx", "etx"), pos_info = TRUE) {
cat("# Binary data structure: ", generate_binary_structure_map_printout(x, data_as_raw, line_break_blocks, pos_info))
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.