Nothing
# nocov start
#' Extract assignments from a SAS FORMAT definition
#'
#' This is modelled entirely on a single chunk of SAS code, but hopefully can be
#' generalised. It relies heavily on lists and regular expression, but, as you
#' will see from the code, R is not a great language with which to write a SAS
#' parser.
#' @param sas_lines is a character vector, with one item per line, e.g. from
#' \code{readLines}
#' @references
# nolint start
#' \url{http://support.sas.com/documentation/cdl/en/proc/61895/HTML/default/viewer.htm#a002473474.htm}
# nolint end
#' @return list (of lists)
#' @keywords programming list internal
#' @noRd
sas_format_extract <- function(sas_lines) {
# collapse everything onto one big line, so we can filter multi-line
# commments. No ability to do multiline regex along a vector.
sas_lines <- paste(sas_lines, collapse = " \\n")
# sas comments are in the form /* ... */ inline/multiline, or * ... ;
sas_lines <- gsub(pattern = "/\\*.*?\\*/", replacement = "", x = sas_lines) # nolint
sas_lines <- gsub(pattern = "\\n\\*.*?;", replacement = "\\n", x = sas_lines) # nolint
sas_lines <- strsplit(sas_lines, split = "\\;")[[1]]
# strip white space and ?undetected newline characters, replace with single
# spaces.
sas_lines <- gsub(pattern = "\\\\n", "", sas_lines) # nolint
sas_lines <- gsub(pattern = "[[:space:]]+", " ", sas_lines)
sas_lines <- trimws(sas_lines)
# drop everything except VALUE statements
sas_lines <- grep(
pattern = "^VALUE.*", x = sas_lines, ignore.case = TRUE,
value = TRUE
)
# put each VALUE declaration in a vector element
sma1 <- .str_match_all(
string = sas_lines,
pattern =
"^V(?:ALUE|alue)[[:space:]]+([[:graph:]]+)[[:space:]]+(.+)[[:space:]]*$"
)
all_sas_assignments <- lapply(sma1, `[`, c(2, 3))
out <- list()
for (m in all_sas_assignments) {
out[m[[1]]] <- list(sas_parse_assignments(m[[2]]))
}
out
}
#' @describeIn sas_format_extract Get just the \code{$RCOMFMT} assignment, which
#' contains all the ICD (not DRG) data. The problem is \code{RENLFAIL} appears
#' twice:
#'
#' \code{"N183", "N184", "N185", "N186", "N189", "N19", "Z4901", "Z4902",
#' "Z9115", "Z940", "Z992"="RENLFAIL" /*Dependence on renal dialysis*/
#'
#' "Z4931", "Z4932"="RENLFAIL" /*Encounter for adequacy testing for
#' peritoneal dialysis*/ }
#'
#' so \code{RENLFAIL} needs special treatment
#' @keywords internal
#' @noRd
sas_format_extract_rcomfmt <- function(sas_lines) {
# ignore DRG assignments
sas_format_extract(sas_lines)[["$RCOMFMT"]]
}
# ICD-10 SAS code seems to be literal with all possible (ICD-10-CM for given
# year) cihldren listed. No ranges are specified (unlike the ICD-9 equivalentt)
sas_icd10_assignments_to_list <- function(x) {
x["NONE"] <- NULL
x[" "] <- NULL
x
}
#' Get assignments from a character string strings
#'
#' The format of assignments is best seen in the SAS source files.
#' @param x is a character string containing space delimited assignments, in SAS
#' declaration format.
#' @param strip_whitespace will strip all white space from the returned values
#' @param strip_quotes will strip all double quotation marks from the returned
#' values
#' @return list with each list item containing a matrix of "char ranges",
#' "assigned value" pairs
#' @keywords internal programming list
#' @noRd
sas_parse_assignments <- function(x, strip_whitespace = TRUE,
strip_quotes = TRUE) {
stopifnot(is.character(x), length(x) == 1)
assert_flag(strip_whitespace)
assert_flag(strip_quotes)
# splitting with clever regex to separate each pair of assignments seems
# tricky, so doing it in steps.
# n.b. this is a list with list per input row.
halfway <- as.list(unlist(
strsplit(x, split = "[[:space:]]*=[[:space:]]*")
))
# we need to match the first unquoted space to get the boundary between the
# previous definition and the next variable name
if (length(halfway) == 2) {
# we have just a single name value pair so just set name to value and return
# list of one item.
if (strip_whitespace) {
halfway <- gsub(
pattern = "[[:space:]]*",
replacement = "",
halfway
)
}
if (strip_quotes) halfway <- gsub(pattern = '"', replacement = "", halfway)
out <- list()
out[[halfway[[2]]]] <- unlist(strsplit(x = halfway[[1]], split = ","))
return(out)
}
mid_tmp <- unlist(
lapply(
.str_match_all(
halfway[seq(2, length(halfway) - 1)],
pattern = '^([^"]|"[^"]*")*? (.*)'
),
`[`, -1
)
)
threequarters <- c(
halfway[[1]],
mid_tmp,
halfway[[length(halfway)]]
)
if (strip_quotes) {
threequarters <- gsub(pattern = '"', replacement = "", threequarters)
}
# spaces may matter still, so don't randomly strip them?
out <- list()
for (pair in seq(from = 1, to = length(threequarters), by = 2)) {
if (strip_whitespace) {
outwhite <- gsub(
pattern = "[[:space:]]*",
replacement = "",
threequarters[pair]
)
} else {
outwhite <- threequarters[pair]
}
# combine here in case there are duplicate labels, e.g. RENLFAIL twice in
# ICD-10 AHRQ
out[[threequarters[pair + 1]]] <-
c(
out[[threequarters[pair + 1]]],
unlist(strsplit(x = outwhite, split = ","))
)
}
out
}
#' Extract quoted or unquoted SAS string definitions
#'
#' Finds all the LET statements in some SAS code and writes them to an R list.
#' The list item name is the SAS variable name, and each list item is a
#' character vector of the contents. This is specifically for string
#' assignments, but probably easy to adapter to numbers if ever needed.
#' @param x is a vector of character strings, typically taken from something
#' like \code{readLines(some_sas_file_path)}
#' @keywords internal programming list
#' @noRd
sas_extract_let_strings <- function(x) {
let_rex <-
"%LET ([[:alnum:]]+)[[:space:]]*=[[:space:]]*%STR\\(([[:print:]]+?)\\)"
a <- .str_match_all(x, let_rex)
a <- lapply(a, trimws)
a <- a[vapply(a, FUN = function(x) length(x) != 0, FUN.VALUE = logical(1))]
vls <- vapply(a, FUN = `[[`, 3, FUN.VALUE = "")
splt <- strsplit(vls, split = ",")
result <- lapply(splt, strip, pattern = "'") # strip single quotes
result <- lapply(result, strip, pattern = '"') # strip double quotes
names(result) <- vapply(a, FUN = function(x) x[[2]], FUN.VALUE = "")
result
}
# horrible kludge for difficult source data
sas_expand_range <- function(start, end) {
if (end == "0449") {
end <- start
} # HIV codes changed
# hmmm, maybe get the diff and test all children of ambigs present later
reals <- expand_range.icd9(start, end,
short_code = TRUE, defined = TRUE,
ex_ambig_start = FALSE, ex_ambig_end = TRUE
)
real_parents <- condense.icd9(reals, defined = TRUE, short_code = TRUE)
merged <- unique(c(reals, real_parents))
real_parents_of_merged <- condense.icd9(merged,
defined = TRUE,
short_code = TRUE
)
halfway <- children.icd9(real_parents_of_merged,
defined = FALSE,
short_code = TRUE
)
nonrealrange <- expand_range.icd9(start, end,
defined = FALSE,
short_code = TRUE,
ex_ambig_start = TRUE,
ex_ambig_end = TRUE
)
sort.icd9(unique(c(halfway, nonrealrange)), short_code = TRUE)
}
# nocov end
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.