Nothing
apply_type_funs <- function
### Convert columns of match.mat using corresponding functions from
### fun.list, then handle any duplicate capture group names.
(match.mat,
### Character matrix (matches X groups).
fun.list
### Named list of functions to apply to captured groups. If there are
### any duplicate names, they must be in alternatives (only one match
### per unique group name, otherwise error).
){
stopifnot(is.character(match.mat))
stopifnot(is.matrix(match.mat))
if(length(fun.list) < ncol(match.mat)){
stop(domain=NA, gettext("regex contains more groups than names; please remove literal groups (parentheses) from the regex pattern, and use named arguments in R code instead"))
}
colnames(match.mat) <- names(fun.list)
dt <- data.table(match.mat)
for(col.i in seq_along(fun.list)){
type.fun <- fun.list[[col.i]]
if(!identical(type.fun, identity)){
col.for.err <- paste0(col.i, "(", names(fun.list)[[col.i]], ")")
match.vec <- match.mat[, col.i]
tryCatch({
fun.result <- type.fun(match.vec)
}, error=function(e){
stop(domain=NA, gettextf("type conversion functions should take one argument (character vector of captured text) and return an atomic vector of the same size; function for group %s raised an error: %s", col.for.err, e$message))
})
if(!is.atomic(fun.result)){
stop(domain=NA, gettextf("each type conversion function must return an atomic vector, but function for group %s did not", col.for.err))
}
if(length(fun.result) != nrow(match.mat)){
stop(domain=NA, gettextf("type conversion function for group %s returned vector of length %s but expected length %s", col.for.err, length(fun.result), nrow(match.mat)))
}
if(any(is.na(match.vec) & !is.na(fun.result))){
stop(domain=NA, gettextf("a non-match(NA) was converted to a match(non-NA) by the conversion function in group %s; please fix conversion function", col.for.err))
}
set(dt, j=col.i, value=fun.result)
}
}
## handle duplicates (error or delete columns).
is.match <- !is.na(match.mat) & match.mat!=""
dt.type.vec <- sapply(dt, typeof)
name.tab <- table(names(fun.list))
dup.name.vec <- names(name.tab)[1 < name.tab]
remove.col.list <- list()
for(dup.name in dup.name.vec){
dup.col.indices <- which(names(fun.list)==dup.name)
dup.type.tab <- table(dt.type.vec[dup.col.indices])
if(1 < length(dup.type.tab)){
stop(domain=NA, gettextf("capture groups with identical names should have conversion functions that all return the same type; problem group name=%s has types %s", dup.name, paste(names(dup.type.tab), collapse = ",")))
}
is.match.name <- is.match[, dup.col.indices, drop=FALSE]
if(any(1 < rowSums(is.match.name))){
stop(domain=NA, gettextf("duplicate capture group names are only allowed in alternatives, problem: %s", dup.name))
}
alt.i.vec <- as.integer(apply(is.match.name, 1, which))
orig.i.vec <- dup.col.indices[alt.i.vec]
## Columns for alternatives other than the first will be removed.
remove.col.vec <- dup.col.indices[-1]
for(remove.col in remove.col.vec){
remove.i <- which(remove.col == orig.i.vec)
## Copy values captured in this alternative to the first
## alternative.
set(
dt, i=remove.i, j=dup.col.indices[[1]],
value=dt[[remove.col]][remove.i])
}
remove.col.list[[dup.name]] <- remove.col.vec
}
## Remove all alternatives other than the first.
if(length(remove.col.list))set(dt, j=unlist(remove.col.list), value=NULL)
dt
### data.table with columns defined by calling the functions in
### fun.list on the corresponding column of match.mat. Even if
### fun.list has duplicated names, the output data.table will have
### unique column names (identically named capture groups in
### alternatives will be combined into a single output column).
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.