R/Regex.R

Defines functions makeRE.notehead makeRE.grid makeRE.timeSignature makeRE.recip makeRE.dur makeRE.tertianPartition makeRE.harm makeRE.roman makeRE.figuredBass makeRE.harte makeRE.chord makeRE.tertian makeRE.diatonicPartition makeRE.signature makeRE.romanKey makeRE.key makeRE.alterations makeRE.pc makeRE.fraction makeRE.double makeRE.bhatk makeRE.solfa makeRE.interval makeRE.tonh makeRE.kern makeRE.tonalChroma makeRE.contours makeRE.qualities makeRE.accidentals makeRE.steps cREs escape escaper escapebraces orRE captureUniq captureRE `%!~i%` `%~m%` `%~n%` `%~i%` `%~l%` REapply regexFindMethod REparse REparser

Documented in captureRE captureUniq orRE REparse REparser

# Regex Parsing ----

#' @name REparser
#' @export
REparser <- function(res, parse.strict = TRUE, parse.exhaust = TRUE, parse.lead = FALSE, parse.rest = FALSE, toEnv = FALSE) {
    # makes a parser which strictly, exhaustively parses a string
    # into a sequence of regexes
    
    rlang::new_function(args = alist(str = ),
                        body = rlang::expr(REparse(str, !!!res, !!parse.strict, !!parse.exhaust, !!parse.lead, !!parse.rest, !!toEnv)))
}


#' Parse String Using Regular expressions
#' 
#' Takes an input string and parses it into a sequence of regular expressions.
#' 
#' If `exhaustive` is TRUE, the string must be exhaustively broken up by the matching regular expressions.
#' Superfluous (non-match) characters at the begginning, end, or in bettween matches, will result in 
#' all `NA` being returned.
#' 
#' ------------------------------------------->             NEEDS DOCUMENTATION             <-------------------------------------------
#' @name REparser
#' @export
REparse <- function(str, res, parse.strict = TRUE, parse.exhaust = TRUE,
                    parse.lead = FALSE, parse.rest = FALSE, reverse = FALSE, 
                    sep = NULL,
                    toEnv = FALSE) {
    res <- res[lengths(res) > 0]
    
    if (any(.names(res) == "")) .stop("In call to REparse, all arguments must be named.")
    
    ##
    matches <- list()
    complete <- !logical(length(str))
    lead <- NULL
    rest <- str
    
    sep <- setNames(if (is.null(sep)) character(length(res)) else lag(rep(sep, length.out = length(res)), fill = ''), names(res))
    
    if (reverse) res <- rev(res)
    
    for (re in names(res)) {
         rest <- stringr::str_remove(rest, pattern = paste0('^', sep[re]))
        
         locs <- stringr::str_locate(rest, res[[re]])
        
        
        
        hits <- !is.na(locs[ , 1]) 
        
        if (parse.exhaust) {
            hits <- if (!reverse) {
                hits & (locs[ , 1] == 1)
            } else {
                hits & (locs[ , 2] == nchar(rest))
            }
        }
        
        complete <- complete & hits
        # 
        if (!parse.exhaust && parse.lead && is.null(lead)) {
            # should only ever happen in first iteration
            lead <- if (!reverse) {
                stringr::str_sub(rest, 0, pmax(locs[ , 'start'] - 1L, 0L))
            } else {
                stringr::str_sub(rest, locs[ , 'end'] + 1L)
            }
        }
        
        
        matches[[re]] <- .ifelse(hits, stringr::str_sub(rest, locs[ , 'start'], locs[ , 'end']), NA_character_)
        
        rest[hits] <- if (!reverse) {
            stringr::str_sub(rest[hits], start = locs[hits, 'end'] + 1L) 
        } else {
            stringr::str_sub(rest[hits], end = locs[hits, 'start'] - 1L)
        }
    }
    
    if (parse.lead) matches <- c(list(Lead = lead), matches)
    if (parse.rest) matches <- c(matches, list(Rest = rest))
    if (parse.strict) matches <- lapply(matches, `[<-`, i = !complete, value = NA_character_)
    
    if (reverse) matches <- rev(matches)
    
    matches <- lapply(matches, `names<-`, value = NULL)
    
    if (toEnv) list2env(matches, parent.frame())

    output <- do.call('cbind', matches)
    
    if (toEnv) invisible(output) else output
    
}



# Regex dispatch ----








regexFindMethod <- function(str, regexes) {
    # this takes a str of character values and a string of REs
    # and returns a single interger value representing the 
    # index (of regexes) to dispatch.
    
    regexes <- getRE(regexes)
    
    Nmatches <- sapply(regexes, \(regex) sum(stringi::stri_detect_regex(str, regex), na.rm = TRUE))
    if (!any(Nmatches > 0L)) return(0L)
    
    #which function to dispatch
    Ncharmatches <- sapply(regexes[Nmatches > 0],
                           \(re) {
                               nchars <- nchar(stringi::stri_extract_first_regex(str, re))
                               nchars[is.na(nchars)] <- 0L
                               sum(nchars)
                           })
    which(Nmatches > 0L)[which.max(Ncharmatches)]
}


REapply <- function(str, regex, .func, inPlace = TRUE, ..., args = list(), outputClass = 'character') {
    if (!is.character(str)) .stop(call. = FALSE,
                                "Sorry, REapply can only apply to an x argument that is a character vector.")
    checks(str, xcharacter)
    checks(regex, xcharacter)
    checks(inPlace, xTF)
    
  
    regex <- getRE(regex)
    matches <- stringi::stri_extract_first_regex(str = str, pattern = regex)
    
    #
    hits <- !is.na(matches)
    
    args <- local({
      args <- c(list(matches), args, list(...))
      firstArgLength <- length(str)
      args[lengths(args) == firstArgLength] <- lapply(args[lengths(args) == firstArgLength], '[', i = hits)
      args
    })
    
    result <- vectorNA(length(str), outputClass)
    if (any(hits)) result[hits] <- do(.func, args)
    
    if (inPlace && outputClass == 'character') result <- stringi::stri_replace_first_fixed(str, matches, result)
    
    result

}












## %% Regex tools ----


#' Match strings against regular expression
#' 
#' The functions give you a concise way to search for regular expressions in `character` vectors.
#' They are "infix" functions, meaning you write the function between its two arguments:
#' `myvector %~% regex`.
#'  
#' 
#' @details 
#' 
#' Each version of the function returns a different type of information about regex matches (if any)
#' in the input vector:
#' 
#' + `%~l%`: returns `logical` (`TRUE`/`FALSE`) indicating where in `x` there are matches.
#' + `%~i%`: returns `integer` indicating the indices of matches in `x`.
#' + `%~n%`: returns `integer` indicating the number (count) of matches in each string.
#' + `%~m%`: returns `character` string of the matched string itself. Returns `NA`
#'   where there is no match.
#' 
#' The basic function (`%~%`) is the same as `%~l%`.
#' There is also a negative versions of the `l` and `i` functions: giving all
#' strings that *don't* match the given regular expression.
#' These are `%!~%`, `%!~l%`, and `%!~i%`.
#'
#' These functions are simply syntactic sugar for
#' existing `R` regular expression matching functions:
#' 
#' + `%~l%`: [base::grepl()]
#' + `%~i%`: [base::grep()]
#' + `%~n%`: [stringi::stri_count_regex()]
#' + `%~m%`: [stringi::stri_extract_first_regex()]
#' 
#' @section Multiple regexes:
#' 
#' If more than one regex is supplied,
#' `%~l%` and `%~i%` return the indices where *any* of the regexes match.
#' In the case of `%~n%`, each matching regex is counted separately, and they are all summed.
#' In the case of `%~m%`, all matches (if any) are pasted together, 
#' including multiple matches of the same string.
#' 
#' 
#' @param x ***A `character` vector to search in.***
#' 
#' Must be `character`.
#' 
#' @param regex ***One or more regular expressions.***
#' 
#' Must be `character`.
#' 
#' If more than one regex is supplied,
#' matches to *any* of the regexes are returned. (See "Multiple regexes" section.)
#'
#' @export
#' @name RegexFind
`%~l%` <- function(x, regex) {
  checks(x, xatomic)
  checks(regex, xcharacter & xminlength(1))
  regex <- escapebraces(regex)
  Reduce('|', lapply(regex, grepl, x = x))
}
#' @export
#' @rdname RegexFind
`%~i%` <- function(x, regex) {
  checks(x, xatomic)
  checks(regex, xcharacter & xminlength(1))
  regex <- escapebraces(regex)
  Reduce('union', lapply(regex, grep, x = x))
}
#' @export
#' @rdname RegexFind
`%~n%` <- function(x, regex) {
  checks(x, xatomic)
  checks(regex, xcharacter & xminlength(1))
  regex <- escapebraces(regex)
  Reduce('+', lapply(regex, stringi::stri_count_regex, str = x))
}
#' @export
#' @rdname RegexFind
`%~m%` <- function(x, regex) {
  checks(x, xatomic)
  checks(regex, xcharacter & xminlength(1))
  regex <- escapebraces(regex)
  
  do.call('.paste', c(list(na.if = all), lapply(regex, stringi::stri_extract_first_regex, str = x)))
}
#' @rdname RegexFind
#' @export
`%~%` <- `%~l%`
#' @rdname RegexFind
#' @export
`%!~%` <- Negate(`%~l%`)
#' @rdname RegexFind
#' @export
`%!~l%` <- Negate(`%~l%`)
#' @rdname RegexFind
#' @export
`%!~i%` <- function(x, pattern) which(!x %~l% x)



# regexParse <- function(str, ..., toEnv = TRUE) {
#     regexes <- list(...)
#     matches <- setNames(vector('list', length(regexes)), .names(regexes))
# 
#     for (i in seq_along(regexes)) { # loop because regexPop change str in place
#         matches[[i]] <- regexPop(str, regexes[[i]])
#     }
# 
#     if (toEnv) list2env(matches[.names(matches) != ""], envir = parent.frame())
# 
#     return(invisible(do.call('cbind', matches)))
# 
# 
# }


#################### Making Regexes ----



#' Making Regular Expressions
#' 
#' `humdrumR` includes some helpful functions for creating new regular expressions which work with the
#' [stringr] package.
#' 
#' `captureRE` will take a character vector and collapse it to a "capture group."
#' The `n` argument can be used to append a number tag, for instance `'*'` (zero or more) to the group.
#' I.e., `captureRE(c("a", "b", "c"), '*')` will output `"[abc]*"`.
#' 
#' `captureUniq` will make a similar capture group to `captureRE`, but with an expression
#' that makes sure that only 1 or more *of the same character* repeats.
#' For instance, `captureUniq(c('a', 'b','c'))` will return `"([abc])\\1*"`---this expression will match
#' `"aaa"` or `"bb"` but not `"aabb"`.
#' 
#' @name regexConstruction
#' @export
captureRE <- function(strs, n = '') {
    strs <- strs[order(nchar(strs), decreasing = TRUE)]
    
    if (length(strs) > 1) {
        multi <- nchar(strs) > 1L & !grepl('-', strs)
        strs <- orRE(orRE(strs[multi], 
                     if (any(!multi)) paste0('[', paste(collapse = '', strs[!multi]), ']')))
        
        # if (any(multi)) strs <- paste0('(?:', strs, ')')
    }
    
    escaper(paste0(strs, n))
}

#' @name regexConstruction
#' @export
captureUniq <- function(strs, zero = TRUE) {
# takes a RE capture group and makes it so it will only match one or more of the SAME string
    
    strs[nchar(strs) > 1L] <- paste0('(', strs[nchar(strs) > 1L], ')')
    strs <- paste0(strs, '+')
    
    strs <- paste(strs, collapse = '|')
    
    paste0('(', strs, ')', if (zero) '?')
    
}

#' @name regexConstruction
#' @export
orRE <- function(...) {
    res <- unlist(list(...))
    res <- res[lengths(res) > 0L]
    if (length(res) == 0L) return(NULL)
    if (length(res) == 1L) return(res[[1]])
    
    paste0('(', do.call('paste', c(as.list(res), sep = '|')), ')')
    
}

# captureUniq <- function(strs, zero = TRUE) paste0('(', captureRE(strs), if (zero) "?", ')\\1*') 

escapebraces <- function(re) {
  re[re %in% c('(', ')', '[', ']')] <- paste0('\\', re[re %in% c('(', ')', '[', ']')]) 
  re
}

escaper <- function(str) {
    stringr::str_replace_all(str, '\\[\\^', '[\\\\^')
    
}


escape <- function(str) {
  stringr::str_replace_all(str, "([+*?])", "\\\\\\1")
}




cREs <- function(REs, parse.exhaust = TRUE, sep = NULL) {
    if (length(REs) == 0) return('')
    # REs <- unlist(paste0('(?:', REs, ')'))
    REs <- unlist(REs)
    
    ## if the regexes use capture groups, we must increment groups in later expressions
    # for instance, if the first regex has (x)\\1 and the second has (.*)\\1,
    # we need to change the latter to (.*)\\2
    hasCapture <- stringr::str_detect(REs, '\\\\[1-9]')
    if (any(hasCapture)) {
        captures <-  stringr::str_extract_all(REs[hasCapture], '\\\\[1-9]')
        noCaptures <- stringr::str_split(REs[hasCapture], '\\\\[1-9]')
        captures <- lapply(captures, \(cap) as.integer(factor(cap)))
        
        shifts <- head(Reduce(function(a, b) a + max(b), captures, init = 0, accumulate = TRUE), length(captures))
        captures <- Map('+', captures, shifts)
        
        REs[hasCapture] <- Map(function(nocap, cap) {paste(c(rbind(nocap, c(paste0('\\', cap),''))), collapse = '')}, noCaptures, captures)
        
    }
    
    output <- if (!parse.exhaust && length(REs) > 1L) {
      Reduce(\(head, last) paste0(head, sep,  '(.*(', last, '))'), REs, right = TRUE )
    } else {
      do.call('.paste', c(as.list(unname(REs)), list(sep = if (is.null(sep)) "" else sep)))
      # paste(REs, collapse = if (is.null(sep)) "" else sep)
    }
    
    list(output)
    
}

#### REs for tonalIntervals ----

makeRE.steps <- function(step.labels = c('C', 'D', 'E', 'F', 'G', 'A', 'B'), step.signed = FALSE, ...)  {
    if (is.null(step.labels)) return('[1-9][0-9]*')
    
    if (step.signed) step.labels <- c(tolower(step.labels), toupper(step.labels))
    
    
        # captureUniq(step.labels, zero = FALSE)

    captureRE(step.labels) 


}

makeRE.accidentals <- function(sharp = '#', flat = '-', natural = 'n', doublesharp = NULL, doubleflat = NULL, ...) {
  
 if (flat %in% c('+', '?')) flat <- paste0('\\', flat)  
 if (sharp %in% c('+', '?')) sharp <- paste0('\\', sharp)  
  
 if (nchar(natural) > 1L)  natural <-  paste0('(', natural, ')') 
  
  paste0(natural, '?', captureUniq(c(sharp, flat, doublesharp, doubleflat)))
}

makeRE.qualities <- function(major = 'M', minor = 'm', perfect = 'P', augment = 'A', diminish = 'd', quality.required = TRUE, ...) {
    paste0('(', captureRE(c(perfect, major, minor), ''), '|', captureUniq(c(diminish, augment), zero = !quality.required), ')')
}

makeRE.contours <- function(octave.integer = TRUE, up = '\\^', down = 'v', ...) {
    if (octave.integer) '-?[0-9]+' else captureUniq(c(up, down))
}

makeRE.tonalChroma <- function(parts = c("step", "species", "octave"), qualities = FALSE, collapse = TRUE, 
                               parse.exhaust = TRUE,
                               sep = NULL, ..., regexname = 'tonalChroma'){
    REs <-  list(sign    = if ('sign' %in% parts)       '[-+]?',
                 step    = if ("step" %in% parts)       makeRE.steps(...),
                 species = if ("species" %in% parts) {if (qualities) makeRE.qualities(...) else makeRE.accidentals(...)},
                 octave  = if ("octave" %in% parts)    makeRE.contours(...)
                 )[parts]
    
    
    if (collapse) setNames(cREs(REs, sep = sep, parse.exhaust = parse.exhaust), regexname) else REs
    
    
}

makeRE.kern <- function(parts = c("step", "species"), 
                        step.labels = unlist(lapply(1:50, strrep, x = c('C', 'D', 'E', 'F', 'G', 'A', 'B'))),
                        step.signed = TRUE,
                        qualities = FALSE, 
                        octave.integer = FALSE,
                        
                        ...) {
    
    makeRE.tonalChroma(parts, step.labels = step.labels, 
                       step.signed = step.signed, 
                       qualities = qualities,
                       octave.integer = octave.integer, ..., regexname = 'kern')
    
}


makeRE.lilypond <- partialApply(makeRE.tonalChroma, step.labels = c('c', 'd', 'e', 'f', 'g', 'a', 'b'),
                                sharp = 'is', flat = 'es', regexname = 'lilypond', 
                                octave.integer = FALSE, up = "'", down = ",")

makeRE.helmholtz <- partialApply(makeRE.tonalChroma, step.labels = c('C', 'D', 'E', 'F', 'G', 'A', 'B'),
                                 step.signed = TRUE, regexname = 'helmholtz', 
                                 flat = 'b', octave.integer = FALSE, up = "'", down = ",")

makeRE.tonh <- function(parts = c('step', 'species', 'octave'), flat = 'es',
                        step.labels = c('C', 'D', 'E', 'F', 'G', 'A', 'B'), ...) {
  
  REs <- makeRE.tonalChroma(parts = parts, flat = flat, step.labels = step.labels, octave.integer = TRUE, collapse = FALSE)
  
  REs$step <- paste0('(', REs$step, '|[HS])')
  REs$species <- paste0('(', REs$species, '|s)')
  
  setNames(cREs(REs), 'tonh')
  
}

makeRE.sciPitch <- partialApply(makeRE.tonalChroma, octave.offset = 4L, octave.integer = TRUE, flat = 'b')

makeRE.interval <- function(parts = c("direction", "species", "step"), step.labels = 1:99, collapse = TRUE, qualities = TRUE, flat = 'b', ...) {
    REs <- makeRE.tonalChroma(parts[parts != 'direction'], collapse  = FALSE, qualities = qualities, step.labels = step.labels,
                              flat = flat, ..., regexname = 'interval')
    
    if ('direction' %in% parts) REs$direction <- '[+-]?'
    REs <- REs[parts]
    
    if (collapse) setNames(cREs(REs), 'interval') else REs
}


makeRE.degree <- partialApply(makeRE.tonalChroma, parts = c("step", "species", "octave"), step.labels = 1:7, octave.integer = TRUE,
                              flat = '-', sharp = '\\+', qualities = FALSE, regexname = 'degree', sep = c("", "/"))

makeRE.deg <- partialApply(makeRE.tonalChroma, parts = c( "octave", "step", "species"), step.labels = 1:7, 
                           octave.integer = FALSE, 
                           flat = '-', sharp = '\\+', qualities = FALSE, regexname = 'deg')

makeRE.solfa <- function(parts = c("octave", "step", "species"), octave.integer = FALSE, flat = '-', ..., collapse = TRUE) {
    
    REs <- makeRE.tonalChroma(parts[parts != "step"], octave.integer = octave.integer, ..., flat = flat, collapse = FALSE)
    
    if ("step" %in% parts) {
        REs$step <- "([sd][eoi]|[fl][eai]|[mt][eiy]|r[aei])"
        REs <- REs[parts]
    }
    
    if (collapse) setNames(cREs(REs), 'solfa') else REs
}

makeRE.solfg <- partialApply(makeRE.tonalChroma, parts = c('step', 'species', 'octave'),
                             step.labels = c('do', 're', 'mi', 'fa', 'so', 'la', 'ti'),
                             flat = '~b', doubleflat = '~bb', natural = '~n', sharp = '~d', doublesharp = '~dd',
                             octave.integer = TRUE, regexname = 'solfg')

makeRE.bhatk <- function(parts = c("step", "octave"), up = "'", down = ',', 
                         step.labels = c('S', 'r', 'R', 'G', 'g', 'M', 'm', 'P', 'D', 'd', 'N', 'n'),
                         octave.integer = FALSE, ..., collapse = TRUE) {
    
    REs <- makeRE.tonalChroma(parts = parts, step.labels = step.labels,
                              up = up, down = down,
                              octave.integer = octave.integer, 
                              ..., collapse = FALSE)
    

    if (collapse) setNames(cREs(REs), 'bhatk') else REs
}
#### REs for numbers ----


makeRE.double <- function(...) c(double = "[+-]?[0-9]+(\\.[0-9]+)?" )
makeRE.fraction <- function(sep = '/', ...) paste0("[1-9][0-9]*", sep, "[1-9][0-9]*")


makeRE.pc <- function(ten = 'A', eleven = 'B', ...) {
  captureRE(c(0:11, ten, eleven))
}

#### REs for diatonic sets ####

makeRE.alterations <- function(..., qualities = FALSE, sep = '') {
    # names(alteration.labels) <- gsub('augment', 'sharp', names(alteration.labels))
    # names(alteration.labels) <- gsub('diminish', 'flat', names(alteration.labels))

    
    makeRE <- partialApply(makeRE.tonalChroma,
                           parts = c("species", "step"), step.signed = FALSE, flat = 'b',
                           step.labels = c(1, 2, 3, 4, 5, 6, 7, 9, 10, 11, 12, 13),
                           regexname = 'alterations')
    if (sep != '') sep <- paste0(sep, '?')
    paste0('(', makeRE(..., qualities = qualities, quality.required = FALSE), sep, ')*')
}

makeRE.key <- function(..., parts = c("step", "species", "mode", "alterations"),
                       step.labels = c('C', 'D', 'E', 'F', 'G', 'A', 'B'), 
                       collapse = TRUE) {
    
    parts <- c("star", parts[parts %in% c("step", "species")], "colon", parts[parts %in% c("mode", "alterations")])
    if (collapse && "mode" %in% parts) {
        REs <- makeRE.tonalChroma(parts = parts[parts %in% c("step", "species")],
                                  step.labels = toupper(step.labels), qualities = FALSE,
                                  step.signed = FALSE, collapse = FALSE, 
                                  ...)
        res <- makeRE.tonalChroma(parts = parts[parts %in% c("step", "species")],
                                  step.labels = tolower(step.labels), qualities = FALSE,
                                  step.signed = FALSE, collapse = FALSE,
                                  ...)
        
        REs['colon'] <- res['colon'] <- ':?'
        REs['star'] <- res['star'] <- '\\*?'
        
        REs['mode'] <- captureRE(c('mix', 'lyd', 'ion'), n = '?')
        res['mode'] <- captureRE(c('phr', 'aeo', 'loc', 'dor'), n = '?')
        
        if ("alterations" %in% parts){
          REs["alterations"] <- makeRE.alterations(...) 
          res["alterations"] <- makeRE.alterations(...) 
        }
        
        majors <- cREs(REs[parts[parts %in% names(REs)]])
        minors <- cREs(res[parts[parts %in% names(REs)]])
        
        REs <- c(step =  cREs(c(majors, minors), sep = '|'))
    } else {
        REs <- makeRE.tonalChroma(parts = parts[parts %in% c("step", "species")],
                                  step.labels = step.labels, qualities = FALSE,
                                  step.signed = TRUE, collapse = FALSE,
                                  ...)
        REs['colon'] <-  ':?'
        REs['star']  <- '\\*?'
        REs['mode'] <- captureRE(c('mix', 'lyd', 'ion', 'phr', 'aeo', 'loc', 'dor'), n = '?')
        
        if ("alterations" %in% parts) REs["alterations"] <- makeRE.alterations(...)
    }
    
    
    
    
    REs <- REs[parts[parts %in% names(REs)]]
    
    if (collapse) setNames(cREs(REs), 'key') else REs
}

makeRE.romanKey <- function(..., collapse = TRUE, sep = '/') {

    numeral <- makeRE.key(step.labels = c('I', 'II', 'III', 'IV', 'V', 'VI', 'VII'),
                          parts = c('species', 'step', 'mode', 'alterations'),
                          collapse = TRUE, 
                          ...)
    
    
    REs <- list(head = paste0('(', numeral, ')'),
                rest = paste0('(', sep, '(', numeral, '))*'))
    
    if (collapse) setNames(cREs(REs), 'romanKey') else REs
    
}



makeRE.signature <- function(flat = '-', ...) {
    
    RE <- cREs(list(steps = '[A-Ga-g]',  accidentals = makeRE.accidentals(flat = flat, ...)))
    paste0('^\\*?k\\[(', RE, ')*\\]')
}

makeRE.diatonicPartition <- function(..., split = '/', mustPartition = FALSE) {
    
    key <- makeRE.key(...)
    romanNumeral <- makeRE.romanKey(...)
    
    re <- orRE(key, romanNumeral)
    
    paste0(romanNumeral, '(', split, re, ')', if (mustPartition) '+' else '*')
}


#### REs for tertian sets ####

makeRE.tertian <- function(..., major = 'M', minor = 'm', augment = '+', diminish = 'o', perfect = 'P', collapse = TRUE) {
    
    REs <- makeRE.tonalChroma(parts = c("step", "species"),
                              step.labels = '[A-G]', qualities = FALSE,
                              step.sign = FALSE, ...)
    
    REs$incomplete <- '[135]?'
    
    qualityRE <- captureRE(c(major, minor, augment, diminish, '.'))
    REs$quality <-  paste0('((', 
                           qualityRE, '{3}',  
                           captureRE(c(perfect, augment, diminish, '.')), 
                           qualityRE, 
                           '?)|(', 
                           qualityRE, '{1,3}))')
    
    REs$inversion <- '(/[1-7])?'
   
    
    REs <- REs[c("tonalChroma", "incomplete", "quality", "inversion")]
    
    if (collapse) setNames(cREs(REs), 'tertian') else REs
}

makeRE.chord <-  function(..., major = '[Mm]aj', minor = 'min', augment = 'aug', diminish = 'dim', 
                                bass.sep = '/', flat = 'b', 
                                collapse = TRUE) {
  REs <- makeRE.tonalChroma(parts = c("step", "species"), flat = flat,
                            step.labels = '[A-G]', qualities = FALSE,
                            step.sign = FALSE, ...)
  
  REs$quality <- captureRE(c(major, minor, augment, diminish), '?')
  
  REs$figurations <- makeRE.alterations(...)
  
  REs$bass <- paste0('(', bass.sep, REs$tonalChroma, ')?')
  
  REs <- REs[c("tonalChroma", "quality", "figurations", "bass")]
  
  
  
  if (collapse) setNames(cREs(REs), 'chord') else REs
  
}

makeRE.harte <- function(..., major = 'maj', minor = 'min', augment = 'aug', diminish = 'dim', 
                         bass.sep = '/', flat = '-', 
                         collapse = TRUE) {
  REs <- makeRE.tonalChroma(parts = c("step", "species"), flat = flat,
                            step.labels = '[A-G]', qualities = FALSE,
                            step.sign = FALSE, ...)
  
  REs$colon <- ':'
  
  
  triads <- c(major, minor, augment, diminish)
  sevenths <- paste0(c(major, minor, diminish, paste0('h', diminish), paste0(minor, major), ''),
                     '7')
  sixths <- paste0(c(major, minor), '6')
  ninths <- paste0(c(major, minor, ''), 9)
  sus <- c('sus2', 'sus4')
  
  quality <-captureRE(c(triads, sevenths,sixths, ninths, sus))
  
  figurations <- paste0('\\(', makeRE.alterations(..., step.labels = 1:13, flat = 'b', sep = ','), '\\)')
  
  REs$figqual <- paste0('(', quality, '|', figurations, ')')
  
  REs$bass <- paste0('(', bass.sep,
                     makeRE.tonalChroma(parts = c("species", "step"), flat = 'b',
                                                       step.labels = 1:13, qualities = FALSE,
                                                       step.sign = FALSE, ...), 
                     ')?')
  
  REs <- REs[c("tonalChroma", "colon", "figqual", "bass")]
  
  
  
  if (collapse) setNames(cREs(REs), 'harte') else REs
  
}

makeRE.figuredBass <- function(..., bass.sep = ' ', collapse = TRUE) {
  REs <- c(bass = unname(makeRE.kern(..., collapse = TRUE)),
           bass.sep = paste0(bass.sep, '?'),
           figurations = makeRE.alterations(...))
  
  if (collapse) setNames(cREs(REs), 'figuredBass') else REs
}

makeRE.roman <- function(..., diminish = 'o', augment = '+', sep = '/', collapse = TRUE) {
    augment <- paste0('[', augment, ']') # because "+" is a special character!
    
    REs <- list()
    REs$accidental <- makeRE.accidentals(...)
    
    upper <- paste0('(?=[IV]+', augment,  '?)', captureRE(c('I', 'II', 'III', 'IV', 'V', 'VI', 'VII')))
    lower <- paste0('(?=[iv]+', diminish, '?)', captureRE(c('i', 'ii', 'iii', 'iv', 'v', 'vi', 'vii')))
    REs$numeral <- orRE(upper, lower)
    
    
    REs$triadalt <- captureRE(c(diminish, augment), n = '?')

    
    REs$figurations <- makeRE.alterations(...)
    REs <- REs[c('accidental', 'numeral', 'triadalt', 'figurations')]
    
    
    key <- makeRE.key(step.labels = c('I', 'II', 'III', 'IV', 'V', 'VI', 'VII'),
                           parts = c('species', 'step', 'mode', 'alterations'),
                           collapse = TRUE, 
                           ...)
    
    REs$of <- paste0('(', sep, '(', key, '))*')
    
    if (collapse) setNames(cREs(REs), 'roman') else REs
}

makeRE.harm <- function(..., diminish = 'o', augment = '+', sep = '/', collapse = TRUE) {
  augment <- paste0('[', augment, ']') # because "+" is a special character!
  
  REs <- list()
  REs$accidental <- makeRE.accidentals(...)
  
  upper <- paste0('(?=[IV]+', augment,  '?)', captureRE(c('I', 'II', 'III', 'IV', 'V', 'VI', 'VII')))
  lower <- paste0('(?=[iv]+', diminish, '?)', captureRE(c('i', 'ii', 'iii', 'iv', 'v', 'vi', 'vii')))
  REs$numeral <- orRE(upper, lower)
  
  
  REs$triadalt <- captureRE(c(diminish, augment), n = '?')
  
  
  REs$figurations <- makeRE.alterations(diminish = 'D', augment = 'A', qualities = TRUE, ...)
  REs$inversion <- '[a-g]?'
  REs <- REs[c('accidental', 'numeral', 'triadalt', 'figurations', 'inversion')]
  
  
  key <- makeRE.key(step.labels = c('I', 'II', 'III', 'IV', 'V', 'VI', 'VII'),
                    parts = c('species', 'step', 'mode', 'alterations'),
                    collapse = TRUE, 
                    ...)
  
  REs$of <- paste0('(', sep, '(', key, '))*')
  
  if (collapse) setNames(cREs(REs), 'harm') else REs
}



makeRE.tertianPartition <- function(..., split = '/', mustPartition = FALSE) {
    
    roman <- makeRE.roman(...)
    
    
    key <- makeRE.diatonicPartition(..., split = split, mustPartition = FALSE)
    
    paste0(roman, '(', split, key, ')', if (!mustPartition) '?')
}



#### REs for durations ####

makeRE.dur <- function(..., sep.time = ':', sep.date = '/', sep.decimal = '\\.', collapse = TRUE) {
  number <- '[0-9]*'
  
  REs <- list(prefix = captureRE(c('~', '>', '<'), n = '?'),
              datetime = paste0('(', number, sep.date, '){0,3}',
                                '(', number, sep.time, '){0,2}',
                                number),
              decimal = paste0('(', sep.decimal, '[0-9]+)?'))
  
  if (collapse) setNames(cREs(REs), 'dur') else REs
  
  
}

makeRE.recip <- function(collapse = TRUE, fractions = TRUE, sep = '%', ...) {
  REs <- list(graceMark1 = '([Qq][^0-9q]*)?',
              recip = paste0('(', if (fractions) paste0('([1-9][0-9]*', sep, ')?'),
                             '[0-9]+\\.*|0{1,2}\\.*)'),
              graceMark2 = '([^qQ]*[Qq])?')
  if (collapse) setNames(cREs(REs), 'recip') else REs
}


makeRE.timeSignature <- function(..., sep = '/', collapse = TRUE) {
  REs <- list(star = '\\*?',
              em   = 'M',
              numerator = '[1-9][0-9]*([+][1-9][0-9]*)*',
              sep = sep,
              denominator = '[1-9][0-9]*'
  )
  
    if (collapse) setNames(cREs(REs), 'timeSignature') else REs
    
}

makeRE.grid <- function(..., sep = '', on = 'X', off = 'O') {
  RE <- paste0(on, '(', sep, off, ')*')
  
  setNames(RE, 'grid')
}

notehead.unicode <- data.frame(stringsAsFactors = FALSE,
                                Unicode = c('\U1D15C', '\U1D15D', '\U1D15E', '\U1D15F', 
                                            '\U1D160', '\U1D161', '\U1D162', '\U1D163', '\U1D164'),
                                Recip = c('0', '1', '2', '4', '8', '16', '32', '64', '128'))

makeRE.notehead <- function(..., sep = " \U2215", collapse = TRUE) {
  
  REs <- list(multiplies = '([1-9][0-9]*)?',
              value = captureRE(notehead.unicode$Unicode),
              divides = paste0('(', sep, '[1-9][0-9]*)?'),
              space = ' ',
              dots = paste0('(',  '\U1D16D\U2009',')*'))
  
  if (collapse) setNames(cREs(REs), 'notehead') else REs
}
Computational-Cognitive-Musicology-Lab/humdrumR documentation built on Oct. 22, 2024, 9:28 a.m.