R/redcasltx.R
In redcas: An Interface to the Computer Algebra System 'REDUCE'

Documented in asltx

## $Id: redcasltx.R,v 1.5 2024/11/05 15:42:44 mg Exp $

## Purpose: execute code which produces LaTeX output generated by TMPRINT and perform
##          certain actions:
##          pre-processing:
##          - handling toggling of FANCY switch - handled by reduce fn arrayltx
##          post-processing
##          - removal of TeXmacs specific markup
##          - reverting conversion of numeric suffix of identifiers to subscript (x_{n} to xn)
##          - translation of reduce identifiers to user-defined LaTeX math identifiers
##          - replacement of \left and \right with the empty string for defined levels of
##            nesting but at least for the innermost (improves readability of the code)
##          - enclosing results in a user-specified LaTeX math environment

## $Log: redcasltx.R,v $
## Revision 1.5  2024/11/05 15:42:44  mg
## Changed default notify to 0
##
## Revision 1.4  2024/11/04 11:46:42  mg
## removed commented debugging writeLines
##
## Revision 1.3  2024/10/03 14:18:37  mg
## Final statement was wrapped into a comment!
##
## Revision 1.2  2024/10/03 13:00:59  mg
## ltx.index now avoids double superscript which occur when an item with raised indices is
## raised to a power
##
## Revision 1.1  2024/07/10 16:43:44  mg
## Initial version
##

## NOTES

## 1. generic map for PSL is different to CSL, in particular the prefix and suffix. Can
##    probably just add them. 2024-04-01 DONE: they are the same when tmprint is
##    loaded. Now done via modified tmprint-psl.red

## 2. redStart now loads the reduce functions in inst/reduce/redcas.red

## 3. added map _{\([0-9]+\)} -> \1

## 4. removed mapping of begin/end as this is now done by reduce:arrayltx

## START: define variables
##
### NOTE!!! these must go in the package environment
##
## define general mapping applied to all output: Note that \left and \right are needed for
## automatic sizing of parentheses. fancy applies them to all parentheses and |x|, which
## is ott. Any removal should be done using the specific map.
## Notes:
## 1. this assumes output has been generated by the reduce procedure asltx
## 2. the order is important
## 3. the math begin and end commands must be done separately because the LF (\012)
##    goes in a different location for each.

.redcas.env[["ltx.from"]] <- c(
    "^([0-9]+: +)+$",                 # blank line
    "^([0-9]+: )*\002latex:\\\\black\\$\\\\displaystyle +",   # line prefix
    "\\$\005$",                       # line suffix
    ":=",                             # reduce assignment
    "~:?=~",                          # reduce assignment
    "_\\{([0-9]+)\\}",                # unwanted conversion of suffix to index
    "\\\\mathrm\\{\\}",               # empty mathrm environment
    "\\\\mathrm\\{([^}]+)\\}",        # remove remaining mathrm
    ## ":(begin):([^:]+):",           # math environment start
    ## ":(end):([^:]+):",             # math environment end
    ## remove trailing space from upper case Greek letters
    '\\\\Gamma ',
    '\\\\Delta ',
    '\\\\Theta ',
    '\\\\Kappa ',
    '\\\\Lambda ',
    '\\\\Xi ',
    '\\\\Pi ',
    '\\\\Sigma ',
    '\\\\Tau ',
    '\\\\Upsilon ',
    '\\\\Phi ',
    '\\\\Chi ',
    '\\\\Psi ',
    '\\\\Omega ',
    ## replace mathit with mathrm for common Greek/Latin upper case
    'mathit\\{A\\}',
    'mathit\\{B\\}',
    'mathit\\{E\\}',
    'mathit\\{Z\\}',
    'mathit\\{H\\}',
    'mathit\\{I\\}',
    'mathit\\{M\\}',
    'mathit\\{N\\}',
    'mathit\\{O\\}',
    'mathit\\{P\\}'
)
.redcas.env[["ltx.to"]] <- c(
    "",                         # blank line
    "",                         # line prefix
    "",                         # line suffix             
    "=",                        # reduce assignment       
    "=",                        # reduce assignment       
    "\\1",                      # unwanted conversion of suffix to index
    "",                         # empty mathrm environment
    "\\1",                      # remove remaining mathrm 
    ## "\\\\\\1{\\2}\12",       # math environment start  
    ## "\12\\\\\\1{\\2}",       # math environment end    
    ## remove trailing space from upper case Greek letters
    '\\\\Gamma',
    '\\\\Delta',
    '\\\\Theta',
    '\\\\Kappa',
    '\\\\Lambda',
    '\\\\Xi',
    '\\\\Pi',
    '\\\\Sigma',
    '\\\\Tau',
    '\\\\Upsilon',
    '\\\\Phi',
    '\\\\Chi',
    '\\\\Psi',
    '\\\\Omega',
    ## replace mathit with mathrm for common Greek/Latin upper case
    'mathrm{A}',
    'mathrm{B}',
    'mathrm{E}',
    'mathrm{Z}',
    'mathrm{H}',
    'mathrm{I}',
    'mathrm{M}',
    'mathrm{N}',
    'mathrm{O}',
    'mathrm{P}'
)

## END: define variables

##  Function: asltx
##   Purpose: display a reduce object (expression or array) using the fancy switch
## Arguments:
##        id: session id
##         x: object(s) to display
##   mathenv: math environment to wrap around expression or each element of array
##      mode: reduce output mode: nat or fancy
##    notify: number seconds after which user is notified that reduce commands still running
##   timeout: number seconds after which to terminate if still running
##     Value: a list of character vectors: tex, containing the LaTeX representation of the
##     input; out, the untranslated output; cmd, the command(s) and raw, the reduce
##     transcript

asltx <- function(id, x, usermap, mathenv="", mode="fancy", notify=0, timeout=0, debug=FALSE) {
    ## check inputs

    ## insert call to reduce asltx procedure
    x <- paste0("asltx(",
                x, ",",
                dQuote(mathenv, q='"'), ",",
                dQuote(mode, q='"'), ",",
                dQuote(x, q='"'),
                ");")
    
    ## call redExec
    ltx.ls <- redExec(id, x, split=TRUE, drop.blank.lines=TRUE, notify=notify, timeout=timeout) ;
    
    ## post-process the output
    ## user mapping and index assignment are optional. Determine whether we need to do them
    do.spec <- FALSE
    do.index <- FALSE
    if (!missing(usermap)) {
        if (is.element("ident",names(usermap))) do.spec <- is.character(usermap[["ident"]])
        if (is.element("index",names(usermap))) do.index <- is.character(usermap[["index"]])
    }

    ## remove some things that we don't need. These are generally related to TeXmacs but
    ## could be more generic.
    ltx.tex <- ltx.map.gen(ltx.ls$out, .redcas.env[["ltx.from"]], .redcas.env[["ltx.to"]])

    ## map the reduce identifiers to LaTeX identifiers
    if (do.spec) {
        ltx.tex <- ltx.map.spec(ltx.tex, usermap$ident)
    }

    ## Convert arguments to indices, e.g.  (i,j) -> _i^j
    if (do.index) {
        ltx.tex <- ltx.index(ltx.tex, usermap$index, debug = debug)
    }

    ## return the results
    return(list(tex=ltx.tex, out=ltx.ls$out, cmd=ltx.ls$cmd, raw=ltx.ls$raw))
}

## Function to apply the generic mappings. 
## Args:
##     x: character vector containing the LaTeX source
##  from: a character vector specifying the search terms
##    to: a character vector specifying replacement terms
## Value: character vector of same length as x with mapping applied
ltx.map.gen <- function(x, from, to) {
    for (i in 1:length(from)) {
        x <- gsub(from[i], to[i], x)
    }
    return(x)
}

## Function to apply the specific mappings. Here we need to use the word separator (\b) to
## avoid mapping more than intended. E.g. if we have a map x -> y, without word separator
## this would also map xs -> ys. However, if a string is not a variable name but should be
## mapped, e.g. "=" to "&=", \b should not be used.
## Args:
##     x: character vector containing the LaTeX source
##   map: a named character vector. The name is the search term (reduce identifier name) and the
##        value is the replacement (LaTeX identifier name or command).
## debug: print the (modified) search terms and the replacement terms
## Value: character vector of same length as x with mapping applied
ltx.map.spec <- function(x, map, debug = FALSE) {
    from <- names(map)
    from <- paste0(ifelse(grepl("[[:alnum:]_]", substr(from,1,1), perl = TRUE),
                          "\\b",
                          ""),
                   from,
                   ifelse(grepl("[[:alnum:]_]", substr(from,nchar(from),nchar(from)), perl = TRUE),
                          "\\b",
                          ""))
    to <- map
    for (i in 1:length(from)) {
        x <- gsub(from[i], to[i], x)
    }
    if (debug) message(sprintf("%s : %s", from, to))
    return(x)
}

## function to transform arguments to indices. By nature, this is only ever
## specific. Since domain is range of ltx.map.id the variable name, this must be applied
## after ltx.map.spce.
## Args:
##     x: character vector containing the LaTeX source
##   map: a named character vector. The name is the search term (identifier name) and the
##        value is the replacement (appropriate combination of "_" and "^").
## debug: print the actual search and replacement terms used
## Value: character vector of same length as x with mapping applied
ltx.index <- function(x, map, debug = FALSE) {
    op.names <- names(map)
    for (i in 1:length(op.names)) {
        src <- paste0(ifelse(substr(op.names[i],1,1) == "\\","","\\b"), op.names[i])
        tgt <- op.names[i]
        for (j in 1:nchar(map[i])) {
            ## src must be followed by (a1, ... aN) and the parenthesis might be modified
            ## by \left or \right. Since a1 etc could be LaTeX commands (e.g. Greek
            ## letters) we need an optional \ as the first character
            src <- paste0(src, ifelse(j == 1, "(?:\\\\left)?\\(", ","), # ?: omits from count
                          "(\\\\?[0-9a-zA-Z]+)",
                          ifelse(j == nchar(map[i]), "(?:\\\\right)?\\)", ""))
            ## the indices must be appended to the tgt name. LaTeX complains if there are
            ## more than two raised (^) or lowered (_) so the following puts consecutive
            ## raised or lowered indices within one set of braces.
            if (j == 1) {
                tgt <- paste0(tgt, substr(map[i], j, j), "\\{\\", j)
            } else {
                if (substr(map[i], j, j)  == substr(map[i], j-1, j-1)) {
                    tgt <- paste0(tgt, "\\", j)
                } else {
                    tgt <- paste0(tgt, "\\}", substr(map[i], j, j), "\\{\\", j)
                }
            }
            if (j == nchar(map[i])) tgt <- paste0(tgt, "\\}")
        }
        if (debug) message(sprintf("%s : %s\n%s\\\\^", src, tgt, src))
        ## Now we can apply the mapping. There is one more complication: if a variable
        ## with any raised index is also raised to a power, we need to enclose the variable
        ## in {} as otherwise TeX will complain about double superscripts.
        if (grepl("^", map[i])) {
            if (any(grepl(paste0(src,"\\^"), x))) {
                x <- gsub(paste0(src, "\\^"), paste0("\\{",tgt, "\\}^"), x, perl = TRUE)
            } 
        }
        ## terms w/o superscript or with superscript and no following exponent. Since
        ## these may occur on lines with items with both raised index and exponent,
        ## this must be unconditional and not an else clause.
        x <- gsub(src, tgt, x, perl = TRUE)
    }
    return(x)
}