R/compiler.R

# CONSTAES : Constant Aesthetics
# CHARAES : Character Aesthetics
GGPLOT2_TOKENS <- c("GGPLOT", "NAME", "CONSTAES",
                    "CHARAES", "THEME", "LAYER",
                    "BOOLEAN", "QUOTED", "UNIT",
                   "BOOLEANAES")
# SCALE "ScaleDiscrete" "Scale"         "ggproto"
# GEOM/STAT "LayerInstance" "Layer"         "ggproto"
# COORD "CoordCartesian" "Coord"          "ggproto"
# FACET "FacetGrid" "Facet"     "ggproto"
# labs ?
# POSITION  "PositionDodge" "Position"      "ggproto"
# THEME "theme" "gg"
GGPLOT2_LITERALS <- c() # needed?
GGPLOT2INVALIDTOKEN <- " <<INVALID_TOKEN_HERE>> "

# MAYBE-LATER don't know how to pass variables between yacc's production rules
ggbashenv <- new.env() # Note: This is a global variable.

ggregex <- list(
    plus_pipe  = "(\\+|\\|)\\s*",
    quoted     = paste0("('|\\\")",                      # start from a quote
                        "[a-zA-Z0-9\\._\\+\\-\\*\\/\\^ ]+",
                        "('|\\\")"),                      # end by a quote
    booleanaes = paste0("[a-zA-Z_][a-zA-Z_0-9\\.]\\s*=\\s*",
                        "(TRUE|FALSE|true|false|True|False)"),
    boolean    = "^(TRUE|FALSE|T|F|t|f|true|false|True|False)$",
    charaes    = paste0("[a-z]+=('|\\\").*?('|\\\")"),
    constaes   = "[a-z\\.]+=c\\([0-9\\.,\\)]+", # FIXME adhoc for binw=c(.1, .1)
    # Note: ggregex$constaes and t_CONSTAES rules are duplicated
    unit       = "[0-9\\.]+\\s*['\"]?(cm|in|inch|inches)['\"]?",
    data       = "data="
)

set_ggbashenv_warning <- function(){
    if (is.null(ggbashenv$show_amb_warn))
        ggbashenv$show_amb_warn <- TRUE
}

Ggplot2Lexer <-
    R6::R6Class(
        "Lexer",
        public = list(
            tokens = GGPLOT2_TOKENS,
            literals = GGPLOT2_LITERALS,
            #states = list(c('ggplot')),
            info = "not-used-now",
            # Note: t_(function) defines precedences implicitly
            t_GGPLOT = function(
            re = "^(g|gg|ggp|ggpl|ggplo|ggplot)\\s+[a-zA-Z_\\.][a-zA-Z_0-9\\.]*",
            t) {
                t$value <- gsub("^g(g|gp|gpl|gplo|gplot)?\\s*",
                                "ggplot2::ggplot(",
                                t$value)
                return(t)
            },
            t_CONSTAES = function(re="[a-z\\.]+\\s*=\\s*-*[0-9\\./\\*-\\+:]*[0-9]", t) {
                if (grepl("^group=", t$value)) {
                    t$type <- "NAME"
                    # aes(group=1)
                    return(t)
                }

                # last [0-9] is needed to interpret
                # size=7 + theme as "size=7" and "+ theme"
                return(t) # integers and floats
            },
            # I believe CONSTAES cannot contain +-*/^, because
            # gg iris + point Sepal.W Sepal.L size=4 + smooth colour="red"
            # will be interpreted as
            # LexToken(CHARAES,colour="blue" size=4 + smooth colour="red",1,33)
            # MAYBE LATER default arguments of functions cannot accept
            # global variables as defaults?
            # ggregex$charaes is falsely evaluated as empty string
            t_CHARAES = function(re="[a-z\\.]+\\s*=\\s*('|\\\").*?('|\\\")", t) {
                return(t)
            },
            t_NAME      = function(re="(\\\"|')?[\\.a-zA-Z0-9_\\(\\)\\-][a-zA-Z_0-9\\.,=\\(\\)\\-\\+\\/\\*]*(\\\"|')?(\\s*inches|\\s*inch|\\s*in|\\s*cm)?", t) {

                if (grepl(ggregex$data, t$value)) {
                    dbgmsg("  t_NAME: DATA ", t$value)
                    t$type <- "CONSTAES"
                } else if (grepl(ggregex$booleanaes, t$value)){
                    dbgmsg("  t_NAME: BOOLEANAES ", t$value)
                    t$type <- "BOOLEANAES"
                } else if (grepl(ggregex$constaes, t$value)) {
                    dbgmsg("  t_NAME: CONSTAES ", t$value)
                    t$type <- "CONSTAES"
                } else if (grepl(ggregex$boolean, t$value)) {
                    dbgmsg("  t_NAME: BOOLEAN ", t$value)
                    t$type <- "BOOLEAN"
                } else if (grepl(ggregex$unit, t$value)) {
                    dbgmsg("  t_NAME: UNIT ", t$value)
                    t$type <- "UNIT"
                    # ex. LexToken(UNIT,.20 cm,1,50)
                } else if (grepl(ggregex$quoted, t$value)) {
                    dbgmsg("  t_NAME: QUOTED ", t$value)
                    t$type <- "QUOTED"
                } else {
                    dbgmsg("  t_NAME: ", t$value)
                }
                return(t)
            },
            #t_LPAREN  = '\\(',
            #t_RPAREN  = '\\)',
            #t_COMMA = ',',
            # t_THEME = "(\\+|\\|)\\s*theme", # t_THEME is preferred to t_LAYER
            t_LAYER = function(re="##\\s*[a-z_2]+", t) {
                # "2" for bin2d
                # TODO missing geom handling here
                t$value <- gsub("##", "+", t$value)
                if (grepl("\\+\\s*theme", t$value)) {
                    t$type <- "THEME"
                    return(t)
                }
                partial <- gsub(paste0(ggregex$plus_pipe, "(geom_)?"),
                                "",
                                t$value)
                ggbashenv$const <- define_ggbash_constants()
                set_ggbashenv_warning()
                gv <- ggbashenv$const$geom_namev
                geom_sth <- gv[find_first_by_prefix(partial, gv,
                                                    ggbashenv$show_amb_warn)]

                t$value <- paste0(" + geom_", geom_sth)
                return(t)
            },
            t_ignore = " \t",
            t_newline = function(re="\\n+", t) {
                t$lexer$lineno <- t$lexer$lineno + nchar(t$value)
                return(NULL)
            },
            t_error = function(t) {
                cat(sprintf("Illegal character '%s'", t$value[1]))
                t$lexer$skip(1)
                return(t)
            }))

dbgmsg <- function(...) {
    if (exists("ggbash_debug"))
        message(...)
}

Ggplot2Parser <-
    R6::R6Class(
        "Parser",
        public = list(
            tokens = GGPLOT2_TOKENS,
            literals = GGPLOT2_LITERALS,
            # Parsing rules
            #precedence = list(),
            # dictionary of names
            names = new.env(hash = TRUE),
            # Note: ggproto contains '+' signs in LAYER tokens
            p_expression_func = function(
                    doc="expression : gg_init
                                    | gg_init aes_func
                                    | gg_init ggproto_list
                                    | gg_init aes_func ggproto_list", p) {
                dbgmsg("p_expression_func LAST")

                ggbashenv$dataset_name <-
                    gsub("ggplot2::ggplot\\(", "", p$get(2))

                ggbashenv$dataset <-
                    eval(as.symbol(ggbashenv$dataset_name),
                         envir = .GlobalEnv)
                ggbashenv$geom <- ""

                if (p$length() == 2) {
                    dbgmsg("GGPLOT only ")
                    p$set(1, paste0(p$get(2), ")"))
                } else if (p$length() == 3 && (! grepl("\\+", p$get(3)))[1] ) {
                    p$set(1,
                          paste0(p$get(2), ", ggplot2::aes(", p$get(3), ")"))
                } else if (p$length() == 3) {
                    p$set(1, paste0(p$get(2), ")", p$get(3)))
                } else {
                    p$set(1,
                          paste0(p$get(2),
                                 ", ggplot2::aes(", p$get(3), ")", p$get(4)))
                }
                },
            p_gg_init = function(doc="gg_init : GGPLOT", p) {

                p$lexer$instance$info <- "refed-in-gg-init" # FIXME do sth

                dbgmsg("p_gg_init: ", p$get(2))
                ggbashenv$dataset_name <-
                    gsub("ggplot2::ggplot\\(", "", p$get(2))
                ggbashenv$dataset <- tryCatch( {
                        eval(as.symbol(ggbashenv$dataset_name),
                             envir = .GlobalEnv)
                    }, error = function(err) {"error"} )
                ggbashenv$colv <- colnames(ggbashenv$dataset)

                for ( i in seq_along(ggbashenv$layer_coll))
                    dbgmsg("layer[", i, "]: ", ggbashenv$layer_coll[[i]])

                if (class(ggbashenv$dataset)[1] == "character") {
                    errinfo <-
                        list(id = "p_gg_init:dataset",
                             type = "No dataset found",
                             input = ggbashenv$dataset_name)
                    show_fixit_diagnostics(errinfo)
                    return(p$set(1, GGPLOT2INVALIDTOKEN))
                    # FIXME doesn't stop
                }

                ggbashenv$conf <-
                    list(aes = c(), non_aes = c(), geom_list = c())
                set_ggbashenv_warning()
                dbgmsg("  set dataset name: ", ggbashenv$dataset_name)

                p$set(1, p$get(2))
            },
            p_ggproto_list = function(doc="ggproto_list : ggproto
                                      | ggproto ggproto_list", p) {
                dbgmsg("p_ggproto_list: ", p$get(2))
                if (p$length() == 2)
                    p$set(1, p$get(2))
                else
                    p$set(1, paste0(p$get(2), p$get(3)))
                },
            p_ggproto_layer =
                function(doc =
                "ggproto : layer_init
                         | layer_init layer_aes_list
                         | layer_init layer_raw_aes
                         | layer_init layer_aes_list layer_raw_aes
                         | layer_init layer_raw_aes layer_aes_list", p) {
                dbgmsg("p_ggproto_layer: ", p$get(2), " NONTERMINAL")

                # ex: ggbashenv$geom is 'point'
                if (p$length() == 2) {
                    return(p$set(1, paste0(p$get(2), "()")))
                }

                # FIXME more general
                dbgmsg("  3rd is : ", p$get(3))
                raw_is_3rd <-
                    grepl("=([0-9\\.\\+\\-\\*\\/\\^]+|\\\"|')", p$get(3)) ||
                    grepl(ggregex$booleanaes, p$get(3)) ||
                    grepl(ggregex$constaes, p$get(3))

                if (grepl("\\.\\.", p$get(3))) {
                    dbgmsg("hit")       # FIXME ugly ..prop..
                    raw_is_3rd <- FALSE
                }

                if (raw_is_3rd) {
                    if (p$length() == 3) {
                        dbgmsg("    len==3 and raw_is_3rd: ",
                               p$get(2), " ", p$get(3))
                        p$set(1, paste0(p$get(2), "(", p$get(3)))
                    } else {
                        dbgmsg("    len!=3 and raw_is_3rd: ",
                               p$get(2), " ", p$get(3), " ", p$get(4))
                        p$set(1, paste0(p$get(2), "(", p$get(3),
                            ", ggplot2::aes(", p$get(4), ")"))
                    }
                } else {
                    if (p$length() == 3) {
                        dbgmsg("    len==3 and raw != 3rd: ",
                               p$get(2), " ", p$get(3))
                        p$set(1,
                            paste0(p$get(2),
                                "(ggplot2::aes(", p$get(3), ")"))
                    } else {
                        dbgmsg("    len==3 and raw != 3rd: ",
                               p$get(2), " ", p$get(3), " ", p$get(4))
                        p$set(1,
                            paste0(p$get(2),
                                "(ggplot2::aes(", p$get(3), ", ", p$get(4)))
                    }
                }
                },
            p_layer_init = function(doc="layer_init : LAYER", p) {
                # initialization
                dbgmsg("p_layer_init: ", p$get(2))
                dbgmsg("  ggbashenv$geom(before): ", ggbashenv$geom)
                prev <- gsub("\\s*(\\+|\\|)\\s*(geom_)?", "", p$get(2))
                gv <- ggbashenv$const$geom_namev
                ggbashenv$geom <-
                    gv[find_first_by_prefix(prev, gv, ggbashenv$show_amb_warn)]
                dbgmsg("  ggbashenv$geom(after): ", ggbashenv$geom)
                ggbashenv$conf$geom_list <-
                    c(ggbashenv$conf$geom_list, ggbashenv$geom)
                ggbashenv$aes_i <- 1

                # for column name search
                ggbashenv$i_layer <- ggbashenv$i_layer + 1

                p$set(1, paste0(" + ggplot2::geom_", prev))
            },
            p_layer_aes_list = function(
                doc="layer_aes_list : layer_aes
                                    | layer_aes layer_aes_list", p) {
                dbgmsg("p_layer_aes_list: ", p$get(2), " NONTERMINAL")

                if (p$length() == 2) {
                    p$set(1, paste0(p$get(2), ")"))
                } else {
                    p$set(1, paste0(p$get(2), ", ", p$get(3)))
                }
            },
            p_layer_aes = function(doc="layer_aes : NAME", p) {
                dbgmsg("p_layer_aes: ", p$get(2))

                # do column-name partial match
                single_quote <- "'"
                double_quote <- '"'

                # if data= supplied, use the column name space
                i <- ggbashenv$i_layer
                if (length(i) == 0 || length(ggbashenv$layer_coll) < i){ # invoke from testthat
                    i <- 1
                    ggbashenv$layer_coll <- list("NA", "NA", "NA", "NA", "NA", "NA", "NA")
                }
                if (ggbashenv$layer_coll[[i]][1] == "NA")
                    colnamev <- ggbashenv$colv # not supplied
                else
                    colnamev <- ggbashenv$layer_coll[[i]]

                must_aesv <- get_required_aes(ggbashenv$geom)
                all_aesv <- get_possible_aes(ggbashenv$geom)
                index <- ggbashenv$aes_i
                if (index < 1) {
                    return(p$set(1, paste0(p$get(2), ")")))
                    # error?
                }
                dummy_aesv <- c(rep("", index - 1), p$get(2))
                column_name <- parse_ggbash_aes(
                    index, dummy_aesv, must_aesv,
                    all_aesv, colnamev, ggbashenv$show_amb_warn)

                if (is.null(column_name)) {
                    errinfo <-
                        list(
                            id = "p_layer_aes:column_prefix",
                            type = "Column name not found",
                            input = p$get(2)
                        )
                    show_fixit_diagnostics(errinfo)
                    return(p$set(1, GGPLOT2INVALIDTOKEN))
                }

                if (! grepl("=", p$get(2)))
                    ggbashenv$aes_i <- ggbashenv$aes_i + 1
                ggbashenv$conf$aes <- c(ggbashenv$conf$aes, column_name)

                dbgmsg("  parsed: ", column_name)

                p$set(1, column_name)
            },
            p_layer_raw_aes = function(
                doc="layer_raw_aes : CHARAES
                                   | CONSTAES
                                   | BOOLEANAES
                                   | CHARAES layer_raw_aes
                                   | CONSTAES layer_raw_aes
                                   | BOOLEANAES layer_raw_aes", p) {
                dbgmsg("p_layer_raw_aes: ", p$get(2))

                if (grepl("data=", p$get(2))) {
                    if (p$length() == 2)
                        return(p$set(1, paste0(p$get(2), ")")))
                    else
                        return(p$set(1, paste0(p$get(2), ", ", p$get(3))))
                }

                all_aesv <- get_possible_aes(ggbashenv$geom)
                layer_params <- get_layer_params(ggbashenv$geom)
                all_rawv <- c(all_aesv, layer_params)
                all_rawv <- unique(all_rawv)
                colnamev <- ggbashenv$colv

                raw_aes <- parse_ggbash_non_aes(p$get(2), all_rawv, colnamev,
                                                ggbashenv$show_amb_warn)
                ggbashenv$conf$non_aes <- c(ggbashenv$conf$non_aes, raw_aes)


                if (is.null(raw_aes)) {
                    errinfo <- list(
                        id = "p_layer_raw_aes:partial_match",
                        type = "No such parameter for the geom",
                        input = p$get(2),
                        table = all_rawv
                    )
                    show_fixit_diagnostics(errinfo)
                    return(p$set(1, GGPLOT2INVALIDTOKEN))
                }

                if (p$length() == 2)
                    p$set(1, paste0(raw_aes, ")"))
                else
                    p$set(1, paste0(raw_aes, ", ", p$get(3)))
                },
            # p_position_func = function(doc="position_func : ", p) {
            #
            # },
            p_aes_func = function(doc="aes_func : NAME
                                                | CONSTAES aes_func
                                                | NAME aes_func", p) {
                dbgmsg("p_aes_func: ", p$get(2))

                colnamev <- ggbashenv$colv

                geom_tmp <- "point" # FIXME more general
                # FIXME defaultZproblem - z should not be removed
                # StatContour$required_aes has "z" as well as "x" and "y"
                # But how p_aes_func can know
                # the following geom is geom_contour()?
                # Currently adhoc fix in parse_ggbash_aes
                must_aesv <- get_required_aes(geom_tmp)
                all_aesv <- get_possible_aes(geom_tmp)

                column_name <-
                    parse_ggbash_aes(1, p$get(2), must_aesv,
                                    all_aesv, colnamev, ggbashenv$show_amb_warn)
                if (is.null(column_name)) {
                    input <- gsub("[a-z]+=", "", p$get(2))
                    errinfo <-
                        list(
                            id = "p_aes_func:prefix_match",
                            type = "No such column names\n",
                            input = input,
                            table = colnamev
                        )
                    show_fixit_diagnostics(errinfo)
                    return(p$set(1, GGPLOT2INVALIDTOKEN))
                }

                # FIXME defaultZproblem - z should not be removed
                if (grepl("(x=|y=)", column_name))
                    column_name <- gsub("[a-z]+=", "", column_name)


                if (p$length() == 2) {
                    p$set(1, paste0(column_name, ")"))
                } else {
                    p$set(1, paste0(column_name, ", ", p$get(3)))
                }
            },
            # see p_ggproto_layer
            p_ggproto_theme = function(doc="ggproto : theme_init
                                       | theme_init theme_elem_list", p) {
                dbgmsg("p_ggproto_theme: ", p$get(2), " -- add ) ")
                if (p$length() == 2) {
                    end <- paste0(p$get(2), ")")
                    p$set(1, end)
                } else {
                    p$set(1, paste0(p$get(2), p$get(3), ")"))
                }
            },
            p_theme_init = function(doc="theme_init : THEME", p) {
                # initialization
                dbgmsg("p_theme_init: ", p$get(2), " -- add (")
                # theme, theme_bw, theme_linedraw, ...
                theme_str <- gsub("\\s|\\+", "", p$get(2))

                # for column name search
                ggbashenv$i_layer <- ggbashenv$i_layer + 1

                p$set(1, paste0(" + ggplot2::", theme_str, "("))
            },
            p_theme_elem_list = function(
                doc="theme_elem_list : theme_elem theme_conf_list
                                | theme_elem theme_conf_list theme_elem_list",
                p) {
                dbgmsg("p_theme_elem_list", p$get(2), " ",
                       p$get(3), " -- add ( and ) ")
                elem <- p$get(2)
                if (p$length() == 3) {
                    # last configuration
                    p$set(1, paste0(elem, "(", p$get(3), ")"))
                    # close ggplot2::element_*(
                    # MAYBE-LATER "none" is now ("none")
                } else {
                    #if (! ggbashenv$elem_class %in% c("logical", "character"))
                        p$set(1, paste0(elem, "(", p$get(3), "), ", p$get(4)))
                    #else
                    #    p$set(1, paste0(elem, p$get(3), "), ", p$get(4)))
                    # text = element_text(...) , ... so no need to close paren
                }
            },
            p_theme_elem = function(doc="theme_elem : NAME", p) {
                dbgmsg("p_theme_elem: ", p$get(2))
                tdf <- ggbashenv$const$themedf

                # 'axis.te:' will be 'axis.te'
                #elem_name_partial <- gsub("\\:", "", p$get(2))
                elem_name_partial <- p$get(2)

                elem_name <- tdf$name[find_first_index(elem_name_partial,
                                                 tdf$name, show_warn = FALSE)]

                # do partial match for theme element
                # (ex. 'legend.t' -> 'legend.text')
                elem_class <- tdf$class[find_first_index(elem_name,
                                                   tdf$name,
                                                   show_warn = FALSE)]

                if (length(elem_class) == 0 || is.na(elem_class)) {
                    errinfo <-
                    list(
                        id = "p_theme_elem:prefix_match",
                        type = "Prefix match for theme element name failed.",
                        raw = p$get(2),
                        input = elem_name_partial,
                        table = tdf$name
                        )
                    show_fixit_diagnostics(errinfo)

                    ggbashenv$error <- TRUE

                    return(p$set(1, GGPLOT2INVALIDTOKEN))

                } else if (length(elem_class) > 1) {
                    message("UNKNOWN ERROR in p_theme_elem: ",
                            paste0(elem_class, collapse = " "))
                    elem_class <- elem_class[1] # What's this error?
                    return(p$set(1, GGPLOT2INVALIDTOKEN))
                }

                ggbashenv$elem_class <- elem_class

                if (grepl("^element_|margin", elem_class)) {
                    modifier <- "ggplot2::"
                    function_name <- paste0(modifier, elem_class)
                } else if (elem_class == "unit") {
                    modifier <- "grid::"
                    function_name <- paste0(modifier, elem_class)
                } else if (elem_class %in% c("logical", "character") ){
                    function_name <- ""
                } else {
                    message("ERROR: cannot get correct ",
                            "classes for a theme element: ",
                            paste0(elem_class, collapse = " "))
                    elem_class <- elem_class[1] # What's this error?
                    return(p$set(1, GGPLOT2INVALIDTOKEN))
                }

                p$set(1, paste0(elem_name, " = ", function_name))
            },
            p_theme_conf_list = function(doc="theme_conf_list : CONSTAES
                                         | CHARAES
                                         | QUOTED
                                         | BOOLEAN
                                         | UNIT
                                         | CONSTAES theme_conf_list
                                         | CHARAES theme_conf_list", p) {
                dbgmsg("p_theme_conf_list: ", p$get(2))

                if (! is.null(ggbashenv$error)) {
                    ggbashenv$error <- NULL # FIXME too compicated
                    # FIXME the error in o_theme_elem cannot stop
                    # even if return(p$set(1, GGPLOT2_INVALIDTOKEN)).
                    # It tries to execute this production rule,
                    # so currently early retrun here.
                    # There should be more elegant error handling.
                    return(p$set(1, NULL))
                }

                conf <- p$get(2)

                if (grepl(ggregex$quoted, conf) &&
                    ! grepl("^element_|margin", ggbashenv$elem_class)) {
                    message("quoted ", conf, " env$elemclass: ", ggbashenv$elem_class)
                    return(p$set(1, conf))
                } else if (grepl(ggregex$boolean, conf)) {
                    message("boolean ", conf, " env$elemclass: ", ggbashenv$elem_class)
                    return(p$set(1, conf))
                } else if (grepl(ggregex$unit, conf)) {
                    number <- gsub("[^0-9\\.]", "", conf)
                    this_unit <- gsub("[0-9\\. ]", "", conf)
                    return(p$set(1, paste0(number, ",'", this_unit, "'")))
                }

                before_equal <- gsub("=.*", "", conf)
                after_equal  <- gsub(".*=", "", conf)

                if (before_equal == "c") {
                    # element_text has "face" and "color",
                    # and current edit distance algorithm matches "c"
                    # to "face" not to "color" (because of the nchar).
                    # This contradicts to the case when we specify "c"
                    # in the element_rect or element_line.
                    # Thus, rewrite here.
                    before_equal = "colour"
                }

                # prefix match
                input <- ggbashenv$elem_class
                tbl <- get_theme_elem_name_conf(input)
                conf_name <- tbl[find_first_index(before_equal,
                                                  tbl, show_warn = FALSE)]

                if (is.na(conf_name)) {
                    errinfo <- list(
                        id = "p_theme_conf_list:partial_match",
                        type = paste0("Partial match for theme ",
                                      "element configuration failed."),
                        input = before_equal,
                        table = tbl
                    )
                    show_fixit_diagnostics(errinfo)
                    return(p$set(1, GGPLOT2INVALIDTOKEN))
                }

                a_conf <- paste0(conf_name, "=", after_equal)

                if (p$length() == 2) {
                    # FIXME add spaces
                    p$set(1, a_conf)
                } else {
                    p$set(1, paste0(a_conf, ", ", p$get(3)))
                }
            },
            p_error = function(p) {
                if (is.null(p)) {
                    errinfo <- list( id = "p_error:null",
                                     type = "Syntax error at EOF")
                } else {
                    errinfo <- list( id = "p_error:non_null",
                                     type = paste0("Syntax error at \"",
                                                   p$value, "\""))
                }
                show_fixit_diagnostics(errinfo)
            }
            )
        )

#' Display useful debugging info for users
#'
#' @param err A list of error information
#'
show_fixit_diagnostics <- function(
    err = list(
        id = "p_theme_elem:prefix_match",
        type = "Prefix match for theme element name failed.",
        input = "axis.tx:",
        elem_name = "axis.tx",
        elem_table = c("axis.text", "axis.title")
    )
) {
    # MAYBE-LATER Is it possible to get the built entire ggplot object here?
    message("COMPILE ERROR: ", err$type)
    m1 <- function(...) message("  ", ...)
    m2 <- function(...) message("    ", ...)
    m3 <- function(...) message("      ", ...)

    similarv <- get_analogue(err$input, err$table)$name

    if (err$id == "p_theme_elem:prefix_match") {

        m1("Is your theme element's name correct?")
        m2("The supplied string is \"", err$raw, "\", but")
        m3("maybe: ", paste0(similarv, collapse = ", "))
    } else if (err$id == "p_layer_aes:column_prefix") {
        colv <- ggbashenv$colv
        similarv <- get_analogue(err$input, colv)$name

        m1("The column name \"", err$input, "\" does not exist.")
        m2("maybe: ", paste0(similarv, collapse = ", "))
    } else if (err$id == "p_theme_conf_list:partial_match") {
        m1("The column name \"", err$input, "\" does not exist.")
        m2("maybe: ", paste0(similarv, collapse = ", "))
    } else if (err$id == "p_aes_func:prefix_match") {
        m1("The column name \"", err$input, "\" does not exist.")
        m2("maybe: ", paste0(similarv, collapse = ", "))
    } else if (err$id == "p_layer_raw_aes:partial_match") {
        m1("The special parameter \"", err$input, "\" does not exist.")
        m2("maybe: ", paste0(similarv, collapse = ", "))
    } else if (err$id == "p_gg_init:dataset") {
        m1("Is your data frame name correct?")
    } else if (err$id == "p_error:non_null") {
        m1("Did you specify a geom before aesthetics?")
        m2(" BAD: gg mtcars + mpg cyl")
        m2("GOOD: gg mtcars + point mpg cyl")
    } else if (err$id == "p_error:null") {

    }
}

# FIXME parentheses for no equal case
replace_with_space <- function(input, i)
    paste0(substr(input, 1, i - 1), " ",
           substr(input, i + 1, nchar(input)))


#' set layer-specific column names
#'
#' this functio nset ggbashenv$layer_coll (column name list),
#' which is used for layer-specific column name partial match.
#'
#' @param input A ggbash string
#'
#'
set_layer_colnames <- function(
    input = "gg(a) + p(x=x, color='red', y=y, data=iris)"
) {
    n_layer <- 0
    n_paren <- 0
    i <- 1
    invalid <- FALSE
    while ( i <= nchar(input)) {
        this <- substr(input, i, i)

        if (n_paren == 0) {
            if (this == "+") {
                n_layer <- n_layer + 1
                ggbashenv$layer_coll[[n_layer]] <- "NA"
            }
        } else if (n_paren == 1) {
            if (grepl("data", substr(input, i, i + 3))) {

                i <- i + 4
                while (substr(input, i, i) != "=") {
                    if (grepl("[^= ]", substr(input, i, i))) {
                        # invalid
                        invalid <- TRUE
                        break
                    }
                    i <- i + 1
                }
                if (invalid) {
                    if (substr(input, i, i) == "(")
                        n_paren <- n_paren + 1
                    else if (substr(input, i, i) == ")")
                        n_paren <- n_paren - 1

                    i <- i + 1
                    invalid <- FALSE
                    next
                }
                i <- i + 1
                data_start <- i

                message("data_start: ", i)
                while (TRUE) {
                    this <- substr(input, i, i)

                    if (n_paren == 1 && this %in% c(")", ",")) {
                        data_end <- i - 1
                        datastr <- substr(input, data_start, data_end)
                        ggbashenv$layer_coll[[n_layer]] <-
                            colnames(eval(parse(text = datastr),
                                          envir = .GlobalEnv))

                        break
                    }

                    if (this == "(")
                        n_paren <- n_paren + 1
                    else if (this == ")")
                        n_paren <- n_paren - 1

                    i <- i + 1
                }

                message("layer_coll[", n_layer, "]: ",
                        ggbashenv$layer_coll[[n_layer]])
            }
        }

        if (this == "(")
            n_paren <- n_paren + 1
        else if (this == ")")
            n_paren <- n_paren - 1

        i <- i + 1
    }
}

#' remove parentheses and marks
#'
#' ggbash have to handle two types of parentheses and tyo types of commas.
#' One is unnecessary and the other is necessary for correct parsing.
#' For example, in \code{gg(iris) + bin2d(x, y, binwidth=c(.1, .1))},
#' all parentheses and commas before "binwidth" are unnecessary.
#' So this should be replaced by
#' \code{gg iris  + bin2d x  y  binwidth=c(.1, .1) },
#' which can be parsed by ggplot2 compiler.
#'
#' This is because while ggbash does not rely on commas and parens for
#' parsing, R relies on them.
#'
#' @param input A character
#'
#' @export
remove_unnecessary_marks <- function(
    input = "gg(m, x=f(cyl), m) + t(l=p0('l:', w))"
){

    n_paren <- 0
    is_after <- FALSE
    state <- ""
    for ( i in 1:nchar(input)) {
        this <- substr(input, i, i)
        if (is_after) {
            if (this == "(")
                n_paren <- n_paren + 1
            else if (this == ")")
                n_paren <- n_paren - 1

            # nested equals are ignored

            if (n_paren == 0 && this == ",")
                input <- replace_with_space(input, i)

            if (n_paren < 0) {
                is_after <- FALSE
                n_paren <- n_paren + 1
                input <- replace_with_space(input, i)
            }
        } else {
            if (this == "(")
                input <- replace_with_space(input, i)
            else if (this == ")")
                input <- replace_with_space(input, i)
            else if (this == ",")
                input <- replace_with_space(input, i)
            else if (this == "=")
                is_after <- TRUE
        }
        state <- paste0(state, n_paren)
    }
    return(input)
}

#' remove aes() function call
#'
#' ggbash() tries to follow ggplot2 original syntax
#' as much as possible.
#' When aes() function calls is specified in a given
#' ggbash string, those are safely removed by this function.
#'
#' @param input A character
#'
#'
#' @importFrom sourcetools tokenize_string
remove_aes <- function(input = "gg(a) + p(aes(x,y),c) + p(aes(l),d)") {

    df <- sourcetools::tokenize_string(input)
    df$r <- 1:nrow(df)
    part <- df[(df$type %in% c("bracket")) |
             df$value == "aes", ]

    i_aes <- which(part$value == "aes")
    df[df$r %in% part$r[(i_aes + rep(0:2, length(i_aes)))], "value"] = ""

    return(paste0(df$value, collapse=""))
}

replace_plus <- function(input = "gg(x) + p(a+b, c+d+f) + p(a)\n  + p(c+d)") {

    input <- gsub("\n", "", input)
    nlen <- nchar(input)

    if (! grepl("\\(", input)) {
        # terminal mode
        target <- " \\+ "
        # in terminal, no parentheses and commas can be used as parsing hints.
        # instead, here I rely on both-spaced plus sign as "depth-0" plus.
        # e.g.    gg mtcars + rect xmin=wt-3 xmax=wt+3 ... (no spaces around +)
        depth0_indices <- 1:nlen
        end_i <- function(index) index
        start_i <- function(index) index + 2
    } else {
        target <- "\\+"

        df <- sourcetools::tokenize_string(input)
        df <- df[ df$type == "bracket", ]
        df$lparen <- ifelse(df$value == "(", 1, 0)
        df$rparen <- ifelse(df$value == ")", 1, 0)
        df$depth <- cumsum(df$lparen) - cumsum(df$rparen)
        df$start <- df$column + 1
        df$end <- c(df$column[-1], nlen+2) - 1
        # ignore first depth==0 interval (there is no plus in there)
        # but last depth==0 interval matters, like
        # ggbash(gg(x=Sepal.W, y=Sepal.L) + p() + line)
        depth0_indices <-
            unlist(
                apply(df[df$depth == 0 , ], 1,
                      function(row) {seq(as.numeric(row["start"]),
                                         as.numeric(row["end"]), by=1)}))

        end_i <- function(index) index - 1
        start_i <- function(index) index + 1
    }

    candidates <- gregexpr(target, input)[[1]]
    for (index in candidates) {
        if (index %in% depth0_indices)
            input <-
                paste0( substr(input, 1, end_i(index)),
                        "#",
                        substr(input, start_i(index), nlen))
    }
    # prevent colour="#112333" to be replaced
    input <- gsub("#([^0-9ABCDEFabcdef])", "##\\1", input)
    return(input)
}

remove_element_whatever <- function(str = "g(x) + theme(l=element_text(sz=20))") {
    # assume "=" and "element_text" don't have spaces between them
    out <- gsub("=element_(text|rect|line|grob|blank)", "", str)
    return(out)
}

remove_theme_unit <- function(str = "g(x) + theme(l=unit(.5, 'cm'))") {

    quote_removed <- gsub("\"|'", "", gsub(".*=unit(.*?)\\).*", "\\1\\)", str))
    out <- gsub("=unit\\([^\\)]*\\)", "%%%%%", str)
    out <- gsub("%%%%%", quote_removed, out)
    # quotes should be removed here because in parse I defined "QUOTED" token
    return(out)
}

coat_adhoc_syntax_sugar <- function(
    cmd = "gg(mtcars,mpg,hwy) + point(size = xyz(gear) +1, shape = 16 / 3 * 4)"
){
    out <- gsub("\\s*,\\s*", ",", cmd) # no comma
    out <- gsub("\\s*=\\s*", "=", out)
    out <- gsub("\\s*-\\s*", "-", out)
    out <- gsub("\\s*/\\s*", "/", out)
    out <- gsub("\\s*\\*\\s*", "\\*", out)
    ggbashenv$layer_coll <- list()
    ggbashenv$i_layer <- 0
    set_layer_colnames(out)
    out <- remove_aes(out)
    out <- remove_element_whatever(out)
    out <- remove_theme_unit(out)
    out <- replace_plus(out)
    out <- gsub("\\s*\\+\\s*", "\\+", out)
    out <- remove_unnecessary_marks(out)
    return(out)
}

#' the core function of ggbash
#'
#' @param cmd A character
#'
#' compile_ggbash returns a built ggplot object.
#'
compile_ggbash <- function(cmd = "gg mtcars + p wt wt"){
    cmd <- coat_adhoc_syntax_sugar(cmd)

    lexer <- rly::lex(Ggplot2Lexer)
    parser <- rly::yacc(Ggplot2Parser)

    ggobj <- parser$parse(cmd, lexer)
    info <- lexer$instance$info # access internal variables
    return(ggobj)
}

lex  <- rly::lex(Ggplot2Lexer)
yacc <- rly::yacc(Ggplot2Parser)

gbash <- function(ggbash_symbols) {
    is_string <- tryCatch(class(ggbash_symbols) == "character",
                          error = function(err) {FALSE})
    if (is_string[1]) {
        cmd <- ggbash_symbols
    } else {
        raw_cmd <- deparse(substitute(ggbash_symbols),
                           width.cutoff = 500) # arbitrary large
        cmd <- raw_cmd
    }
    compile_ggbash(cmd)
}

# FIXME duplicate
bash <- function(ggbash_symbols) {
    is_string <- tryCatch(class(ggbash_symbols) == "character",
                          error = function(err) {FALSE})
    if (is_string[1]) {
        cmd <- ggbash_symbols
    } else {
        raw_cmd <- deparse(substitute(ggbash_symbols),
                           width.cutoff = 500) # arbitrary large
        cmd <- raw_cmd
    }
    gsub("ggplot2::", "", compile_ggbash(cmd))
}
caprice-j/ggbash documentation built on May 13, 2019, 12:11 p.m.