R/fftrees_wordstofftrees.R

Defines functions fftrees_wordstofftrees

Documented in fftrees_wordstofftrees

#' Convert a verbal description of an FFT into an \code{FFTrees} object
#'
#' @description \code{fftrees_wordstofftrees} converts a verbal description
#' of an FFT (provided as a string of text) into
#' a tree definition (of an \code{FFTrees} object).
#' Thus, \code{fftrees_wordstofftrees} provides a simple
#' natural language parser for FFTs.
#'
#' \code{fftrees_wordstofftrees} is the complement function to
#' \code{\link{fftrees_ffttowords}}, which converts an abstract tree definition
#' (of an \code{FFTrees} object) into a verbal description
#' (i.e., provides natural language output).
#'
#' To increase robustness, the parsing of \code{fftrees_wordstofftrees}
#' allows for lower- or uppercase spellings (but not typographical variants)
#' and ignores the else-part of the final sentence (i.e., the part
#' beginning with "otherwise").
#'
#' @param x An \code{FFTrees} object.
#' @param my.tree A character string. A verbal description (as a string of text) defining an FFT.
#'
#' @return An \code{FFTrees} object with a new tree definition as described by \code{my.tree}.
#'
#' @seealso
#' \code{\link{fftrees_ffttowords}} for converting FFTs into verbal descriptions;
#' \code{\link{print.FFTrees}} for printing FFTs;
#' \code{\link{plot.FFTrees}} for plotting FFTs;
#' \code{\link{summary.FFTrees}} for summarizing FFTs;
#' \code{\link{FFTrees}} for creating FFTs from and applying them to data.
#'
#' @importFrom stringr str_extract str_detect
#'
#' @export

fftrees_wordstofftrees <- function(x,
                                   my.tree) {

  # Provide user feedback: ----

  if (!x$params$quiet$ini) {

    # cat(u_f_ini("Aiming to create an FFT from 'my.tree' description:\n"))

    cli::cli_alert("Create an FFT from 'my.tree' description:", class = "alert-start")

  }

  # Parameters / options: ------

  # # Direction markers (symbols/words):
  # directions_df <- data.frame(
  #   direction   = c( "=",  ">", ">=", "<",  "<=", "!=", "equal", "equals", "equal to", "greater", "less"),
  #   negation    = c("!=", "<=", "<",  ">=", ">",   "=",  "!=",    "!=",     "!=",       "<=",      ">=" ),
  #   direction_f = c( "=",  ">", ">=", "<",  "<=", "!=",   "=",     "=",      "=",       ">",       "<"  ),
  #   #
  #   stringsAsFactors = FALSE
  # ) # (local constant)

  # exits_df <- data.frame(     # is NOT used anywhere?
  #   exit.char = x$params$decision.labels,
  #   exit = c("0", "1"),       # 0:FALSE/noise/left vs. 1:TRUE/signal/right
  #   stringsAsFactors = FALSE
  # )


  # Clean up and check my.tree: ------

  # Collapse into one sentence:
  if (length(my.tree) > 1) {
    my.tree <- paste(my.tree, collapse = ". ")
  }

  # Remove line breaks (\n):
  my.tree <- gsub(pattern = "\n", replacement = "", x = my.tree)

  # Use lowercase spelling (for robustness against typos):
  my.tree         <- tolower(my.tree)
  cue_names_l     <- tolower(x$cue_names)
  decision_labels <- tolower(x$params$decision.labels)


  # Verify the presence of both decision labels/exit types (at least once):

  lbl_0 <- decision_labels[1]  # exit type 0: False/noise/0/left
  if (all(grepl(lbl_0, x = my.tree) == FALSE)) {
    warning(paste0("The decision label '", decision_labels[1], "' does not occur in my.tree."))
  }

  lbl_1 <- decision_labels[2]  # exit type 1: True/signal/1/right
  if (all(grepl(lbl_1, x = my.tree) == FALSE)) {
    warning(paste0("The decision label '", decision_labels[2], "' does not occur in my.tree."))
  }

  # Note: As the final else/'otherwise' part is ignored, rake trees CAN mention only 1 exit type.
  #       Thus, enforcing that both exit types are mentioned (at least once) is too restrictive.
  # Done: Turn stops into warnings, but provide feedback which exit type is not being mentioned.


  # Split my.tree into def parts (dropping the final "otherwise" clause): ------
  {
    def <- unlist(strsplit(my.tree, split = "if |when |whenever ", fixed = FALSE))  # Note: Also removes trailing " " after "if"!
    def <- paste0(" ", def)    # add a leading " " again (to include in detecting cue name below)
    def <- def[2:length(def)]  # remove initial empty string
    # print(def)  # 4debugging

    def_fin_2 <- unlist(strsplit(def[length(def)], split = "else|otherwise|other", fixed = FALSE))
    # Done: Generalized to include "else", "in other cases", etc.
    # print(def_fin_2)  # 4debugging

    # Drop the final sub-sentence (its else/otherwise part):
    def <- c(def[-length(def)], def_fin_2[1])

    # Drop trailing spaces (" "):
    # def <- gsub(pattern = "\\.\\s+$", replacement = "\\.", x = def)  # remove trailing spaces (after ".")
    def <- gsub(pattern = "\\s+$", replacement = "", x = def)  # remove ANY trailing spaces

    # print(def)  # 4debugging
  }

  nodes_n <- length(def)


  # Extract FFT elements from def: ------

  # 1. cues_v: ----
  {
    cues_v <- names(unlist(lapply(def[1:nodes_n], FUN = function(node_sentence) {

      # Can I find the name of a cue in this sentence?
      cue_exists <- any(sapply(cue_names_l, FUN = function(cue_i_name) {
        any(stringr::str_detect(node_sentence, paste0(" ", cue_i_name, " ")))
      }))

      if (!cue_exists) {
        stop(paste("I could not find any valid cue names in the sentence: '", node_sentence, "'. Please rewrite", sep = ""))
      }

      if (cue_exists) {
        output <- which(sapply(cue_names_l, FUN = function(cue_i_name) {
          stringr::str_detect(node_sentence, paste0(" ", cue_i_name, " "))
        }))
      }

      return(output)

    })))

    # Convert cue names back to original (non lower) values:
    cues_v <- x$cue_names[sapply(cues_v, FUN = function(c_name) {
      which(cue_names_l == c_name)
    })]
  } # 1. cues_v.


  # 2. classes_v: ----
  {
    classes_v <- rep(NA, nodes_n)

    in_brackets <- stringr::str_detect(def[1:nodes_n], "\\[") | stringr::str_detect(def[1:nodes_n], "\\{")

    classes_v[ in_brackets] <- "c"  # categorical (character, factor, logical)
    classes_v[!in_brackets] <- "n"  # numeric (integer, numeric)

  } # 2. classes_v.


  # 3. exits_v: ----
  {
    exits_v <- unlist(lapply(def[1:nodes_n], FUN = function(node_sentence) {

      y <- unlist(strsplit(node_sentence, " "))

      false_ix <- grep(pattern = tolower(decision_labels[1]), x = y)  # indices of labels (for 0 / FALSE / noise / left exits)
      true_ix  <- grep(pattern = tolower(decision_labels[2]), x = y)  # indices of labels (for 1 / TRUE / signal / right exits)

      if (any(grepl(decision_labels[2], x)) & any(grepl(decision_labels[1], y))) {

        if (min(true_ix) < min(false_ix)) {
          return(exit_types[2])  # == 1 / TRUE / signal / right
        }

        if (min(true_ix) > min(false_ix)) {
          return(exit_types[1])  # == 0 / FALSE / noise / left
        }

      }

      if (any(grepl(decision_labels[2], y)) & !any(grepl(decision_labels[1], y))) {
        return(exit_types[2])  # == 1 / TRUE / signal / right
      }

      if (!any(grepl("v", y)) & any(grepl(decision_labels[1], y))) {
        return(exit_types[1])  # == 0 / FALSE / noise / left
      }

    }))

    # print(exits_v)  # 4debugging
  } # 3. exits_v.


  # 4. thresholds_v: ----
  {
    thresholds_v <- sapply(1:nodes_n, FUN = function(i) {

      # Get definition:
      x <- def[i]

      # Remove the name of the cue:
      x <- gsub(pattern = tolower(cues_v[i]), replacement = "", x = x)

      # Is there a number?
      num_log <- grepl("[0-9]", x = x)

      # Is there a brace?
      bracket_log <- grepl("\\{", x = x)

      # If there is a number and no brace, get the number:
      if (!bracket_log & num_log) {
        threshold_i <- stringr::str_extract(x, "[-+]?\\d+\\.*\\d*")
      }

      # If there is a brace get what's inside the braces (and remove any spaces):
      if (bracket_log) {
        threshold_i <- stringr::str_replace_all(unlist(strsplit(x, "\\{|\\}"))[2], pattern = " ", "")
      }

      return(threshold_i)

    })
  } # 4. thresholds_v.


  # 5. directions_v: ----
  {

    # A. Detect directions and negations: ----

    # Look for directions in sentences:

    directions_v <- names(unlist(lapply(def[1:nodes_n], FUN = function(node_sentence) {
      output <- which(sapply(directions_df$direction, FUN = function(direction_i) {
        stringr::str_detect(node_sentence, direction_i)
      }))

      output <- output[length(output)]

      return(output)

    })))

    directions_ix <- sapply(directions_v, function(direction_i) {
      which(direction_i == directions_df$direction)
    })

    # Look for negations in sentences:

    # Define negation markers:
    # negations_v <- c("not")  # (local constant)

    # Which sentences have negations?
    negations_log <- unlist(lapply(def[1:nodes_n], FUN = function(node_sentence) {
      output <- any(sapply(negations_v, FUN = function(negation_i) {
        stringr::str_detect(node_sentence, negation_i)
      }))

      return(output)

    }))


    # B. Adjust / flip directions: ----

    # Convert negated directions / negations:
    directions_v[negations_log] <- directions_df$negation[directions_ix[negations_log]]

    # Convert to direction_f (formal symbol/forward direction/to signal):
    directions_v <- directions_df$direction_f[match(directions_v, table = directions_df$direction)]

    # If any current exit types/directions are 0/FALSE/left/noise, flip their direction:
    # print(exits_v)    # 4debugging
    cur_exits <- get_exit_type(exits_v, verify = FALSE)
    # print(cur_exits)  # 4debugging

    flip_direction_ix <- (cur_exits == exit_types[1])  # exit type == 0 / FALSE / left / noise

    directions_v[flip_direction_ix] <- directions_df$negation[match(directions_v[flip_direction_ix], table = directions_df$direction)]

  } # 5. directions_v.


  # Set final exit (to .5): ----

  exits_v[nodes_n] <- exit_types[3]


  # Save result in x$trees$definitions (1 line, as df): ----

  # NEW code start: ----

  cur_fft <- data.frame(class = classes_v,
                        cue = cues_v,
                        direction = directions_v,
                        threshold = thresholds_v,
                        exit = exits_v,
                        #
                        stringsAsFactors = FALSE
  )

  my_tree_def <- write_fft_df(fft = cur_fft, tree = 1L)
  # print(my_tree_def)  # 4debugging

  # NEW code end. ----

  # +++ here now +++

  # OLD code start: ----

  # # fft_node_sep <- ";"  # (local constant)
  #
  # my_tree_def_o <- data.frame(
  #   # Add. variables:
  #   tree       = 1L,
  #   nodes      = nodes_n,
  #   # Key variables of fft (all plural):
  #   classes    = paste(classes_v,    collapse = fft_node_sep),
  #   cues       = paste(cues_v,       collapse = fft_node_sep),
  #   directions = paste(directions_v, collapse = fft_node_sep),
  #   thresholds = paste(thresholds_v, collapse = fft_node_sep),
  #   exits      = paste(exits_v,      collapse = fft_node_sep),
  #   #
  #   stringsAsFactors = FALSE
  # )
  # # print(my_tree_def_o)  # 4debugging

  # OLD code end. ----


  # # Check: Verify equality of OLD and NEW code results:
  # if (!all.equal(my_tree_def, my_tree_def_o)) { stop("OLD vs. NEW: my_tree_def diff") }


  # Modify object x:
  x$trees$definitions <- my_tree_def
  x$trees$n <- 1L


  # Provide user feedback: ----

  if (!x$params$quiet$fin) {

    # cat(u_f_fin("Successfully created an FFT from 'my.tree' description.\n"))

    cli::cli_alert_success("Created an FFT from 'my.tree' description.")

  }


  # Output: ------

  return(x)

} # fftrees_wordstofftrees().


# ToDo: ------

# - Abstraction: Store anonymous functions as utility functions
#   (to enable re-use from elsewhere).

# eof.
ndphillips/FFTrees documentation built on May 10, 2024, 3:14 a.m.