# inst/doc/RCLabels.R

## ---- include = FALSE---------------------------------------------------------
# Chunk defaults for the whole document: collapse = TRUE and
# "#>" as the comment prefix.
knitr::opts_chunk$set(collapse = TRUE, comment = "#>")

## ----setup, include = FALSE---------------------------------------------------
library(magrittr)
library(RCLabels)

## -----------------------------------------------------------------------------
# Build a custom notation from four delimiter strings:
# the start and end of the prefix and the start and end of the suffix.
my_notation <- notation_vec(
  pref_start = "(",
  pref_end = ") ",
  suff_start = "[",
  suff_end = "]"
)

# Show the notation object (notation objects are character vectors).
my_notation

## -----------------------------------------------------------------------------
# Evaluate each notation object by its bare name so that its value is shown.
# None of these names is defined in this script; presumably they are
# notation objects supplied by the RCLabels package loaded above.
arrow_notation
paren_notation
bracket_notation
first_dot_notation
from_notation
of_notation
to_notation
bracket_arrow_notation

## -----------------------------------------------------------------------------
# Assemble a label from a prefix and a suffix, written in the custom notation.
my_label <- paste_pref_suff(
  pref = "Coal",
  suff = "from Coal mines in USA",
  notation = my_notation
)
my_label

## -----------------------------------------------------------------------------
# Separate the label into its prefix and suffix;
# the result is a named list of strings.
split_pref_suff(my_label, notation = my_notation)

# Swap the prefix and the suffix while keeping the notation unchanged.
flip_pref_suff(my_label, notation = my_notation)

# Rewrite the label in a different notation.
switch_notation(
  my_label,
  from = my_notation,
  to = paren_notation
)

# Rewrite the label in a different notation and swap prefix and suffix
# in the same call.
switch_notation(
  my_label,
  from = my_notation,
  to = paren_notation,
  flip = TRUE
)

## -----------------------------------------------------------------------------
# Pull a single piece out of `my_label`, selected by `which`:
# first the prefix ("pref"), then the suffix ("suff").
# `my_notation` is the same notation that was used to build the label.
get_pref_suff(my_label, which = "pref", notation = my_notation)
get_pref_suff(my_label, which = "suff", notation = my_notation)

## -----------------------------------------------------------------------------
# A vector of three labels written with square brackets around the suffix.
labels <- c(
  "a [of b in c]",
  "d [of e in f]",
  "g [of h in i]"
)
labels

# Split the whole vector of labels in one call.
split_pref_suff(labels, notation = bracket_notation)

## -----------------------------------------------------------------------------
labels

# Put the labels in a one-column tibble, then add a `split` column
# holding the result of split_pref_suff() called with transpose = TRUE.
df <- tibble::tibble(labels = labels)
result <- dplyr::mutate(
  df,
  split = split_pref_suff(
    labels,
    notation = bracket_notation,
    transpose = TRUE
  )
)

# Show the `split` entry of each of the three rows in turn.
result[["split"]][[1]]
result[["split"]][[2]]
result[["split"]][[3]]

## -----------------------------------------------------------------------------
# Show the `prepositions_list` object. It is not defined in this script
# (presumably supplied by RCLabels); it is passed as the `prepositions`
# argument of split_noun_pp() further down.
prepositions_list

## -----------------------------------------------------------------------------
# Show the labels that the calls below operate on.
labels

# Extract the nouns.
get_nouns(labels, notation = bracket_notation)

# Extract the prepositional phrases.
get_pps(labels, notation = bracket_notation)

# Extract the prepositions themselves.
get_prepositions(labels, notation = bracket_notation)

# Extract the objects of the prepositions.
# Objects are named by the preposition of their phrase.
get_objects(labels, notation = bracket_notation)

# The get_piece() function is a convenience function
# that extracts just what you want.
# `piece` selects what to extract: "noun", "pref", "suff",
# or a preposition such as "of" or "in".
get_piece(labels, piece = "noun", notation = bracket_notation)
# NOTE(review): the remaining calls omit `notation` and so rely on
# get_piece()'s default for that argument -- confirm the default is
# appropriate for these bracket-style labels.
get_piece(labels, piece = "pref")
get_piece(labels, piece = "suff")
get_piece(labels, piece = "of")
get_piece(labels, piece = "in")
# An empty string is returned when the preposition is missing.
get_piece(labels, piece = "bogus")

## -----------------------------------------------------------------------------
labels
# Decompose every label into pieces; the pieces are named "noun"
# and by their prepositions.
split_labels <- split_noun_pp(
  labels,
  prepositions = prepositions_list,
  notation = bracket_notation
)
split_labels

# Reassemble the labels from their pieces.
paste_noun_pp(split_labels, notation = bracket_notation)

# Reassemble them again, this time written in a different notation.
paste_noun_pp(split_labels, notation = paren_notation)

## -----------------------------------------------------------------------------
labels

# Give the labels new nouns.
modify_nouns(
  labels,
  new_nouns = c("Coal", "Oil", "Natural gas"),
  notation = bracket_notation
)

## -----------------------------------------------------------------------------
labels

# Use a modification map to change the nouns of several labels
# to "Production" and "Manufacture".
modify_label_pieces(
  labels,
  piece = "noun",
  mod_map = list(
    Production = c("a", "b", "c", "d"),
    Manufacture = c("g", "h", "i", "j")
  ),
  notation = bracket_notation
)

# Use a modification map to change the objects of the "in" preposition.
modify_label_pieces(
  labels,
  piece = "in",
  mod_map = list(GHA = "c", ZAF = c("f", "i")),
  notation = bracket_notation
)

# Use a modification map to change the objects of the "of" preposition.
modify_label_pieces(
  labels,
  piece = "of",
  mod_map = list(Coal = "b", `Crude oil` = c("e", "h")),
  notation = bracket_notation
)

## -----------------------------------------------------------------------------
labels

# Drop every prepositional phrase that begins with "in".
remove_label_pieces(
  labels,
  piece = "in",
  notation = bracket_notation
)

# Drop every prepositional phrase that begins with "of" or "in".
# Some spaces are left behind in the result.
remove_label_pieces(
  labels,
  piece = c("of", "in"),
  notation = bracket_notation
)

## -----------------------------------------------------------------------------
labels <- c(
  "Production [of b in c]",
  "d [of Coal in f]",
  "g [of h in USA]"
)

# When `pieces` is left at its default, the pattern is matched
# against each label as a whole.
match_by_pattern(labels, regex_pattern = "Production")
match_by_pattern(labels, regex_pattern = "Coal")
match_by_pattern(labels, regex_pattern = "USA")

# Anchored at the beginning of the label: this matches.
match_by_pattern(labels, regex_pattern = "^Production")
# Anchored at the end of the label: this does not match.
match_by_pattern(labels, regex_pattern = "Production$")

# Restrict the search to the prefix or to the suffix.
match_by_pattern(labels, regex_pattern = "Production", pieces = "pref")
match_by_pattern(labels, regex_pattern = "Production", pieces = "suff")
# "pref" and "suff" must each be given alone in `pieces`.
# The call below would therefore raise an error.
# match_by_pattern(labels, regex_pattern = "Production", pieces = c("pref", "to"))

# Restrict the search to the noun or to a preposition.
match_by_pattern(labels, regex_pattern = "Production", pieces = "noun")
match_by_pattern(labels, regex_pattern = "Production", pieces = "in")
# More elaborate regex patterns, here built with make_or_pattern(), also work.
match_by_pattern(
  labels,
  regex_pattern = make_or_pattern(c("c", "f")),
  pieces = "in"
)
match_by_pattern(
  labels,
  regex_pattern = make_or_pattern(c("b", "Coal", "USA")),
  pieces = "in"
)
match_by_pattern(
  labels,
  regex_pattern = make_or_pattern(c("b", "Coal", "USA")),
  pieces = c("of", "in")
)
# A custom set of prepositions can be supplied as well.
match_by_pattern(
  labels,
  regex_pattern = make_or_pattern(c("b", "Coal", "GBR", "USA")),
  pieces = c("noun", "of", "in", "to"),
  prepositions = c("of", "to", "in")
)

## -----------------------------------------------------------------------------
labels <- c(
  "Production [of b in c]",
  "d [of Coal in f]",
  "g [of h in USA]"
)
labels

# With `pieces = "all"` (the default), replacements may happen
# anywhere in the label.
replace_by_pattern(
  labels,
  regex_pattern = "Production",
  replacement = "Manufacture"
)
replace_by_pattern(
  labels,
  regex_pattern = "Coal",
  replacement = "Oil"
)
replace_by_pattern(
  labels,
  regex_pattern = "USA",
  replacement = "GHA"
)

# Restrict replacement to the prefix or to the suffix.
replace_by_pattern(
  labels,
  regex_pattern = "Production",
  replacement = "Manufacture",
  pieces = "pref"
)
replace_by_pattern(
  labels,
  regex_pattern = "Coa",
  replacement = "Bow",
  pieces = "suff"
)
# USA sits in the suffix, so restricting to the prefix
# should leave the labels unchanged.
replace_by_pattern(
  labels,
  regex_pattern = "SA",
  replacement = "SSR",
  pieces = "pref"
)
# Restricting to the suffix instead turns USA into USSR.
replace_by_pattern(
  labels,
  regex_pattern = "SA",
  replacement = "SSR",
  pieces = "suff"
)
# "pref" and "suff" cannot be given together,
# so the call below would throw an error.
# replace_by_pattern(
#   labels,
#   regex_pattern = "SA",
#   replacement = "SSR",
#   pieces = c("pref", "suff")
# )

# Restrict replacement to the noun or to a preposition.
replace_by_pattern(
  labels,
  regex_pattern = "Production",
  replacement = "Manufacture",
  pieces = "noun"
)
replace_by_pattern(
  labels,
  regex_pattern = "^Pro",
  replacement = "Con",
  pieces = "noun"
)
# No match here: the pattern is anchored to the wrong end of the string.
replace_by_pattern(
  labels,
  regex_pattern = "Pro$",
  replacement = "Con",
  pieces = "noun"
)
# "Production" is a noun, so restricting to "of" changes nothing.
replace_by_pattern(
  labels,
  regex_pattern = "Production",
  replacement = "Manufacture",
  pieces = "of"
)
# A replacement that does apply to an "of" piece.
replace_by_pattern(
  labels,
  regex_pattern = "Coal",
  replacement = "Oil",
  pieces = "of"
)
# "Coal" is not "in" anything, so restricting to "in" changes nothing.
replace_by_pattern(
  labels,
  regex_pattern = "Coal",
  replacement = "Oil",
  pieces = "in"
)
# Replacements that do apply to an "in" piece.
replace_by_pattern(
  labels,
  regex_pattern = "USA",
  replacement = "GBR",
  pieces = "in"
)
replace_by_pattern(
  labels,
  regex_pattern = "A$",
  replacement = "upercalifragilisticexpialidocious",
  pieces = "in"
)

# Try the RCLabels package in your browser

# Any scripts or data that you put into this service are public.

# RCLabels documentation built on April 25, 2023, 5:11 p.m.