inst/doc/overview.R

## ----echo=FALSE, results='hide'-----------------------------------------------
knitr::opts_chunk$set(
    collapse = TRUE,
    comment = "#>",
    fig.path = "reference/figures/"
)

## ----setup, echo=FALSE, results='hide', message=FALSE-------------------------
library(wordpredictor)

# The level of verbosity in the information messages
ve <- 0

#' @description
#' Used to setup the test environment
#' @param rf The required files.
#' @param ve The verbosity level.
#' @return The list of directories in the test environment
setup_env <- function(rf, ve) {
    # An object of class EnvManager is created
    em <- EnvManager$new(rp = "../", ve = ve)
    # The required files are downloaded
    ed <- em$setup_env(rf)

    return(ed)
}

#' @description
#' Used to clean up the test environment
clean_up <- function(ve) {
    # An object of class EnvManager is created
    em <- EnvManager$new(ve = ve)
    # The test environment is removed
    em$td_env(T)
}

## ----generate-model, results='hide', cache=FALSE------------------------------
# The required files
rf <- c("input.txt")
# The test environment is setup
ed <- setup_env(rf, ve)

# The following code generates n-gram model using default options for data
# cleaning and tokenization. See the following section on how to customize these
# options. Note that input.txt is the name of the input data file. It should be
# present in the ed directory. The generated model file is also placed in this
# directory.

# ModelGenerator class object is created
mg <- ModelGenerator$new(
    name = "def-model",
    desc = "N-gram model generating using default options",
    fn = "def-model.RDS",
    df = "input.txt",
    n = 4,
    ssize = 0.1,
    dir = ed,
    dc_opts = list(),
    tg_opts = list(),
    ve = ve
)

# Generates n-gram model. The output is the file
# ./data/model/def-model.RDS
mg$generate_model()

# The test environment is cleaned up
clean_up(ve)

## ----model-evaluation-1, cache=FALSE------------------------------------------
# The required files
rf <- c("def-model.RDS", "validate-clean.txt")
# The test environment is setup
ed <- setup_env(rf, ve)

# The model file name
mfn <- paste0(ed, "/def-model.RDS")
# The path to the cleaned validation file
vfn <- paste0(ed, "/validate-clean.txt")
# ModelEvaluator class object is created
me <- ModelEvaluator$new(mf = mfn, ve = ve)
# The intrinsic evaluation is performed on first 20 lines
stats <- me$intrinsic_evaluation(lc = 20, fn = vfn)

# The test environment is cleaned up
clean_up(ve)

## ----model-evaluation-2, cache=FALSE------------------------------------------
# The required files
rf <- c("def-model.RDS", "validate-clean.txt")
# The test environment is setup
ed <- setup_env(rf, ve)

# The model file name
mfn <- paste0(ed, "/def-model.RDS")
# The path to the cleaned validation file
vfn <- paste0(ed, "/validate-clean.txt")
# ModelEvaluator class object is created
me <- ModelEvaluator$new(mf = mfn, ve = ve)
# The intrinsic evaluation is performed on first 100 lines
stats <- me$extrinsic_evaluation(lc = 100, fn = vfn)

# The test environment is cleaned up
clean_up(ve)

## ----predict-word, cache=FALSE------------------------------------------------
# The required files
rf <- c("def-model.RDS", "validate-clean.txt")
# The test environment is setup
ed <- setup_env(rf, ve)

# The model file name
mfn <- paste0(ed, "/def-model.RDS")
# An object of class ModelPredictor is created. The mf parameter is the name of
# the model file that was generated in the previous example.
mp <- ModelPredictor$new(mf = mfn, ve = ve)
# Given the words: "how are", the next word is predicted. The top 3 most likely
# next words are returned along with their respective probabilities.
res <- mp$predict_word(words = "how are", 3)
# The test environment is cleaned up
clean_up(ve)

Try the wordpredictor package in your browser

Any scripts or data that you put into this service are public.

wordpredictor documentation built on Jan. 4, 2022, 5:07 p.m.