Nothing
## ----echo=FALSE, results='hide'-----------------------------------------------
# Default knitr chunk options for the rendered document: source and output are
# collapsed into a single block, every output line is prefixed with "#>", and
# generated figure files are placed under reference/figures/.
knitr::opts_chunk$set(
collapse = TRUE,
comment = "#>",
fig.path = "reference/figures/"
)
## ----setup, echo=FALSE, results='hide', message=FALSE-------------------------
library(wordpredictor)
# The level of verbosity in the information messages. This value is passed as
# the `ve` argument to every wordpredictor object created below.
# NOTE(review): 0 presumably means that no information messages are shown;
# confirm against the package documentation.
ve <- 0
#' Set up the test environment
#'
#' @description
#' Creates an EnvManager object with `rp = "../"` and asks it to set up the
#' environment for the given files.
#' @param rf The required files.
#' @param ve The verbosity level.
#' @return The list of directories in the test environment, as returned by
#'   the `setup_env` method of the EnvManager object.
setup_env <- function(rf, ve) {
  # The manager is created and the required files are prepared in one chain
  env_dir <- EnvManager$new(rp = "../", ve = ve)$setup_env(rf)
  env_dir
}
#' Clean up the test environment
#'
#' @description
#' Used to clean up the test environment
#' @param ve The verbosity level.
#' @return The value returned by the `td_env` method of the EnvManager object.
clean_up <- function(ve) {
  # An object of class EnvManager is created.
  # NOTE(review): setup_env() passes rp = "../" while this call relies on the
  # default rp; confirm that both refer to the same environment.
  em <- EnvManager$new(ve = ve)
  # The test environment is removed. TRUE is spelled out because T is an
  # ordinary variable that can be reassigned, which would silently change
  # the argument passed here.
  em$td_env(TRUE)
}
## ----generate-model, results='hide', cache=FALSE------------------------------
# Only the raw input text is needed to generate a model
rf <- "input.txt"
# The test environment is set up; ed is the value returned by setup_env and is
# used below as the working directory of the generator
ed <- setup_env(rf, ve)
# An n-gram model is generated with the default data cleaning and tokenization
# options (both option lists are left empty). See the following section on how
# to customize these options. input.txt is the name of the input data file and
# should be present in the ed directory. The generated model file is also
# placed in that directory.
mg <- ModelGenerator$new(
  name = "def-model",
  desc = "N-gram model generating using default options",
  fn = "def-model.RDS",
  df = "input.txt",
  n = 4,
  ssize = 0.1,
  dir = ed,
  dc_opts = list(),
  tg_opts = list(),
  ve = ve
)
# The n-gram model is generated and saved under the file name given by fn
# (def-model.RDS)
mg$generate_model()
# The test environment is removed again
clean_up(ve)
## ----model-evaluation-1, cache=FALSE------------------------------------------
# The required files: the generated model and the cleaned validation data
rf <- c("def-model.RDS", "validate-clean.txt")
# The test environment is set up; ed is the directory that holds the files
ed <- setup_env(rf, ve)
# The model file name. file.path() builds the path instead of pasting the
# separator by hand.
mfn <- file.path(ed, "def-model.RDS")
# The path to the cleaned validation file
vfn <- file.path(ed, "validate-clean.txt")
# ModelEvaluator class object is created
me <- ModelEvaluator$new(mf = mfn, ve = ve)
# The intrinsic evaluation is performed on the first 20 lines of the
# validation file
stats <- me$intrinsic_evaluation(lc = 20, fn = vfn)
# The test environment is cleaned up
clean_up(ve)
## ----model-evaluation-2, cache=FALSE------------------------------------------
# The required files: the generated model and the cleaned validation data
rf <- c("def-model.RDS", "validate-clean.txt")
# The test environment is set up; ed is the directory that holds the files
ed <- setup_env(rf, ve)
# The model file name. file.path() builds the path instead of pasting the
# separator by hand.
mfn <- file.path(ed, "def-model.RDS")
# The path to the cleaned validation file
vfn <- file.path(ed, "validate-clean.txt")
# ModelEvaluator class object is created
me <- ModelEvaluator$new(mf = mfn, ve = ve)
# The extrinsic evaluation is performed on the first 100 lines of the
# validation file
stats <- me$extrinsic_evaluation(lc = 100, fn = vfn)
# The test environment is cleaned up
clean_up(ve)
## ----predict-word, cache=FALSE------------------------------------------------
# The required files.
# NOTE(review): validate-clean.txt is requested here although this chunk only
# reads the model file; confirm whether it is still needed.
rf <- c("def-model.RDS", "validate-clean.txt")
# The test environment is set up; ed is the directory that holds the files
ed <- setup_env(rf, ve)
# The model file name. file.path() builds the path instead of pasting the
# separator by hand.
mfn <- file.path(ed, "def-model.RDS")
# An object of class ModelPredictor is created. The mf parameter is the name of
# the model file that was generated in the previous example.
mp <- ModelPredictor$new(mf = mfn, ve = ve)
# Given the words: "how are", the next word is predicted. The top 3 most likely
# next words are returned along with their respective probabilities.
res <- mp$predict_word(words = "how are", 3)
# The test environment is cleaned up
clean_up(ve)
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.