# inst/doc/mscsweblm4r_vignette.R

## ----echo=FALSE----------------------------------------------------------
# Build with devtools::install(build_vignettes = TRUE)
library("knitr")

# Chunks are evaluated (and purled) only when the NOT_CRAN environment
# variable equals "true" in any letter case; an unset variable reads as ""
# and therefore disables both.
NOT_CRAN <- identical(tolower(Sys.getenv("NOT_CRAN", unset = "")), "true")

# Default chunk options for the remainder of the vignette.
knitr::opts_chunk$set(
  eval = NOT_CRAN,
  purl = NOT_CRAN,
  collapse = TRUE,
  comment = "#>"
)

## ----eval=FALSE----------------------------------------------------------
#  if ("mscsweblm4r" %in% installed.packages()[,"Package"] == FALSE) {
#    install.packages("mscsweblm4r")
#  }

## ----eval=FALSE----------------------------------------------------------
#  if ("mscsweblm4r" %in% installed.packages()[,"Package"] == FALSE) {
#    if ("devtools" %in% installed.packages()[,"Package"] == FALSE) {
#      install.packages("devtools")
#    }
#    devtools::install_github("philferriere/mscsweblm4r")
#  }

## ------------------------------------------------------------------------
library("mscsweblm4r")

# Initialise the package. The chunk's value is either the result of
# weblmInit() or, on failure, the text of the error that was raised.
tryCatch({

  weblmInit()

}, error = function(err) {

  # Read the message from the condition that was actually caught.
  # geterrmessage() reads the session-wide last-error buffer instead, which
  # can hold a stale message when the error was signalled as a condition
  # object rather than a plain string.
  conditionMessage(err)

})

## ----eval=FALSE----------------------------------------------------------
#  [1] "mscsweblm4r: could not load config info from Sys env nor from file"

## ----eval = FALSE--------------------------------------------------------
#    # Retrieve a list of supported web language models
#    weblmListAvailableModels()

## ----eval = FALSE--------------------------------------------------------
#    # Break a string of concatenated words into individual words
#    weblmBreakIntoWords(
#      textToBreak,                      # ASCII only
#      modelToUse = "body",              # "title"|"anchor"|"query"(default)|"body"
#      orderOfNgram = 5L,                # 1L|2L|3L|4L|5L(default)
#      maxNumOfCandidatesReturned = 5L   # Default: 5L
#    )

## ----eval = FALSE--------------------------------------------------------
#    # Get the words most likely to follow a sequence of words
#    weblmGenerateNextWords(
#      precedingWords,                  # ASCII only
#      modelToUse = "title",            # "title"|"anchor"|"query"(default)|"body"
#      orderOfNgram = 4L,               # 1L|2L|3L|4L|5L(default)
#      maxNumOfCandidatesReturned = 5L  # Default: 5L
#    )

## ----eval = FALSE--------------------------------------------------------
#    # Calculate joint probability a particular sequence of words will appear together
#    weblmCalculateJointProbability(
#      inputWords,                      # ASCII only
#      modelToUse = "query",            # "title"|"anchor"|"query"(default)|"body"
#      orderOfNgram = 4L                # 1L|2L|3L|4L|5L(default)
#    )

## ----eval = FALSE--------------------------------------------------------
#    # Calculate conditional probability a particular word will follow a given sequence of words
#    weblmCalculateConditionalProbability(
#      precedingWords,                  # ASCII only
#      continuations,                   # ASCII only
#      modelToUse = "title",            # "title"|"anchor"|"query"(default)|"body"
#      orderOfNgram = 4L                # 1L|2L|3L|4L|5L(default)
#    )

## ------------------------------------------------------------------------
# The chunk's value is either the result of weblmListAvailableModels() or,
# on failure, the text of the error that was raised.
tryCatch({

  # Retrieve a list of supported web language models
  weblmListAvailableModels()

}, error = function(err) {

  # Report the message of the caught condition itself. geterrmessage() reads
  # the session-wide last-error buffer, which can hold a stale message when
  # the error was signalled as a condition object.
  conditionMessage(err)

})

## ------------------------------------------------------------------------
# The chunk's value is either the result of weblmBreakIntoWords() or, on
# failure, the text of the error that was raised.
tryCatch({

  # Break a string of concatenated words into individual words
  weblmBreakIntoWords(
    textToBreak = "testforwordbreak", # ASCII only
    modelToUse = "body",              # "title"|"anchor"|"query"(default)|"body"
    orderOfNgram = 5L,                # 1L|2L|3L|4L|5L(default)
    maxNumOfCandidatesReturned = 5L   # Default: 5L
  )

}, error = function(err) {

  # Report the message of the caught condition itself. geterrmessage() reads
  # the session-wide last-error buffer, which can hold a stale message when
  # the error was signalled as a condition object.
  conditionMessage(err)

})

## ------------------------------------------------------------------------
# The chunk's value is either the result of weblmGenerateNextWords() or, on
# failure, the text of the error that was raised.
tryCatch({

  # Get the words most likely to follow a sequence of words
  weblmGenerateNextWords(
    precedingWords = "how are you",  # ASCII only
    modelToUse = "title",            # "title"|"anchor"|"query"(default)|"body"
    orderOfNgram = 4L,               # 1L|2L|3L|4L|5L(default)
    maxNumOfCandidatesReturned = 5L  # Default: 5L
  )

}, error = function(err) {

  # Report the message of the caught condition itself. geterrmessage() reads
  # the session-wide last-error buffer, which can hold a stale message when
  # the error was signalled as a condition object.
  conditionMessage(err)

})

## ------------------------------------------------------------------------
# The chunk's value is either the result of weblmCalculateJointProbability()
# or, on failure, the text of the error that was raised.
tryCatch({

  # Calculate joint probability a particular sequence of words will appear together
  weblmCalculateJointProbability(
    inputWords = c("where", "is", "San", "Francisco", "where is",
                   "San Francisco", "where is San Francisco"),  # ASCII only
    modelToUse = "query",                     # "title"|"anchor"|"query"(default)|"body"
    orderOfNgram = 4L                         # 1L|2L|3L|4L|5L(default)
  )

}, error = function(err) {

  # Report the message of the caught condition itself. geterrmessage() reads
  # the session-wide last-error buffer, which can hold a stale message when
  # the error was signalled as a condition object.
  conditionMessage(err)

})

## ------------------------------------------------------------------------
# The chunk's value is either the result of
# weblmCalculateConditionalProbability() or, on failure, the text of the
# error that was raised.
tryCatch({

  # Calculate conditional probability a particular word will follow a given sequence of words
  weblmCalculateConditionalProbability(
    precedingWords = "hello world wide",       # ASCII only
    continuations = c("web", "range", "open"), # ASCII only
    modelToUse = "title",                      # "title"|"anchor"|"query"(default)|"body"
    orderOfNgram = 4L                          # 1L|2L|3L|4L|5L(default)
  )

}, error = function(err) {

  # Report the message of the caught condition itself. geterrmessage() reads
  # the session-wide last-error buffer, which can hold a stale message when
  # the error was signalled as a condition object.
  conditionMessage(err)

})

# Try the mscsweblm4r package in your browser
#
# Any scripts or data that you put into this service are public.
#
# mscsweblm4r documentation built on May 2, 2019, 3:46 p.m.