# Global knitr chunk options used when rendering this README.
knitr::opts_chunk$set(
  collapse = TRUE,                  # merge source and output into one block
  warning  = FALSE,                 # hide warnings in the rendered output
  message  = FALSE,                 # hide messages in the rendered output
  comment  = "#>",                  # prefix for printed output lines
  fig.path = "man/figures/README-"  # path prefix for generated figures
)
An R package for the SCDV (Sparse Composite Document Vectors) algorithm.
# Wait for a while...
# install.packages("scdv")

# The development version from GitHub:
# install.packages("devtools")
devtools::install_github("teramonagi/scdv")
library(scdv)

# Get example documents from Project Gutenberg
# (http://www.gutenberg.org/wiki/Main_Page)
urls <- c(
  "http://www.gutenberg.org/files/98/98-0.txt",
  "http://www.gutenberg.org/files/1342/1342-0.txt"
)

# Download each URL and extract the response body
x <- purrr::map(urls, ~ httr::content(httr::GET(.x)))

# Pre-processing for each document: split into word tokens and drop English
# stop words; `[[1]]` takes the first element of the tokenizer's result
doc <- purrr::map(
  x,
  ~ tokenizers::tokenize_words(.x, stopwords = stopwords::stopwords("en"))[[1]]
)

# Show the first ten tokens of the first document
doc[[1]][1:10]
# Set the number of clusters (k) and the word2vec dimension (dimension)
k <- 10
dimension <- 30

# Calculate Sparse Composite Document Vector
dv <- scdv::scdv(doc, k, dimension, word2vec_args = list(show_by = 25))
# Calculate embedding expression by word2vec
wv <- scdv::word2vec(doc, dimension, args = list(show_by = 25))
# Sample 10 rows of the word2vec result and visualize them
scdv::visualize(wv[sample(nrow(wv), size = 10), ])

# You can also visualize the document vectors like
# scdv::visualize(dv)
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.