Nothing
## ----setup, include = FALSE---------------------------------------------------
# Chunk defaults for the whole document: collapsed output, "#>" output prefix.
knitr::opts_chunk$set(collapse = TRUE, comment = "#>")
library("kableExtra")
## ---- message=FALSE-----------------------------------------------------------
library("LexisNexisTools")
## ---- eval=FALSE--------------------------------------------------------------
# lnt_sample()
## ---- eval=FALSE--------------------------------------------------------------
# report <- lnt_rename()
## ---- eval=FALSE--------------------------------------------------------------
# report <- lnt_rename(x = getwd(), report = TRUE)
## ---- eval=FALSE--------------------------------------------------------------
# my_files <- list.files(pattern = ".txt", path = getwd(),
# full.names = TRUE, recursive = TRUE, ignore.case = TRUE)
# report <- lnt_rename(x = my_files, report = TRUE)
#
# report
## ---- echo=FALSE--------------------------------------------------------------
library(kableExtra)
# Stage a copy of the bundled sample file under a fresh temp path so that the
# rename below operates on the copy.
temp <- tempfile(fileext = ".TXT")
silent <- file.copy(
  from = system.file("extdata", "sample.TXT", package = "LexisNexisTools"),
  to = temp,
  overwrite = TRUE
)
# file.copy() signals failure only through its return value; stop with a clear
# message here instead of letting lnt_rename() fail on a missing file.
if (!isTRUE(silent)) {
  stop("Could not copy the bundled sample.TXT to ", temp, call. = FALSE)
}
report <- lnt_rename(
  x = temp,
  simulate = FALSE,
  report = TRUE,
  verbose = FALSE
)
# Keep the full path of the renamed file; it is read with lnt_read() later.
newfile <- report$name_new
# Show plain file names instead of temp paths in the printed table.
report$name_orig <- "sample.TXT"
report$name_new <- basename(report$name_new)
kable(report, format = "markdown")
## ----eval=FALSE---------------------------------------------------------------
# LNToutput <- lnt_read(x = getwd())
## ----echo=FALSE, message=FALSE------------------------------------------------
# Read the renamed sample file.
LNToutput <- lnt_read(x = newfile)
# Reduce the Source_File column to bare file names (drops the directory part).
LNToutput@meta$Source_File <- basename(
  LNToutput@meta$Source_File
)
## ---- eval=FALSE--------------------------------------------------------------
# meta_df <- LNToutput@meta
# articles_df <- LNToutput@articles
# paragraphs_df <- LNToutput@paragraphs
#
# # Print meta to get an idea of the data
# head(meta_df, n = 3)
#
## ---- echo=FALSE--------------------------------------------------------------
# Pull the meta slot and strip directories from the source file names.
meta_df <- LNToutput@meta
meta_df$Source_File <- basename(meta_df$Source_File)

# Pull the remaining two slots.
articles_df <- LNToutput@articles
paragraphs_df <- LNToutput@paragraphs

# Render the first three meta rows as a markdown table.
kable(
  head(meta_df, n = 3),
  format = "markdown"
)
## ---- message=FALSE-----------------------------------------------------------
# Convert the LNToutput object to a data.frame using the default `what`.
meta_articles_df <- lnt_convert(
  LNToutput,
  to = "data.frame"
)
# Same conversion, but requesting the paragraphs instead.
meta_paragraphs_df <- lnt_convert(
  LNToutput,
  to = "data.frame",
  what = "Paragraphs"
)
## ----eval=FALSE---------------------------------------------------------------
# rDNA_docs <- lnt_convert(LNToutput, to = "rDNA")
#
# quanteda_corpus <- lnt_convert(LNToutput, to = "quanteda")
#
# tCorpus <- lnt_convert(LNToutput, to = "corpustools")
#
# tidy <- lnt_convert(LNToutput, to = "tidytext")
#
# Corpus <- lnt_convert(LNToutput, to = "tm")
#
# dbloc <- lnt_convert(LNToutput, to = "SQLite")
## ---- eval=FALSE--------------------------------------------------------------
# # Either provide a LNToutput
# duplicates_df <- lnt_similarity(LNToutput = LNToutput,
# threshold = 0.97)
## ---- results='hide', message=FALSE-------------------------------------------
# Alternatively, pass the relevant pieces (texts, dates, IDs) individually.
duplicates_df <- lnt_similarity(
  texts = LNToutput@articles$Article,
  dates = LNToutput@meta$Date,
  IDs = LNToutput@articles$ID,
  threshold = 0.97
)
## ---- eval=FALSE--------------------------------------------------------------
# lnt_diff(duplicates_df, min = 0, max = Inf)
## -----------------------------------------------------------------------------
# Keep only the pairs whose relative distance is below 0.2. The trailing comma
# makes this an explicit row filter, so it selects rows whether duplicates_df
# is a data.table or a plain data.frame (without the comma a data.frame would
# be indexed by column instead).
duplicates_df <- duplicates_df[duplicates_df$rel_dist < 0.2, ]
# Subset to the entries whose meta ID is not listed in ID_duplicate.
LNToutput <- LNToutput[!LNToutput@meta$ID %in% duplicates_df$ID_duplicate, ]
## -----------------------------------------------------------------------------
# Print the subset selected by index 1.
LNToutput[1, ]
## ---- eval=FALSE--------------------------------------------------------------
# #' generate new dataframes without highly similar duplicates
# meta_df <- LNToutput@meta
# articles_df <- LNToutput@articles
# paragraphs_df <- LNToutput@paragraphs
#
# # Print e.g., meta to see how the data changed
# head(meta_df, n = 3)
## ---- echo=FALSE--------------------------------------------------------------
# Re-extract the three slots from the filtered object.
paragraphs_df <- LNToutput@paragraphs
articles_df <- LNToutput@articles
meta_df <- LNToutput@meta

# Render the first three meta rows as a markdown table.
kable(
  head(meta_df, n = 3),
  format = "markdown"
)
## -----------------------------------------------------------------------------
# Look up a fixed phrase in the object and print the result.
lnt_lookup(LNToutput, pattern = "statistical computing")
## -----------------------------------------------------------------------------
# Store the lookup result as a meta column, then keep only the entries whose
# result is not NULL.
LNToutput@meta$stats <- lnt_lookup(LNToutput, pattern = "statistical computing")
# vapply() always returns a logical vector, even for zero entries; sapply()
# would return an empty list there, which `!` cannot negate.
LNToutput <- LNToutput[!vapply(LNToutput@meta$stats, is.null, logical(1)), ]
LNToutput
## -----------------------------------------------------------------------------
# Same lookup with a pattern that ends in a lazy quantifier.
lnt_lookup(LNToutput, pattern = "stat.*?")
## -----------------------------------------------------------------------------
# Tabulate all hits of a pattern that lazily extends to a word boundary.
table(unlist(lnt_lookup(LNToutput, pattern = "stat.+?\\b")))
## ----echo = FALSE-------------------------------------------------------------
# Clean up: remove the renamed temp copy read earlier in this script (its path
# is held in `newfile`) as well as any sample.TXT in the working directory.
unlink(c("sample.TXT", newfile))
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.