Nothing
#' A quick way to obtain the word-occurrence data.frame from a set of documents.
#'
#' @param mywd A character variable containing the working directory.
#' @param language The language used ("French", "English", "Spanish").
#' @param excludeSW A logical to exclude stop words.
#' @return A single word-occurrence data.frame.
#' @examples
#' \dontrun{
#' data("loremIpsum")
#' loremIpsum01 <- loremIpsum[1:100]
#' loremIpsum02 <- loremIpsum[101:200]
#' loremIpsum03 <- loremIpsum[201:300]
#' loremIpsum04 <- loremIpsum[301:400]
#' loremIpsum05 <- loremIpsum[401:500]
#' subDir <- "RESULTS"
#' dir.create(file.path(getwd(), subDir), showWarnings = FALSE)
#' write(x = loremIpsum01, file = "RESULTS/loremIpsum01.txt")
#' write(x = loremIpsum02, file = "RESULTS/loremIpsum02.txt")
#' write(x = loremIpsum03, file = "RESULTS/loremIpsum03.txt")
#' write(x = loremIpsum04, file = "RESULTS/loremIpsum04.txt")
#' write(x = loremIpsum05, file = "RESULTS/loremIpsum05.txt")
#' wordOccuDF <- getwordOccuDF(mywd = paste0(getwd(), "/RESULTS"),
#' excludeSW = FALSE)
#' file.remove(list.files(full.names = TRUE,
#' path = paste0(getwd(), "/RESULTS"), pattern = "loremIpsum"))
#' }
#' @export
getwordOccuDF <- function(mywd, language = "English", excludeSW = TRUE){
  # Files found in mywd, grouped by extension; the $pdf and $txt entries
  # are the ones used below.
  listFilesExt <- getListFiles(mywd)

  # Only call an extractor when files of that type are present. paste0()
  # treats a zero-length file list as "", which would otherwise produce the
  # single bogus path "<mywd>/" and hand it to the extractor.
  wordFreqPDF <- NULL
  if (length(listFilesExt$pdf) > 0) {
    wordFreqPDF <- getPDF(myPDFs = paste0(mywd, "/", listFilesExt$pdf))
  }
  wordFreqTXT <- NULL
  if (length(listFilesExt$txt) > 0) {
    wordFreqTXT <- getTXT(myTXTs = paste0(mywd, "/", listFilesExt$txt))
  }

  # PDF results come first, followed by TXT results.
  wordFreq <- append(wordFreqPDF, wordFreqTXT)

  # Optional stop-word removal for the requested language.
  if (excludeSW == TRUE) {
    wordFreq <- excludeStopWords(wordF = wordFreq, lang = language)
  }

  # maxWords = Inf: no upper bound is requested on the number of words.
  wordFreq <- truncNumWords(maxWords = Inf, wordF = wordFreq)

  # Merge the per-document results into the returned object.
  mergeWordFreq(wordF = wordFreq)
}
# #' @param corA A logical to for correspondence analysis.
#' A quick way to compute a set of analyses from the word-occurrence data.frame.
#'
#' @param dataset A single word-occurrence data.frame.
#' @param wcloud A logical indicating whether to run the word cloud analysis.
#' @param sumStats A logical indicating whether to run the summary statistics analysis.
#' @param freqW A logical indicating whether to run the word frequency analysis.
#' @param clust A logical indicating whether to run the cluster analysis.
#' @param metacom A logical indicating whether to run the metacommunity analysis.
#' @return A set of analyses available from the \code{inpdfr} package.
#' @examples
#' \dontrun{
#' data("wordOccuDF")
#' getAllAnalysis(dataset = wordOccuDF, wcloud = FALSE, sumStats = FALSE)
#' }
#' @export
getAllAnalysis <- function(dataset,
                           wcloud = TRUE,
                           sumStats = TRUE,
                           freqW = TRUE,
                           clust = TRUE,
                           metacom = TRUE) {
  # Each flag switches one family of analyses on or off; every analysis is
  # run on the same `dataset`. The correspondence analysis (former `corA`
  # flag, which called doCA) is currently disabled.

  # Word cloud.
  if (wcloud == TRUE) {
    makeWordcloud(
      wordF = dataset,
      wcminFreq = 50,
      wcmaxWords = Inf,
      wcRandOrder = FALSE,
      wcCol = RColorBrewer::brewer.pal(8, "Dark2"),
      getPlot = c(FALSE, TRUE)
    )
  }

  # Summary statistics: bar plot, histogram, then occurrences.
  if (sumStats == TRUE) {
    getSummaryStatsBARPLOT(wordF = dataset)
    getSummaryStatsHISTO(wordF = dataset)
    getSummaryStatsOCCUR(wordF = dataset)
  }

  # Word frequencies: most frequent words for three sizes, then the
  # getXFreqWord call with 50 as its second argument.
  if (freqW == TRUE) {
    for (topN in c(5, 50, 100)) {
      getMostFreqWord(wordF = dataset, numWords = topN)
    }
    getXFreqWord(wordF = dataset, 50)
  }

  # Clustering: hierarchical (ward.D2) followed by k-means.
  if (clust == TRUE) {
    doCluster(wordF = dataset, myMethod = "ward.D2", gp = FALSE, nbGp = 5)
    doKmeansClust(
      wordF = dataset,
      nbClust = 4,
      nbIter = 10,
      algo = "Hartigan-Wong"
    )
  }

  # Metacommunity analyses. This stays the last statement so the function's
  # value is still whatever this `if` evaluates to.
  if (metacom == TRUE) {
    doMetacomEntropart(wordF = dataset)
    doMetacomMetacom(wordF = dataset, numSim = 10, limit = "Inf")
  }
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.