Nothing
#' Yearly occurrences of top keywords/terms
#'
#' It calculates yearly occurrences of top keywords/terms.
#'
#' @param M is a data frame obtained by the converting function \code{\link{convert2df}}.
#' It is a data matrix with cases corresponding to articles and variables to Field Tag in the original WoS or SCOPUS file.
#' @param Tag is a character object. It indicates one of the keyword field tags of the
#' standard ISI WoS Field Tag codify (ID, DE, KW_Merged) or a field tag created by \code{\link{termExtraction}} function (TI_TM, AB_TM, etc.).
#' @param sep is the field separator character. This character separates strings in each keyword column of the data frame. The default is \code{sep = ";"}.
#' @param top is a numeric. It indicates the number of top keywords to analyze. The default value is 10.
#' @param cdf is a logical. If TRUE, the function calculates the cumulative occurrences distribution.
#' @param remove.terms is a character vector. It contains a list of additional terms to delete from the documents before term extraction. The default is \code{remove.terms = NULL}.
#' @param synonyms is a character vector. Each element contains a list of synonyms, separated by ";", that will be merged into a single term (the first word contained in the vector element). The default is \code{synonyms = NULL}.
#' @return an object of class \code{data.frame}
#' @examples
#'
#' data(scientometrics, package = "bibliometrixData")
#' topKW <- KeywordGrowth(scientometrics, Tag = "ID", sep = ";", top = 5, cdf = TRUE)
#' topKW
#'
#' # Plotting results
#' \dontrun{
#' install.packages("reshape2")
#' library(reshape2)
#' library(ggplot2)
#' DF <- melt(topKW, id = "Year")
#' ggplot(DF, aes(Year, value, group = variable, color = variable)) + geom_line
#' }
#'
#' @export
KeywordGrowth <- function(M, Tag = "ID", sep = ";", top = 10, cdf = TRUE, remove.terms = NULL, synonyms = NULL) {
i <- which(names(M) == Tag)
PY <- as.numeric(M$PY)
Tab <- (strsplit(as.character(M[, i]), sep))
Y <- rep(PY, lengths(Tab))
A <- data.frame(Tab = unlist(Tab), Y = Y)
A$Tab <- trim.leading(A$Tab)
A <- A[A$Tab != "", ]
A <- A[!is.na(A$Y), ]
### remove terms
terms <- data.frame(Tab = toupper(remove.terms))
A <- anti_join(A, terms)
# end of block
### Merge synonyms in the vector synonyms
if (length(synonyms) > 0 & is.character(synonyms)) {
s <- strsplit(toupper(synonyms), ";")
snew <- trimws(unlist(lapply(s, function(l) l[1])))
sold <- (lapply(s, function(l) trimws(l[-1])))
for (i in 1:length(s)) {
A <- A %>%
mutate(
# Tab = str_replace_all(Tab, paste(sold[[i]], collapse="|",sep=""),snew[i])
# Tab= str_replace_all(Tab, str_replace_all(str_replace_all(paste(sold[[i]], collapse="|",sep=""),"\\(","\\\\("),"\\)","\\\\)"),snew[i]),
Tab = stringi::stri_replace_all_regex(Tab, stringi::stri_replace_all_regex(stringi::stri_replace_all_regex(paste(sold[[i]], collapse = "|", sep = ""), "\\(", "\\\\("), "\\)", "\\\\)"), snew[i])
)
}
}
# end of block
Ymin <- min(A$Y)
Ymax <- max(A$Y)
Year <- Ymin:Ymax
Tab <- names(sort(table(A$Tab), decreasing = TRUE))[1:top]
words <- matrix(0, length(Year), top + 1)
words <- data.frame(words)
names(words) <- c("Year", Tab)
words[, 1] <- Year
for (j in 1:length(Tab)) {
word <- (table(A[A$Tab %in% Tab[j], 2]))
words[, j + 1] <- trim.years(word, Year, cdf)
}
return(words)
}
trim.years <- function(w, Year, cdf) {
Y <- as.numeric(names(w))
W <- matrix(0, length(Year), 1)
for (i in 1:length(Year)) {
if (Y[1] == Year[i] & length(Y) > 0) {
W[i, 1] <- w[1]
Y <- Y[-1]
w <- w[-1]
}
}
if (isTRUE(cdf)) W <- cumsum(W)
names(W) <- Year
W <- data.frame(W)
return(W)
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.