# Nothing
#' @title map_word_to_step
#' @description Scores each row of \code{stepsMatrix} by the fraction of its
#'   marker words that match at least one word of the text vocabulary, and
#'   computes a usage \code{degree} as the sum, over marker words, of the
#'   smallest frequency among the vocabulary words each marker matches (0 when
#'   a marker matches nothing). Rows whose score is below their
#'   \code{ThresholdValue} get \code{degree} 0 and zeros in every column after
#'   the fifth.
#' @details Marker words are passed to \code{grepl} as case-insensitive
#'   regular expressions. \code{NA} entries of \code{stepsMatrix} are replaced
#'   by 0 before any other processing.
#' @param wordsMatrix The word matrix generated by the function TermDocumentMatrix in the tm package.
#'   The columns \code{prevalent}, \code{longest}, \code{shortest} and
#'   \code{freq} are read; \code{freq} is taken to apply row-wise to each of
#'   the three word columns.
#' @param stepsMatrix Manually constructed metabolic process matrix.
#'   The columns \code{MarkerWords} (marker words separated by \code{";"}),
#'   \code{ThresholdValue} and \code{Steps} are read by name; columns 1, 2
#'   and 4 are dropped by position from the result.
#' @return Specific metabolic process matrix based on text content mapping:
#'   a data frame with row names taken from \code{Steps}, a leading
#'   \code{degree} column, and the remaining columns of \code{stepsMatrix}.
#' @import stringr
#' @export
#' @examples
#' \donttest{matrixProcess <- map_word_to_step(wordsMatrix, stepsMatrix)}
map_word_to_step <- function(wordsMatrix, stepsMatrix) {
  stepsMatrix[is.na(stepsMatrix)] <- 0

  # Vocabulary and frequency vectors must stay the same length so that a
  # match mask computed on `vocab` indexes the right entries of `vocab_freq`.
  # De-duplicating only the words would shorten the mask, and R would silently
  # recycle it over the frequencies, picking values of unrelated words.
  vocab <- as.character(
    c(wordsMatrix$prevalent, wordsMatrix$longest, wordsMatrix$shortest)
  )
  vocab_freq <- rep(wordsMatrix$freq, 3)

  # Split one ";"-separated marker string into its tokens. strsplit() drops a
  # single trailing empty token, so a delimiter is appended first; this keeps
  # every token of the input, including an empty one after a trailing ";".
  split_markers <- function(x) {
    strsplit(paste0(x, ";"), ";", fixed = TRUE)[[1]]
  }
  marker_list <- lapply(as.character(stepsMatrix$MarkerWords), split_markers)

  # Logical mask of the vocabulary words matched by one marker word.
  match_mask <- function(marker) {
    grepl(marker, vocab, ignore.case = TRUE)
  }

  # Mapping score: share of a step's marker words found in the vocabulary.
  score <- vapply(marker_list, function(markers) {
    found <- vapply(
      markers,
      function(m) any(match_mask(m)),
      logical(1),
      USE.NAMES = FALSE
    )
    sum(found) / length(found)
  }, numeric(1))

  # Degree: for every marker word take the minimum frequency among the words
  # it matches (0 when nothing matches), then add these up per step.
  degree <- vapply(marker_list, function(markers) {
    per_marker <- vapply(markers, function(m) {
      matched <- vocab_freq[match_mask(m)]
      if (length(matched) == 0L) 0 else min(matched)
    }, numeric(1), USE.NAMES = FALSE)
    sum(per_marker)
  }, numeric(1))

  # Steps that do not reach their threshold are blanked out.
  below <- !(score >= stepsMatrix$ThresholdValue)
  stepsMatrix[below, -c(1:5)] <- 0
  degree[below] <- 0

  # Drop columns 1, 2 and 4 by position, move Steps into the row names, and
  # put degree first. drop = FALSE keeps a data frame even when only one
  # column is left.
  out <- data.frame(stepsMatrix[, -c(1, 2, 4), drop = FALSE])
  rownames(out) <- out$Steps
  out <- out[, -1, drop = FALSE]
  out$degree <- degree
  out[, c(ncol(out), seq_len(ncol(out) - 1L)), drop = FALSE]
}
# Any scripts or data that you put into this service are public.
# Add the following code to your website.
# For more information on customizing the embed code, read Embedding Snippets.