# Nothing
# #################################################
# Function for generating a frequency list of words or other (linguistic)
# features. It basically counts the elements of a vector and returns a vector
# of these elements in descending order of frequency.
# Refer to help(make.frequency.list) for further details.
# #################################################
# Builds a list of the most frequent features by walking through the samples
# of 'data' one at a time instead of tabulating the whole corpus at once.
# Each sample data[[k]] is tabulated separately, turned into within-sample
# relative frequencies, and merged into a running vector of scores; both the
# per-sample table and the running vector are truncated to the 10000
# top-ranked features to keep memory use bounded.
#
# Arguments:
#   data      a list of samples (e.g. an object of class 'stylo.corpus') or a
#             vector; it must have at least 3 elements.
#   value     if FALSE (default), only the feature names are returned;
#             if TRUE, a named numeric vector of the aggregated scores.
#   head      optional single number limiting how many top features are
#             returned; when NULL (default) at most 5000 are returned.
#   relative  accepted for signature compatibility with make.frequency.list();
#             it is not used here, since the aggregation always operates on
#             relative frequencies.
#
# Returns the features in descending order of their aggregated score. The
# result is never padded: when fewer features exist than requested, only the
# existing ones are returned.
#
# Stops with an error when 'data' is neither a list/'stylo.corpus' nor a
# vector, or when it has fewer than 3 elements.
make.frequency.list.optimized = function(data,
                                         value = FALSE,
                                         head = NULL,
                                         relative = TRUE) {

  # upper bound on the number of features tracked at any moment
  max.tracked = 10000
  # number of features returned when 'head' is not specified
  default.head = 5000

  #####################################
  # first, sanitize the input dataset:
  # a 'stylo.corpus' object or any list is accepted as is; anything else
  # has to be a vector, otherwise produce an error message and stop
  if(!(inherits(data, "stylo.corpus") || is.list(data)) &&
     !is.vector(data)) {
    stop("unable to make a list of frequencies")
  }

  # refuse datasets that are too short to be processed
  if(length(data) < 3) {
    stop("you try to measure frequencies of an empty vector!")
  }

  # sanitize the requested number of features (the argument 'head')
  if(is.numeric(head) && length(head) == 1 && !is.na(head)) {
    no.of.features = abs(round(head))
    if(no.of.features == 0) no.of.features = 1
  } else {
    no.of.features = default.head
  }
  #####################################

  aggregate.values = c()
  aggregate.length = 0

  for(k in seq_along(data)) {

    # progress indicator: print the sample number (erased again below)
    message(sprintf("%7s", k), appendLF = FALSE)

    # frequencies of the current sample, most frequent first
    current.values = sort(table(data[[k]]), decreasing = TRUE)
    if(length(current.values) > max.tracked) {
      current.values = current.values[1:max.tracked]
    }
    current.length = length(data[[k]])
    current.names = names(current.values)

    # for the first time, there is nothing to combine
    if(k == 1) {
      aggregate.values = current.values
    }

    # union of the features seen in this sample and those tracked so far
    aggregate.names = unique(c(current.names, names(aggregate.values)))
    aggregate.length = aggregate.length + current.length

    # align both sets of scores on the common feature list; features that
    # are missing on either side come out as NA and are zeroed below
    # NOTE(review): from the second sample on, 'aggregate.values' already
    # holds averaged relative scores, yet it is divided by the cumulative
    # token count again -- kept as in the original; confirm the intended
    # weighting.
    tmp.values1 = current.values[aggregate.names] / current.length
    tmp.values2 = aggregate.values[aggregate.names] / aggregate.length
    names(tmp.values1) = aggregate.names
    names(tmp.values2) = aggregate.names

    tmp.values = rbind(tmp.values1, tmp.values2)
    tmp.values[which(is.na(tmp.values))] = 0

    # the new running score is the mean of the two rows, re-ranked
    aggregate.values = sort(colMeans(tmp.values), decreasing = TRUE)
    if(length(aggregate.values) > max.tracked) {
      aggregate.values = aggregate.values[1:max.tracked]
    }

    # wipe the progress indicator
    message("\b\b\b\b\b\b\b", appendLF = FALSE)
  }
  message(" ")

  # cut off the list, but never beyond the features actually available
  # (indexing past the end would pad the result with NAs)
  no.to.keep = min(length(aggregate.values), no.of.features)
  frequent.features = aggregate.values[seq_len(no.to.keep)]

  # in most cases, one needs just a list of features, without frequencies
  if(value == FALSE) {
    frequent.features = names(frequent.features)
  }

  return(frequent.features)
}
# Any scripts or data that you put into this service are public.
# Add the following code to your website.
# For more information on customizing the embed code, read Embedding Snippets.