library(data.table)
# RS.data, create.code() and autocode() are not defined in this file; they must
# already be available when the script runs.
# NOTE(review): presumably provided by the ncodeR package -- confirm.
rs = data.table(RS.data)
#####
# Create a test code
#####
code = create.code(excerpts = rs$text, expressions = c("data","number"))
# FALSE is spelled out: `F` is an ordinary variable that can be reassigned.
code = autocode(code, simplify = FALSE)
# Hand-coded test set as a two-column numeric matrix: column 1 (ID) is used
# further down as an index into code$excerpts, column 2 (X1) as a 0/1 label.
code$testSet = as.matrix(data.frame(
  ID = c(3476,1679,342,1719,651,359,179,784,728,3364),
  X1 = c(0,0,1,0,0,0,0,0,0,1)
))
#####
# END
#####
# Placeholder with no behaviour yet: takes no arguments and returns NULL.
pull.more = function() NULL
#' Collect, for each element of `x`, the words whose document list contains it.
#'
#' @param x Vector (or list) of values; each element is tested for membership
#'   in every entry of `corpus$docs`.
#' @param corpus A data.table with a list column `docs` and a column `V1`
#'   (the table built as `dd3` later in this script has that shape).
#' @return A data.table with one row per element of `x`: `excerpt` holds the
#'   element and `words` is a list column holding the `V1` values of all
#'   `corpus` rows whose `docs` entry contains that element.
filterOut = function(x, corpus) {
  filtered = lapply(x, function(y) {
    # vapply() guarantees a logical row mask even when corpus$docs is empty;
    # sapply() would return list() there, which is not a valid mask.
    hits = vapply(corpus$docs, function(z) y %in% z, logical(1))
    # A single symbol in `i` is looked up in the calling scope by data.table,
    # so `hits` refers to the local mask above.
    list(excerpt = y, words = list(corpus[hits, V1]))
  })
  rbindlist(filtered)
}
# ---------------------------------------------------------------------------
# Suggest which not-yet-coded excerpts to look at next.
#
#   1. Split every excerpt into lower-cased, punctuation-free words (dw).
#   2. Build a per-word table of frequency and containing excerpts (dd3).
#   3. Split the vocabulary by whether the excerpt is coded 1, coded 0, or
#      absent from code$testSet.
#   4. Among the most frequent words that only occur in uncoded excerpts,
#      report the excerpts that contain the most of them.
# ---------------------------------------------------------------------------
col = c("text")
exDT = data.table(text = code$excerpts)

# One row per word occurrence. `by = 1:nrow(exDT)` is kept verbatim on purpose:
# data.table derives the grouping column's name ("nrow") from this expression,
# and that column is used below as the excerpt index. The words land in "V1".
dw = exDT[, {
  wds = strsplit(as.character(.SD[[col]]), " ")[[1]]
  wds = tolower(gsub('[[:punct:]]| ', '', wds))
  # Drop tokens that became empty once punctuation was stripped.
  wds[nzchar(wds)]
}, by = 1:nrow(exDT), .SDcols = col]

# Per-word summary: occurrence count and the excerpt index of each occurrence.
dd3 = dw[, list(freq = .N, docs = list(.SD$nrow), seen = FALSE), by = V1, .SDcols = c("nrow", "V1")]
setorder(dd3, -freq)

# Vocabulary of the excerpts coded 1 and coded 0 in the test set.
yesses = code$testSet[which(code$testSet[, 2] == 1), 1]
yesWords = unique(dw[(nrow %in% yesses)]$V1)
nos = code$testSet[which(code$testSet[, 2] == 0), 1]
noWords = unique(dw[(nrow %in% nos)]$V1)

# Excerpts not in the test set. setdiff() is used instead of negative indexing
# because x[-integer(0)] selects nothing, so an empty test set would wrongly
# yield zero unseen excerpts.
unseens = setdiff(seq_along(code$excerpts), code$testSet[, 1])
unseenWords = unique(dw[(nrow %in% unseens)]$V1)

# NOTE(review): `newWordsFiltered` was referenced here without ever being
# defined in this file, so the script stopped with "object not found". It is
# reconstructed as the words that occur only in unseen excerpts -- confirm this
# matches the intended filter.
newWordsFiltered = setdiff(unseenWords, c(yesWords, noWords))

# Up to 20 most frequent new words (dd3 is already sorted by -freq). head()
# avoids the NA padding rows that `[1:20]` produces when fewer rows exist.
topUnseen = head(dd3[V1 %in% newWordsFiltered], 20)

# Up to two excerpts holding the most occurrences of those words; bounded so
# that fewer than two candidates do not produce NA indices.
docCounts = sort(table(unlist(topUnseen$docs)), decreasing = TRUE)
freqUnseen = docCounts[seq_len(min(2L, length(docCounts)))]
freqInds = as.numeric(names(freqUnseen))
cat("Excerpts to include: ", freqInds, "\n")
print(code$excerpts[freqInds])
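# NOTE: the two lines below are web-page boilerplate, not R code; they are kept
# as comments so that the file parses.
# Add the following code to your website.
# For more information on customizing the embed code, read Embedding Snippets.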