library(data.table)

# Convert the raw `RS.data` object into a data.table for the steps below.
# NOTE(review): `RS.data` is not defined in the visible part of this script;
# presumably it is supplied by a package or an earlier step -- confirm.
rs <- data.table(RS.data)
# Placeholder: takes no arguments and has no body, so a call yields NULL.
pull.more <- function() NULL
# Build the code object from the excerpt texts with two keyword expressions,
# then run the automatic coder over it.
# NOTE(review): `create.code` and `autocode` are not defined in the visible
# part of this script; presumably they come from a package loaded elsewhere.
code <- create.code(excerpts = rs$text, expressions = c("data", "number"))
code <- autocode(code, simplify = FALSE)

# Fixed test set stored as a two-column numeric matrix: `ID` holds ten excerpt
# indices, `X1` a 0/1 flag (later steps treat 1 as "yes" and 0 as "no").
code$testSet <- as.matrix(data.frame(
  ID = c(3476, 1679, 342, 1719, 651, 359, 179, 784, 728, 3364),
  X1 = c(0, 0, 1, 0, 0, 0, 0, 0, 0, 1)
))
# Tokenise every excerpt into cleaned, lower-case words: the result `dw` has
# one row per word occurrence.
col <- c("text")
exDT <- data.table(text = code$excerpts)

# The grouping expression `1:nrow(exDT)` is kept verbatim on purpose: later
# steps address the per-excerpt index as column `nrow` and the words as `V1`.
# NOTE(review): data.table appears to derive the `nrow` column name from this
# `by` expression -- check before rewriting it (e.g. as seq_len()).
dw <- exDT[, {
  # Split on single spaces, then strip punctuation and leftover spaces.
  wds <- strsplit(as.character(.SD[[col]]), " ")[[1]]
  wds <- tolower(gsub("[[:punct:]]| ", "", wds))
  # Drop tokens that became empty after cleaning.
  wds[nzchar(wds)]
}, by = 1:nrow(exDT), .SDcols = col]
# Per-word summary of `dw`: `freq` is the number of rows (occurrences) for the
# word, `docs` the excerpt indices of those rows, and `seen` a flag that
# starts out FALSE.
dd3 <- dw[
  ,
  list(freq = .N, docs = list(.SD$nrow), seen = FALSE),
  by = V1,
  .SDcols = c("nrow", "V1")
]

# Most frequent words first (reorders `dd3` in place).
setorder(dd3, -freq)
# For every excerpt id in `x`, collect the words of `corpus` whose `docs`
# entry contains that id.
#
# Args:
#   x:      vector or list of excerpt ids. Each element is assumed to be a
#           single value (TODO confirm against callers).
#   corpus: data.table with a `V1` column and a list column `docs` holding one
#           vector of excerpt ids per row (the shape of `dd3` in this script).
#
# Returns:
#   A data.table with one row per element of `x`: `excerpt` (the id) and
#   `words` (list column with the matching `V1` values).
filterOut <- function(x, corpus) {
  # The leftover interactive `browser()` call was removed so the function can
  # run unattended.
  filtered <- lapply(x, function(y) {
    list(
      excerpt = y,
      # vapply() always returns a logical vector, even for a zero-row corpus,
      # where sapply() would return an empty list.
      words = list(corpus[
        vapply(corpus$docs, function(z) y %in% z, logical(1)),
        V1
      ])
    )
  })
  rbindlist(filtered)
}
# Excerpt indices flagged 1 ("yes") and 0 ("no") in the test set, together
# with the distinct words that occur in those excerpts.
yesses <- code$testSet[which(code$testSet[, 2] == 1), 1]
yesWords <- unique(dw[(nrow %in% yesses)]$V1)
nos <- code$testSet[which(code$testSet[, 2] == 0), 1]
noWords <- unique(dw[(nrow %in% nos)]$V1)

# Every excerpt index that is not part of the test set, and its words.
# setdiff() is used instead of negative indexing because `x[-ids]` returns
# nothing at all when `ids` is empty or contains 0.
unseens <- setdiff(seq_along(code$excerpts), code$testSet[, 1])
unseenWords <- unique(dw[(nrow %in% unseens)]$V1)
# Take up to the first 20 rows of `dd3` (already ordered by frequency) whose
# word is in `newWordsFiltered`.
# NOTE(review): `newWordsFiltered` is not defined in the visible part of this
# script; presumably it is created elsewhere -- confirm.
# head() is used instead of `[1:20]` / `[1:2]` so that short results are not
# padded with NA entries.
topUnseen <- dd3[V1 %in% newWordsFiltered, head(.SD, 20)]

# Count how often each excerpt index occurs across those words and keep the
# (at most) two most frequent ones.
freqUnseen <- head(sort(table(unlist(topUnseen$docs)), decreasing = TRUE), 2)
freqInds <- as.numeric(names(freqUnseen))

cat("Excerpts to include: ", freqInds, "\n")
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.