# demo/backup/grep_cTesting.R

# Load the debates CSV; strings stay character instead of becoming factors.
bigfile <- read.csv(
  "~/Downloads/primary_debates_cleaned_separated.csv",
  stringsAsFactors = FALSE
)

# Expressions used to detect the code in an excerpt.
words <- c("education", "school", "learn", "children")

# Build the code object over the `Text` column of the CSV.
# NOTE(review): create.code(), autocode() and handcode() are not defined in
# this file -- presumably they come from ncodeR, which must be attached
# before this script runs; confirm.
education.code <- create.code(
  name = "Education",
  definition = "Test education code",
  excerpts = bigfile$Text,
  expressions = words
)

# Run the automated coder twice: the unsimplified result replaces the code
# object, the simplified result is kept separately.
education.code <- autocode(x = education.code, simplify = FALSE)
education.df <- autocode(x = education.code, simplify = TRUE)

# Hand-code with n = 4; the result replaces the code object.
education.code <- handcode(code = education.code, n = 4)

# Flag excerpts that match at least one expression, one excerpt at a time.
#
# Deliberately loops over every (excerpt, expression) pair with a separate
# grepl() call; this is the per-element strategy compared in the benchmark.
#
# expressions.to.use: character vector of regular expressions, matched
#                     case-insensitively with perl = TRUE.
# excerpts:           character vector of texts to search.
#
# Returns a numeric vector with one element per excerpt: 1 when any
# expression matches, 0 otherwise. Empty `excerpts` gives numeric(0).
rOne <- function(expressions.to.use, excerpts) {
  matches_any <- function(excerpt) {
    hits <- vapply(
      expressions.to.use,
      function(expr) {
        grepl(pattern = expr, x = excerpt, perl = TRUE, ignore.case = TRUE)
      },
      logical(1),
      USE.NAMES = FALSE
    )
    any(hits) * 1
  }
  # Iterate over a list: vapply() would otherwise label the result with the
  # excerpt texts themselves. Names supplied by the caller are still kept.
  vapply(as.list(excerpts), matches_any, numeric(1))
}
# Flag excerpts that match at least one expression, one grepl() per expression.
#
# Each expression is matched against all excerpts in a single vectorised
# grepl() call; the per-expression results become the columns of a data frame
# and a row counts as a hit when its row sum is at least 1.
#
# expressions.to.use: character vector of regular expressions, matched
#                     case-insensitively with perl = TRUE.
# excerpts:           character vector of texts to search.
#
# Returns a numeric vector with one element per excerpt: 1 when any
# expression matches, 0 otherwise.
rTwo <- function(expressions.to.use, excerpts) {
  # With no expressions there are no columns to sum, so the data-frame path
  # cannot produce one value per excerpt; nothing can match in that case.
  if (length(expressions.to.use) == 0) {
    return(numeric(length(excerpts)))
  }
  hits <- data.frame(
    lapply(expressions.to.use, grepl, excerpts, perl = TRUE, ignore.case = TRUE)
  )
  (rowSums(hits * 1) >= 1) * 1
}
# Flag excerpts that match at least one expression with a single grepl() call.
#
# All expressions are joined with "|" into one alternation pattern, built
# from the argument itself so the function does not depend on any global.
#
# expressions.to.use: character vector of regular expressions, matched
#                     case-insensitively with perl = TRUE.
# excerpts:           character vector of texts to search.
#
# Returns a logical vector with one element per excerpt: TRUE when any
# expression matches.
rThree <- function(expressions.to.use, excerpts) {
  # An empty set would collapse to the pattern "", which matches everything.
  if (length(expressions.to.use) == 0) {
    return(logical(length(excerpts)))
  }
  combined <- paste(expressions.to.use, collapse = "|")
  grepl(combined, excerpts, ignore.case = TRUE, perl = TRUE)
}
# cOne = function(expressions.to.use, excerpts) {
#   grep_c(expressions.to.use, excerpts)
# }

# microbenchmark::microbenchmark(
#   r = autocode(x = education.code, simplify=F),
#   c = autocode(x = education.code, simplify=F, use_c = T)
# )
# Single alternation pattern that combines every expression of the code.
concat.expr <- paste(education.code$expressions, collapse = "|")

# Time the three matchers on the same expressions and excerpts, 10 runs each.
microbenchmark::microbenchmark(
  r1 = rOne(education.code$expressions, education.code$excerpts),
  r2 = rTwo(education.code$expressions, education.code$excerpts),
  r3 = rThree(education.code$expressions, education.code$excerpts),
  # c1 = cOne(education.code$expressions, education.code$excerpts),
  times = 10
)

# Inline computation of the 0/1 "any expression matches" indicator, using the
# expressions and excerpts stored on the code object.
expressions.to.use <- education.code$expressions
excerpts <- education.code$excerpts
codes <- local({
  # One column per expression, one row per excerpt.
  hits <- data.frame(
    lapply(expressions.to.use, grepl, excerpts, perl = TRUE, ignore.case = TRUE)
  )
  (rowSums(hits * 1) >= 1) * 1
})
# epistemic-analytics/ncodeR documentation built on June 15, 2019, 12:03 a.m.