#' Politeness Features
#'
#' @description Detects linguistic markers of politeness in natural language.
#' This function is the workhorse of the \code{politeness} package, taking an N-length vector of text documents and returning an N-row data.frame of feature counts.
#' @param text character A vector of texts, each of which will be tallied for politeness features.
#' @param parser character Name of dependency parser to use (see details). Without a dependency parser, some features will be approximated, while others cannot be calculated at all.
#' @param metric character What metric to return? Raw feature count totals, Binary presence/absence of features, or feature counts per 100 words. Default is "count".
#' @param drop_blank logical Should features that were not found in any text be removed from the data.frame? Default is FALSE
#' @param uk_english logical Does the text contain any British English spelling? Including variants (e.g. Canadian). Default is FALSE
#' @param num_mc_cores integer Number of cores for parallelization. Default is 1, but we encourage users to try parallel::detectCores() if possible.
#' @details Some politeness features depend on part-of-speech tagged sentences (e.g. "bare commands" are a particular verb class).
#' To include these features in the analysis, a POS tagger must be initialized beforehand - we currently support SpaCy which must
#' be installed separately in Python (see example for implementation).
#' @return a data.frame of politeness features, with one row for every item in `text`. Possible politeness features are listed in \code{\link{feature_table}}
#' @references
#' Brown, P., & Levinson, S. C. (1987). Politeness: Some universals in language usage (Vol. 4). Cambridge university press.
#'
#' Danescu-Niculescu-Mizil, C., Sudhof, M., Jurafsky, D., Leskovec, J., & Potts, C. (2013). A computational approach to politeness with application to social factors. arXiv preprint arXiv:1306.6078.
#'
#' Voigt, R., Camp, N. P., Prabhakaran, V., Hamilton, W. L., ... & Eberhardt, J. L. (2017). Language from police body camera footage shows racial disparities in officer respect. Proceedings of the National Academy of Sciences, 201702413.
#'
#' @examples
#'
#' data("phone_offers")
#'
#' politeness(phone_offers$message, parser="none",drop_blank=FALSE)
#'
#' colMeans(politeness(phone_offers$message, parser="none", metric="binary", drop_blank=FALSE))
#' colMeans(politeness(phone_offers$message, parser="none", metric="count", drop_blank=FALSE))
#'
#' dim(politeness(phone_offers$message, parser="none",drop_blank=FALSE))
#' dim(politeness(phone_offers$message, parser="none",drop_blank=TRUE))
#'
#'\dontrun{
#' # Detect multiple cores automatically for parallel processing
#' politeness(phone_offers$message, num_mc_cores=parallel::detectCores())
#'
#' # Connect to SpaCy installation for part-of-speech features
#' install.packages("spacyr")
#' spacyr::spacy_initialize(python_executable = PYTHON_PATH)
#' politeness(phone_offers$message, parser="spacy",drop_blank=FALSE)
#'
#'}
#'
#'
#'
#'
#'@export
politeness<-function(text, parser=c("none","spacy"),
metric=c("count","binary","average"),
drop_blank=FALSE,
uk_english=FALSE,
num_mc_cores=1){
text=as.character(unlist(text))
text[is.na(text)]<-" "
if(uk_english){
text<-usWords(text)
}
text<-paste(text," ")
########################################################
# See if I can force people into spacy
########################################################
# if(parser[1]=="spacy"){
#
# }
# my_function <- function(x, y){
# tryCatch(
# #try to do this
# {
# #some expression
# },
# #if an error occurs, tell me the error
# error=function(e) {
# message('An Error Occurred')
# print(e)
# parser<-"none"
# }
# )
# }
########################################################
# Generates broad token lists for feature creation below
if(length(text)<2000){
sets<-getTokenSets(text=text,parser=parser[1],num_mc_cores=num_mc_cores)
} else{
# Batched loop to minimize memory load on SpaCy for big files
textList<-split(text, ceiling(seq_along(text)/1000))
setList<-lapply(1:length(textList),function(x) NA)
sets<-list()
tpb<-utils::txtProgressBar(0,length(textList))
for (x in 1:length(textList)){
setList[[x]]<-getTokenSets(text=textList[[x]],parser=parser[1],num_mc_cores=num_mc_cores)
utils::setTxtProgressBar(tpb,x)
}
sets[["dicts"]]<-do.call(rbind,lapply(setList,function(x) x[["dicts"]]))
.names<-names(setList[[1]])[names(setList[[1]])!="dicts"]
for(n in .names){
sets[[n]]<-do.call(c,lapply(setList,function(x) x[[n]]))
}
}
########################################################
features<-list()
features[["Hedges"]]<-textcounter(hedge_list,sets[["c.words"]],words=TRUE, num_mc_cores=num_mc_cores)
features[["Impersonal.Pronoun"]]<-sets[["dicts"]][,"Pronouns"]
features[["Swearing"]]<-sets[["dicts"]][,"Swearing"]
features[["Negation"]]<-sets[["dicts"]][,"Negation"]
features[["Filler.Pause"]]<-sets[["dicts"]][,"FilledPause"]
features[["Informal.Title"]]<-sets[["dicts"]][,"InformalTitle"]
features[["Formal.Title"]]<-sets[["dicts"]][,"FormalTitle"]
# Titles - "boss" vs "my boss", "son" vs "our son"
features[["Could.You"]]<-textcounter(c("could you","would you","might you"),sets[["clean"]], num_mc_cores=num_mc_cores)
features[["Can.You"]]<-textcounter(c("can you","will you"),sets[["clean"]], num_mc_cores=num_mc_cores)
features[["By.The.Way"]]<-textcounter(c("by the way"),sets[["clean"]], num_mc_cores=num_mc_cores)
features[["Let.Me.Know"]]<-textcounter(c("let me know"),sets[["clean"]], num_mc_cores=num_mc_cores)
features[["Goodbye"]]<-textcounter(c("goodbye", " bye ", "see you later"),sets[["clean"]], num_mc_cores=num_mc_cores)
features[["For.Me"]]<-textcounter(c(" for me ","for us "),sets[["clean"]], num_mc_cores=num_mc_cores)
features[["For.You"]]<-textcounter(" for you ",sets[["clean"]], num_mc_cores=num_mc_cores)
features[["Reasoning"]]<-textcounter(c("reason", "why i ", "why we ", "explain", "caused","because"),sets[["clean"]],
num_mc_cores=num_mc_cores)
features[["Contrast.Conjunction"]]<-textcounter(c("but","however","instead",
"although","even though",
"despite","and yet",
"nevertheless","nonetheless"),sets[["clean"]],
num_mc_cores=num_mc_cores)
features[["Reassurance"]]<-textcounter(c("is okay", "not worry", "no big deal", "not a big deal", "no problem",
"no worries", "is fine", "you are good", "it's fine", "it's okay") ,sets[["clean"]],
num_mc_cores=num_mc_cores)
features[["Ask.Agency"]]<-textcounter(c("do me a favor", " let me ", " allow me ", " can i ", " should i ",
" may i ", "might i ", " could i "),sets[["clean"]],
num_mc_cores=num_mc_cores)
features[["Give.Agency"]]<-textcounter(c("let you", "allow you", "you can ", " you may ", " you could "),sets[["clean"]],
num_mc_cores=num_mc_cores)
features[["Hello"]]<-(textcounter(c("hi","hello","hey","greetings","howdy"),sets[["c.words"]],words=TRUE,
num_mc_cores=num_mc_cores)+
textcounter(c("good morning", "good evening", "good afternoon"),sets[["clean"]],
num_mc_cores=num_mc_cores))
features[["Please"]]<-textcounter("please",sets[["c.words"]],words=TRUE,
num_mc_cores=num_mc_cores)
features[["First.Person.Plural"]]<-textcounter(c("we", "us", "our", "ours", "ourselves"),sets[["c.words"]],words=TRUE,
num_mc_cores=num_mc_cores)
features[["First.Person.Single"]]<-textcounter(c("i","me","my","mine","myself"),sets[["c.words"]],words=TRUE,
num_mc_cores=num_mc_cores)
features[["Second.Person"]]<-textcounter(c("you","your","yours","yourself", "yourselves","y'all"),sets[["c.words"]],words=TRUE,
num_mc_cores=num_mc_cores)
features[["Third.Person"]]<-textcounter(c("he","him","his","himself",
"she","her","hers","herself",
"they","them","their","theirs","themselves"),
sets[["c.words"]],words=TRUE,
num_mc_cores=num_mc_cores)
features[["Questions"]]<-textcounter("?",text, num_mc_cores=num_mc_cores)
#if(parser[1]=="none"){
if(parser[1]!="spacy"){
if(length(parser)==2){
cat("Warning: Please install and initialize SpaCy, using the spacyr R package. This is an INCOMPLETE version of the package.")
}
features[["Positive.Emotion"]]<-textcounter(positive_list,sets[["c.words"]],words=TRUE, num_mc_cores=num_mc_cores)
features[["Negative.Emotion"]]<-textcounter(negative_list,sets[["c.words"]],words=TRUE, num_mc_cores=num_mc_cores)
features[["Gratitude"]]<-(unlist(lapply(sets[["c.words"]], function(x) sum(startsWith(unlist(x), prefix="thank"))))+
unlist(lapply(sets[["c.words"]], function(x) sum(startsWith(unlist(x), prefix="grateful"))))+
unlist(lapply(sets[["c.words"]], function(x) sum(startsWith(unlist(x), prefix="gratitude")))))
features[["Apology"]]<-textcounter(c("sorry"," woops","oops","whoops","apologize"),sets[["c.words"]],words=TRUE,
num_mc_cores=num_mc_cores)
features[["Truth.Intensifier"]]<-(textcounter(c("really", "actually", "honestly", "surely"),sets[["c.words"]],words=TRUE,
num_mc_cores=num_mc_cores)+
textcounter(c("in fact"),sets[["clean"]],num_mc_cores=num_mc_cores))
} else {
features[["Positive.Emotion"]]<-(textcounter(positive_list,sets[["unneg.words"]],words=TRUE, num_mc_cores=num_mc_cores)
+textcounter(negative_list,sets[["neg.words"]],words=TRUE, num_mc_cores=num_mc_cores))
features[["Negative.Emotion"]]<-(textcounter(positive_list,sets[["neg.words"]],words=TRUE, num_mc_cores=num_mc_cores)
+textcounter(negative_list,sets[["unneg.words"]],words=TRUE, num_mc_cores=num_mc_cores))
features[["Agreement"]]<-(unlist(lapply(sets[["p.unnegs"]],
function(x) sum(textcounter(c("nsubj(agree, i)","nsubj(concur, i)",
"nsubj(agree, we)","nsubj(concur, we)",
"acomp('re, right)","acomp(are, right)",
"acomp('re, correct)","acomp(are, correct)",
"acomp('s, right)","acomp(is, right)",
"acomp('s, correct)","acomp(is, correct)",
"acomp('s, true)","acomp(is, true)"),x, words=TRUE,
num_mc_cores=num_mc_cores))))+
textcounter(apply(expand.grid(c("good","great","excellent","brilliant","fair","amazing"),
c("idea", "point","suggestion")),1,paste, collapse=" "),
sets[["clean"]],num_mc_cores=num_mc_cores))
features[["Disagreement"]]<-(unlist(lapply(sets[["p.negs"]],
function(x) sum(textcounter(c("nsubj(agree, i)","nsubj(concur, i)",
"nsubj(agree, we)","nsubj(concur, we)",
"nsubj(think, we)","nsubj(think, i)",
"nsubj(believe, we)","nsubj(believe, i)",
"acomp('re, right)","acomp(are, right)",
"acomp('re, correct)","acomp(are, correct)",
"acomp('s, right)","acomp(is, right)",
"acomp('s, correct)","acomp(is, correct)",
"acomp('s, true)","acomp(is, true)"),x, words=TRUE,
num_mc_cores=num_mc_cores))))
+unlist(lapply(sets[["p.unnegs"]],
function(x) sum(textcounter(c("nsubj(disagree, i)","nsubj(object, i)",
"nsubj(disagree, we)","nsubj(object, we)",
"acomp('re, wrong)","acomp(are, wrong)",
"acomp('re, incorrect)","acomp(are, incorrect)",
"acomp('s, wrong)","acomp(is, wrong)",
"acomp('s, incorrect)","acomp(is, incorrect)",
"acomp('s, untrue)","acomp(is, untrue)",
"det(lie, a)","det(myth, a)",
"acomp('s, false)","acomp(is, false)"),x, words=TRUE,
num_mc_cores=num_mc_cores))))
+textcounter(apply(expand.grid(c("bad","terrible","horrible","awful","dumb","stupid"),
c("idea", "point","suggestion")),1,paste, collapse=" "),
sets[["clean"]],num_mc_cores=num_mc_cores)
)
features[["Acknowledgement"]]<-unlist(lapply(sets[["p.unnegs"]],function(x) sum(textcounter(c("nsubj(understand, i)","nsubj(see, i)","nsubj(acknowledge, i)",
"nsubj(hear, i)","nsubj(get, i)","dobj(take, point)",
"nsubj(understand, we)","nsubj(see, we)","nsubj(acknowledge, we)",
"nsubj(hear, we)","nsubj(get, we)"),x, words=TRUE,
num_mc_cores=num_mc_cores))))
features[["Subjectivity"]]<-unlist(lapply(sets[["p.nonum"]],function(x) sum(textcounter(c("nsubj(think, i)","nsubj(believe, i)","nsubj(suspect, i)",
"nsubj(thought, i)","nsubj(felt, i)",
"nsubj(presume, i)","nsubj(reckon, i)","nsubj(feel, i)",
"poss(perspective, my)","poss(belief, my)","poss(view, my)",
"poss(knowledge, my)","poss(opinion, my)",
"nsubj(think, we)","nsubj(believe, we)","nsubj(suspect, we)",
"nsubj(thought, we)","nsubj(felt, we)",
"nsubj(presume, we)","nsubj(reckon, we)","nsubj(feel, we)",
"poss(perspective, our)","poss(belief, our)","poss(view, our)",
"poss(knowledge, our)","poss(opinion, our)"),x, words=TRUE,
num_mc_cores=num_mc_cores))))
features[["Bare.Command"]]<-unlist(lapply(sets[["pos.nums"]],function(x) sum(grepl("(1-",unlist(x),fixed=TRUE)
&grepl("-vb)",unlist(x),fixed=TRUE)
&(!grepl(paste0("-be-"),unlist(x)))
&(!grepl(paste0("-do-"),unlist(x)))
&(!grepl(paste0("-have-"),unlist(x)))
&(!grepl(paste0("-hope-"),unlist(x)))
&(!grepl(paste0("-excuse-"),unlist(x)))
&(!grepl(paste0("-thank-"),unlist(x)))
&(!grepl(paste0("-please-"),unlist(x)))
&(!grepl(paste0("-hang-"),unlist(x)))
&(!grepl(paste0("-let-"),unlist(x)))
)))
# what about when the first word is a negation, e.g. "don't go there"
######### QUESTION TYPES #########
q.words<-c("who","what","where","when","why","how","which") # now in spacyParser
features[["WH.Questions"]]<-(unlist(lapply(sets[["ques.pre.root"]],
function(x) sum(textcounter(q.words,x))))
)
features[["Repair.Questions"]]<-textcounter(c("punct(who, ?)","punct(what, ?)","punct(pardon, ?)","punct(sorry, ?)",
"punct(huh, ?)","punct(hm, ?)"),
sets[["p.nonum"]], words=TRUE,
num_mc_cores=num_mc_cores)
# Original list "What?","Sorry?","Excuse me?","Huh?","Who?","Pardon?","Say again?","Say it again?","What's that?","What is that?") # "what did you say?"
features[["Tag.Questions"]]<-unlist(lapply(text,
function(x) sum(textcounter(c("right?","you know?","eh?",
# "what do you think?",
"do you?","don't you?"),x,
num_mc_cores=num_mc_cores))))
# false positive WH: "Given what has happened, do you know enough to decide?"
features[["YesNo.Questions"]]<-(features[["Questions"]]
-features[["WH.Questions"]]
-features[["Repair.Questions"]]
-features[["Tag.Questions"]]
)
features[["Questions"]]<-NULL
#########################################
features[["Gratitude"]]<-(unlist(lapply(sets[["c.words"]], function(x) sum(startsWith(unlist(x), prefix="thank"))))+
unlist(lapply(sets[["c.words"]], function(x) sum(startsWith(unlist(x), prefix="grateful"))))+
unlist(lapply(sets[["c.words"]], function(x) sum(startsWith(unlist(x), prefix="gratitude"))))+
unlist(lapply(sets[["p.unnegs"]], function(x) sum(grepl("(appreciate, we)",x,fixed=TRUE))))+
unlist(lapply(sets[["p.unnegs"]], function(x) sum(grepl("(appreciate, i)",x,fixed=TRUE)))))
features[["Apology"]]<-(textcounter(c("woops","oops","whoops"),sets[["unneg.words"]],words=TRUE,
num_mc_cores=num_mc_cores)
+textcounter(c("acomp(am, sorry)","acomp('m, sorry)",
"root(root, sorry)",
"nsubj(apologize, we)","nsubj(apologize, i)",
"nsubj(regret, i)", "nsubj(regret, we)",
"dobj(excuse, me)",
"poss(forgiveness, your)",
"dobj(forgive, me)"),sets[["p.unnegs"]], words=TRUE,
num_mc_cores=num_mc_cores)
+(textcounter("nsubj(are, we)",sets[["p.unnegs"]],words=TRUE)&textcounter("acomp(are, sorry)",sets[["p.unnegs"]],words=TRUE))
+(textcounter("nsubj('re, we)",sets[["p.unnegs"]],words=TRUE)&textcounter("acomp('re, sorry)",sets[["p.unnegs"]],words=TRUE))
+(textcounter("xcomp(want, apologize)",sets[["self.unnegs"]]))
+(textcounter("xcomp(have, apologize)",sets[["self.unnegs"]]))
+(textcounter("xcomp(need, apologize)",sets[["self.unnegs"]]))
+(textcounter("xcomp(like, apologize)",sets[["self.unnegs"]]))
+(textcounter("intj(say, sorry)",sets[["self.unnegs"]]))
)
features[["Truth.Intensifier"]]<-(textcounter(c("really", "actually", "honestly", "surely"),sets[["c.words"]],words=TRUE,
num_mc_cores=num_mc_cores)
+textcounter(c("det(point, the)","det(reality, the)","det(truth, the)","pobj(fact, in)","case(fact, in)"),sets[["p.nonum"]], words=TRUE,
num_mc_cores=num_mc_cores))
features[["Adverb.Limiter"]]<-unlist(lapply(sets[["p.nonum"]] ,function(x) sum(grepl("advmod",unlist(x))&
(grepl("just)",unlist(x),fixed=TRUE)
|grepl("only)",unlist(x),fixed=TRUE)
|grepl("merely)",unlist(x),fixed=TRUE)
|grepl("simply)",unlist(x),fixed=TRUE)))
))
}
features[["Affirmation"]]<-textcounter(c("yeah","yes","ok","okay","perfect","fine","wow","great","amazing"),
sets[["c.words"]],words=TRUE,start=TRUE,
num_mc_cores=num_mc_cores)
# listed at the end for backward compatibility
features[["Conjunction.Start"]]<-textcounter(c("so","then","and","but","or","however"),
sets[["c.words"]],words=TRUE,start=TRUE,
num_mc_cores=num_mc_cores)
if(metric[1]=="binary"){
features<-parallel::mclapply(features, function(x) 1*(x>0), mc.cores=num_mc_cores)
} else if (metric[1]=="average"){
word_counts <- stringr::str_count(text, "[[:alpha:]]+")
features<-parallel::mclapply(features, function(x) x*100/word_counts, mc.cores=num_mc_cores)
}
feature.data<-as.data.frame(features)
feature.data[feature.data<0]<-0
if(drop_blank){
feature.data<-feature.data[,colMeans(feature.data,na.rm=TRUE)!=0, drop=FALSE]
}
return(feature.data)
}
###############################################################
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.