#' @export get_group_threads get_owner_from_threads
#' @name get_group_threads
#' @aliases get_group_threads
#' @aliases get_owner_from_threads
#' @author Caterina Ciampanelli, Anna Petrina, Livio Finos
#' @param mess a data.frame as given by \code{getMessages}
#' @title get infos from threads
#' @description \code{get_owner_from_threads} return the name of the user, \code{get_group_threads} return a TRUE/FALSE factor indicating if the chat coms from a group (TRUE) or a personal (FALSE) chat.
get_group_threads <- function(mess){
nomi_threads=strsplit(levels(mess$thread),",")
thread_gruppo=sapply(nomi_threads,length)>2
gruppo=mess$thread
levels(gruppo)=thread_gruppo
gruppo
}
get_owner_from_threads <- function(mess){
nomi_threads=strsplit(levels(factor(mess$thread)),",")
nome_utente=names(which.max(table(unlist(nomi_threads[mess$gruppo=="FALSE"]))))
nome_utente=gsub("^ ","",nome_utente)
nome_utente=gsub(" $","",nome_utente)
nome_utente
}
escludiCaratteriFinaliReplicatiPiuVolte <- function(testo,
listaCatatteri=c("a","e","i","o","u")){
for(char in listaCatatteri){
testo=gsub(paste(char,"+ ",sep=""),paste(char," ",sep=""),testo)
testo=gsub(paste(char,"+$",sep=""),char,testo)
}
testo
}
char_emoticons=c(EMOTEGOOD="\\:\\)+|\\:\\-\\)+|\\:\\]+|\\:\\-\\]+|\\=\\)+|\\=\\]+|\\=\\>|\\:\\>|\\^\\^|\\^\\_+\\^|\\^\\-\\^|\\^o\\^|\\:[[:blank:]]\\)+|[[:blank:]]\\([[:blank:]]?\\:|\\:\\'D+",
EMOTELOVE="\\<3+|<U+2764>|<U+2665>|\\:\\*+",
EMOTEBAD="\\:\\(+|\\:\\-\\(+|\\:\\[+|\\:\\-\\[+|\\=\\[+|\\=\\(+|\\:[[:blank:]]\\(|[[:blank:]]\\([[:blank:]]?\\:|\\:\\'+\\(+|\\:\\'\\[|D\\:|\\:\\-\\[|\\:\\|\\:/+|\\=/+|\\:x|\\#\\_+\\#|X\\_+X|x\\_+x|X\\.X|x\\.x|>\\.<|>\\_+<|>\\_+>|>\\.>",
EMOTEWINK="\\;\\)+|\\;\\-\\)+|\\;\\]|\\;\\-\\]|\\;\\>|;d+|;D+|;o",
EMOTESHOCK="O\\.o|o\\.o|O\\.O|o\\.O|O\\_+o|o\\_+o|O\\_+O|o\\_+O|\\:OO+|\\=O+|\\-\\.\\-|u\\.u|u\\.ù|ù\\.u|u\\_+u|çç|ç_+ç|t_+t|ù\\_+ù|ù\\.ù|\\:oo+|0\\_+0|\\=\\_+\\=|\\.\\_+\\.|òò|ò\\_+ò|\\*u+\\*|\\-\\_+\\-|ùù|\\-\\,\\-|\\-\\-\\'|\\.\\-\\.|\\'\\-\\'",
EMOTEAMAZE="\\:P+[^e]|\\:p+[^e]|\\=P+|\\=p+|XD+|xD+|xd+|[[:blank:]]d\\:|\\:P+[^e]|\\:p+[^e]|\\=P+|\\=p+|XD+|xD+|xd+|[[:blank:]]d\\:")
char_non_word=c(domanda="\\?", esclamativo="\\!", virgole_punti="(,|;|\\.)")
char_wow=c(EMOTEZZZ ="(#?zz+|#?u+ff[aif]+?|#?r+o+n+f+|#uff|ronf)",
EMOTESIII ="(#?sii+|#si+|#?yes+|#?s\uc38c\uc38c+)",
EMOTENOOO ="(#?noo+|#no+|#?nuu+)",
EMOTEAHHH ="(#?ahh+)",
EMOTEEHHH ="(#?ehh+)",
EMOTEOHOH ="(#?o?h?o+h[oh]+)",
EMOTEIHIH ="(#?i?h?i+h[ih]+)",
EMOTEUHUH ="(#?i?h?i+h[ih]+)",
EMOTEAHAH ="(#?a?h?ah[^h][ah]+)",
EMOTEEHEH ="(#?e?h?eh[^h][eh]+)",
EMOTEAZZ ="(#?azz+)",
EMOTEDAIII ="(#?[dv]aii+|#[dv]ai|forzaa+)",
cazzo ="(#?cazz[oi]+|ca\\*\\*o|c\\*\\*\\*+o)",
cazzata ="(#?cazzat[a]+)",
merda ="(#?merd[a]+)",
EMOTEAAA ="(#?aaa+)",
EMOTEOOO ="(#?ooo+)",
EMOTEEEE ="(#?eee+)",
EMOTELOL ="(#?l+o+l+|#?r+o+f+t+l+)",
EMOTESOS ="(#?aiutoo+|#?sos|#?help+)",
EMOTEBASTA ="(#ba+sta+|ba+staa+)",
EMOTEWOW ="#?([uw]+[ao]+[uw]+)")
summary.txts <- function(txts){
# txts=posts
nchar.mess=sapply(txts,nchar)
# hist(nchar.mess,xlim=c(1,100),10000)
statistics=c(number=length(txts),
total_nchar=sum(nchar.mess),
nchar_per_text=summary(nchar.mess))
# print(summary)
# quantiles.nchar=quantile(nchar.mess,c(.1,.25,.5,.75))
statistics
}
########
summary.txts_words <- function(txts,nMostFrequentWords=Inf){
posts.norm=normalizzaTesti(txts,contaStringhe = c("\\?","\\!"),normalizzaslang = TRUE,normalizzahtml = TRUE)
freqDomEsc=colSums(attributes(posts.norm)$counts)
stringhe=c(char_emoticons,char_wow,char_non_word)
#library(tm)
corpus <- tm::Corpus(tm::VectorSource(paste(posts.norm,collapse = " ")))
#[Nell'ambito dei motori di ricerca, l'espressione - scritta anche stopwords - indica quelle parole che, per la loro alta frequenza in una lingua, sono di solito ritenute poco significative dai motori, che le ignorano]
freq_emoticons <- as.matrix(tm::DocumentTermMatrix(corpus
, control = list( stemming = FALSE,
dictionary=stringhe) )
)
colnames(freq_emoticons)=names(freq_emoticons)
freq_emoticons=data.frame(freq_emoticons)
posts.norm=removeStopwords(posts.norm, c(names(stringhe),stopwords("en"),
"the","just",
"ok","va","non",
stopwords("es"),itastopwords,
"est","et"))
posts.norm=escludiCaratteriFinaliReplicatiPiuVolte(posts.norm)
posts.words=unlist(strsplit(posts.norm," "))
posts.words=posts.words[sapply(posts.words,function(x)x!="")]
tabParole=table(posts.words)
freq_links=tabParole["wwwurlwww"]
tabParole=tabParole[setdiff(names(tabParole),"wwwurlwww")]
most.word=sort(tabParole,decreasing = TRUE)
list(freq_parole=most.word[1:min(length(most.word),nMostFrequentWords)],
freq_emoticons=freq_emoticons,
freq_links =freq_links)
}
getMessages_summary_string_counts <- function(mess,stringhe=NULL){
if(is.null(stringhe)) stringhe=c(myFBr:::char_emoticons,myFBr:::char_non_word,myFBr:::char_wow)
mess$gruppo <- myFBr:::get_group_threads(mess)
# table(mess$gruppo)
nome_utente = myFBr:::get_owner_from_threads(mess)
fun_temp <- function(dati){
conteggi = plyr::llply(stringhe, function(stringa) stringr::str_count(dati$text,
stringa))
colSums(as.data.frame(conteggi))
}
list(user_grp=fun_temp(mess[(mess$user==nome_utente)&(mess$gruppo==TRUE),]),
user_personal=fun_temp(mess[(mess$user==nome_utente)&(mess$gruppo==FALSE
),]),
others_grp=fun_temp(mess[(!(mess$user==nome_utente))&(mess$gruppo==TRUE),]),
others_personal=fun_temp(mess[(!(mess$user==nome_utente))&(mess$gruppo==FALSE),]))
}
mode <- function(x) as.numeric(names(which.max(table(x))))
#' @name activity_time_summary
#' @export activity_time_summary
#' @aliases activity_time_summary
#' @title summary of activities over time
#' @description funtions which decrive the activities over time
#' @param action_time a vector of POSIXct elements refering to the time of the activities.
#' @param dataI minimum date to be considered (POSIXct format)
#' @param dataF maximum date to be considered (POSIXct format)
#'
activity_time_summary <- function(action_times,dataI=NULL,dataF=NULL){
if(length(action_times)==0) return(
c(days_off=days_off_summary(c()),
week_off=days_off_summary(c()),
acts_day=some_statistics(c()),
acts_day_0excl=some_statistics(c()),
acts_week=some_statistics(c()),
acts_week_0excl=some_statistics(c()))
)
if(is.null(dataI))
dataI=as.Date(min(action_times))
if(is.null(dataF))
dataF=as.Date(max(action_times))
giorni=as.character(seq(dataI,dataF,by=1))
giorni_mess=factor(as.Date(action_times),levels = giorni)
table_acts_per_day=table(giorni_mess)
days_off=myFBr:::days_off_summary(table_acts_per_day)
acts_day=myFBr:::some_statistics(table_acts_per_day)
acts_day_0excl=myFBr:::some_statistics(table_acts_per_day[table_acts_per_day>0])
weeks=as.character(strftime(seq(dataI,dataF,by=7),format="%W-%Y"))
week_mess=factor(strftime(action_times,format="%W-%Y") ,levels = weeks)
table_acts_per_week=table(week_mess)
week_off=myFBr:::days_off_summary(table_acts_per_week)
acts_week=myFBr:::some_statistics(table_acts_per_week)
acts_week_0excl=myFBr:::some_statistics(table_acts_per_week[table_acts_per_week>0])
c(days_off=days_off,week_off=week_off,acts_day=acts_day,
acts_day_0excl=acts_day_0excl,acts_week=acts_week,acts_week_0excl=acts_week_0excl)
}
# table(table(cumsum(table_acts_per_day)))-1
days_off_summary <- function(table_acts_per_day){
if(length(table_acts_per_day)==0) return(c(prop_inactive=NA ,some_statistics(c())))
tab=table(cumsum(table_acts_per_day))-1
giorni_inattivita =tab[tab>0]
if((names(tab)[1]==0)&(tab[1]==1))
giorni_inattivita=c(1,giorni_inattivita)
c(prop_inactive=mean(table_acts_per_day==0) ,some_statistics(giorni_inattivita))
}
summary_freq_events_week_hour<-function(mess){
temp=factor(format(mess$time, "%H"),
levels=c("00","01","02","03","04","05","06","07","08","09","10","11","12","13","14","15","16","17","18","19","20","21","22","23"))
freq_ore=prop.table(table(temp))
names(dimnames(freq_ore))[1]=""
giorni=lubridate::wday(mess$time, label=TRUE) #weekdays(mess$time)
# giorni=gsub(".$","",giorni)
levels(giorni)=c("luned","marted","mercoled","gioved","venerd","sabat","domenic")
# temp=factor(giorni,levels=c("luned","marted","mercoled","gioved","venerd","sabat","domenic"))
freq_giorni=prop.table(table(temp))
names(dimnames(freq_giorni))[1]=""
# barplot(freq_giorni[1:7])
n_days_active=length(unique(format(mess$time, "%Y-%m-%d")))
list(freq_ore=freq_ore,freq_giorni=freq_giorni,n_days_active=n_days_active)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.