Nothing
## ---- include=FALSE-----------------------------------------------------------
# Hidden setup chunk: attach knitr and print numbers with three significant
# digits.
library(knitr)
options(digits = 3)
## ---- message=FALSE, warning=FALSE, echo=TRUE---------------------------------
library(quanteda)

# Toy input: one row per document, holding an id, the raw text and one
# additional column (author).
d <- data.frame(
  id = c(1, 2, 3),
  text = c(
    "Socrates is human",
    "Humans are mortal",
    "Therefore, Socrates is mortal"
  ),
  author = c("Aristotle", "Aristotle", "Aristotle"),
  # Spell out FALSE: the shorthand `F` is an ordinary variable that can be
  # reassigned, whereas FALSE is a reserved word.
  stringsAsFactors = FALSE
)

# Build a corpus that takes document ids from `id` and texts from `text`,
# then tokenize it and tabulate the tokens into a document-feature matrix.
corp <- corpus(d, docid_field = "id", text_field = "text")
dtm <- dfm(tokens(corp))

dtm           # print the document-feature matrix
docvars(dtm)  # print the document-level variables of the dfm
## ---- message=FALSE, warning=FALSE, echo=TRUE---------------------------------
library(RNewsflow)

# Work on a copy of the demo data (rnewsflow_dfm) and print it.
dtm <- rnewsflow_dfm
dtm
## -----------------------------------------------------------------------------
# Show the first three rows of the document variables.
head(docvars(dtm), n = 3)
## -----------------------------------------------------------------------------
# Compute the term/day distribution table and inspect its last three rows.
tdd <- term_day_dist(dtm)
tail(tdd, n = 3)
## -----------------------------------------------------------------------------
# Keep only the columns for terms whose days.entropy.norm is at most 0.1.
select_terms <- tdd$term[tdd$days.entropy.norm <= 0.1]
dtm <- dtm[, select_terms]
## -----------------------------------------------------------------------------
# Re-weight the remaining features with tf-idf.
dtm <- quanteda::dfm_tfidf(dtm)
## ----results='hide', message=FALSE, warning=FALSE-----------------------------
# Compare the documents in the dtm, reading dates from the `date` variable,
# with an hour window of 0 to 36 and a minimum similarity of 0.4.
g <- newsflow_compare(
  dtm,
  date_var = "date",
  hour_window = c(0, 36),
  min_similarity = 0.4
)
## -----------------------------------------------------------------------------
# Pull one vertex attribute and one edge attribute out of the graph and peek
# at the first values of each.
vertex_sourcetype <- V(g)$sourcetype
edge_hourdiff <- E(g)$hourdiff
head(vertex_sourcetype)
head(edge_hourdiff)
## ---- fig.show='hold'---------------------------------------------------------
# Extract the vertices and the edges as data frames and show three rows of
# each (a subset of columns for the vertices).
v <- as_data_frame(g, "vertices")
e <- as_data_frame(g, "edges")
head(v[, c("name", "date", "source", "sourcetype")], n = 3)
head(e, n = 3)
## ---- fig.width = 7, fig.height = 3-------------------------------------------
# Histogram of the hourdiff attribute stored on the edges of the graph.
hist(
  E(g)$hourdiff,
  main = "Time distance of document pairs",
  xlab = "Time difference in hours",
  breaks = 150,
  # FALSE rather than `F`: `F` is a plain variable and can be reassigned.
  right = FALSE
)
## -----------------------------------------------------------------------------
# Window applied to all vertices.
g <- filter_window(g, hour_window = c(0.1, 36))
# Separate window for vertices whose sourcetype is 'Print NP'.
g <- filter_window(
  g,
  hour_window = c(6, 36),
  to_vertices = V(g)$sourcetype == "Print NP"
)
## -----------------------------------------------------------------------------
# Display the resulting windows by source.
show_window(g, to_attribute = "source")
## -----------------------------------------------------------------------------
# Split the graph into its separate sub-components.
g_subcomps <- decompose(g)
## ---- fig.width = 7, fig.height = 3-------------------------------------------
gs <- g_subcomps[[20]] # select the 20th sub-component
document_network_plot(gs)
## ---- fig.width = 7, fig.height = 4-------------------------------------------
# Plot the same sub-component again, this time using sourcetype as the source
# attribute and passing the dtm along.
document_network_plot(
  gs,
  source_attribute = "sourcetype",
  dtm = dtm
)
## ---- fig.width = 7, fig.height = 3-------------------------------------------
# Reduce the sub-component with only_first_match() and plot the result.
gs_onlyfirst <- only_first_match(gs)
document_network_plot(gs_onlyfirst)
## -----------------------------------------------------------------------------
# Aggregate the document network by source, summarising the hourdiff edge
# attribute with the median, and look at the first aggregated edges.
g_agg <- network_aggregate(
  g,
  by = "source",
  edge_attribute = "hourdiff",
  agg_FUN = median
)
e <- as_data_frame(g_agg, "edges")
head(e)
## -----------------------------------------------------------------------------
# Dense adjacency matrix filled with the to.Vprop edge attribute.
adj_m <- as_adjacency_matrix(g_agg, attr = "to.Vprop", sparse = FALSE)
round(adj_m, digits = 2) # round to 2 decimals
## ---- fig.align='center', fig.width=7, fig.height=5---------------------------
directed_network_plot(
  g_agg,
  weight_var = "to.Vprop",
  weight_thres = 0.2
)
## ---- fig.align='center', fig.width=7, fig.height=5---------------------------
# Same aggregation and plot as for g, but starting from the graph returned by
# only_first_match().
g2 <- only_first_match(g)
g2_agg <- network_aggregate(
  g2,
  by = "source",
  edge_attribute = "hourdiff",
  agg_FUN = median
)
directed_network_plot(
  g2_agg,
  weight_var = "to.Vprop",
  weight_thres = 0.2
)
## -----------------------------------------------------------------------------
# Add a `day` vertex attribute holding the date formatted as YYYY-MM-DD.
V(g)$day <- format(as.Date(V(g)$date), "%Y-%m-%d")
# Aggregate from sourcetype to sourcetype-per-day and return a data frame.
agg_perday <- network_aggregate(
  g,
  by_from = "sourcetype",
  by_to = c("sourcetype", "day"),
  edge_attribute = "hourdiff",
  agg_FUN = median,
  return_df = TRUE
)
# First rows where the receiving sourcetype is 'Online NP'.
head(agg_perday[
  agg_perday$to.sourcetype == "Online NP",
  c("from.sourcetype", "to.sourcetype", "to.day", "to.Vprop")
])
## -----------------------------------------------------------------------------
# Aggregate from individual documents (name) to sourcetype, taking the maximum
# of the weight edge attribute, and return a data frame.
agg_perdoc <- network_aggregate(
  g,
  by_from = "name",
  by_to = "sourcetype",
  edge_attribute = "weight",
  agg_FUN = max,
  return_df = TRUE
)
# Cross-tabulate agg.weight by from.name and to.sourcetype as a dense table.
docXsource <- xtabs(
  agg.weight ~ from.name + to.sourcetype,
  agg_perdoc,
  sparse = FALSE
)
head(docXsource)
## ----eval=FALSE---------------------------------------------------------------
# NOTE(review): non-evaluated chunk (eval=FALSE); every statement below is
# commented out. It uses hour_window = c(-0.1,60) and g_subcomps[[55]], which
# differ from the values used in the evaluated chunks earlier in this file --
# confirm which values are intended.
#
# library(RNewsflow)
# library(quanteda)
#
# # Prepare DTM
# dtm = rnewsflow_dfm ## copy demo data
#
# tdd = term_day_dist(dtm)
# dtm = dtm[,tdd$term[tdd$days.entropy.norm <= 0.1]]
# dtm = dfm_tfidf(dtm)
#
# # Prepare document similarity network
# g = newsflow_compare(dtm, hour_window = c(-0.1,60),
# min_similarity = 0.4)
# g = filter_window(g, hour_window = c(6, 36),
# to_vertices = V(g)$sourcetype == 'Print NP')
# show_window(g, to_attribute = 'source')
#
# g_subcomps = decompose(g)
# document_network_plot(g_subcomps[[55]], dtm=dtm)
#
# g_agg = network_aggregate(g, by='source',
# edge_attribute='hourdiff', agg_FUN=median)
#
# as_adjacency_matrix(g_agg, attr='to.Vprop')
# directed_network_plot(g_agg, weight_var = 'to.Vprop',
# weight_thres=0.2)
#
# g2 = only_first_match(g)
# g2_agg = network_aggregate(g2, by='source',
# edge_attribute='hourdiff', agg_FUN=median)
#
# as_adjacency_matrix(g2_agg, attr='to.Vprop')
# directed_network_plot(g2_agg, weight_var = 'to.Vprop',
# weight_thres=0.2)
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.