# Workspace setup for building the Dutch semantic-graph features.
# NOTE: this removes every object from the global environment, so run the
# script in a fresh session.
rm(list = ls())
# All relative paths below ("../data", "../R" and the files written by this
# script) are resolved against this data-raw directory. The working directory
# is set once; nothing in between changes it.
setwd("J:/Jeugdcriminaliteit 3/2849b Pilot textmining/Data jaar/stap7-ontwikkelenmachinelearning/deel2-featureconstructie/frog2features/data-raw")
# library() stops with an error when a package is missing, whereas require()
# only warns and returns FALSE. The package name "tydr" was a misspelling of
# tidyr, so that package was never attached.
# The attach order is kept; note that tidyr::extract masks magrittr::extract.
library(XML)
library(data.table)
library(magrittr)
library(tidyr)
library(igraph)
library(stringi)
# ----------------------------------------------------------------------------------------------------
# Lexical entries
# ----------------------------------------------------------------------------------------------------
# Restore the objects stored in the .rda files; the code below uses
# `lemma2semantic` and `semantic2semantic`.
load(file = "../data/lemma2semantic.dutch.rda")
load(file = "../data/semantic2semantic.dutch.rda")
# Helper code; presumably provides `%+%` and the get.neighborhood.* functions
# used further down -- TODO confirm.
source("../R/functionsConvenienceR.R")
source("graph.functions.R")
# Recode the single-letter part-of-speech codes in place (data.table `:=`
# modifies lemma2semantic by reference).
lemma2semantic[majorpos == "a", majorpos := "ADJ"]
lemma2semantic[majorpos == "n", majorpos := "N"]
lemma2semantic[majorpos == "v", majorpos := "WW"]
# TRUE: rebuild and save the collapsed synset table; FALSE: load the copy
# previously saved to disk.
create <- FALSE
# Edge list of the semantic graph: one row per hypernym relation, keeping the
# source synset, the target synset and the relation type.
edges <- semantic2semantic[, .(sourceid, targetid, relType)][relType == "hypernym"]
# Keep only edges whose two endpoints both occur as a synset in the
# lemma2semantic table.
edges <- edges[
  sourceid %chin% lemma2semantic$synset & targetid %chin% lemma2semantic$synset
]
# Collect all lemma instances of each semantic set (synset) and concatenate
# them into one bracketed, comma-separated label per synset.
if (isTRUE(create)) {
  # The label is built once per synset and recycled over the rows of that
  # group by `:=`; the earlier per-row lapply() rebuilt the identical string
  # for every row of the group.
  # `%+%` is not defined in this script; presumably a paste0-style operator
  # from the sourced helper file -- TODO confirm.
  # `:=` adds the column to lemma2semantic by reference, so
  # synsets.dutch.collapsed refers to that same (modified) table.
  synsets.dutch.collapsed <- lemma2semantic[
    ,
    instances := "[" %+% paste0(lemma, collapse = ", ") %+% "]",
    by = "synset"
  ]
  save(synsets.dutch.collapsed, file = "synset.dutch.collapsed.Rda")
} else {
  # Reuse the table written by an earlier run with create set to TRUE.
  load(file = "synset.dutch.collapsed.Rda")
}
# First pass: build the graph from the filtered edge list to obtain its
# vertex names.
graph.semantic.dutch <- graph.data.frame(edges)
# Look up the instance label of every vertex. Left join: vertices without a
# label are kept (with NA), and the table is not re-sorted.
instances <- merge(
  x = data.table(synset = V(graph.semantic.dutch)$name),
  y = unique(synsets.dutch.collapsed[, .(synset, instances)]),
  by = "synset",
  all.x = TRUE,
  all.y = FALSE,
  sort = FALSE
)
# Attach the label of the source synset to every edge ...
edges <- merge(edges, instances, by.x = "sourceid", by.y = "synset")[
  , .(sourceid, targetid, relType, sourceinstance = instances)
]
# ... and then the label of the target synset.
edges <- merge(edges, instances, by.x = "targetid", by.y = "synset")[
  , .(sourceid, targetid, relType, sourceinstance, targetinstance = instances)
]
# Rebuild the graph from the (source, target) pairs of the edge list; the
# vertex order can differ from the first graph because the merges above
# re-sorted the edges.
graph.semantic.dutch <- graph.data.frame(
  d = edges[, .(sourceid, targetid)]
)
# Instance label per vertex of the rebuilt graph (left join, so vertices
# without a label get NA).
instances <- merge(
  x = data.table(synset = V(graph.semantic.dutch)$name),
  y = unique(synsets.dutch.collapsed[, .(synset, instances)]),
  by = "synset",
  all.x = TRUE,
  all.y = FALSE,
  sort = FALSE
)
# Align the labels to the vertices by name rather than by row position, so the
# assignment does not depend on the merge keeping the vertex order or on it
# returning exactly one row per vertex.
V(graph.semantic.dutch)$instances <-
  instances$instances[match(V(graph.semantic.dutch)$name, instances$synset)]
# Per-vertex counts of outgoing and incoming edges.
V(graph.semantic.dutch)$outdegree <- degree(graph.semantic.dutch, mode = "out")
V(graph.semantic.dutch)$indegree <- degree(graph.semantic.dutch, mode = "in")
# Outgoing ego graphs around every vertex: up to 2 steps for the indegree
# aggregate, up to 5 steps for the other neighbourhood measures.
ego.graphs.o2 <- make_ego_graph(graph.semantic.dutch, order = 2, mode = "out")
ego.graphs.o5 <- make_ego_graph(graph.semantic.dutch, order = 5, mode = "out")
# One row per vertex: its identifiers plus the neighbourhood measures.
# The get.neighborhood.* helpers are not defined in this script; presumably
# they come from graph.functions.R -- TODO confirm what each one returns.
semantic.properties.dutch <- local({
  vertices <- V(graph.semantic.dutch)
  data.table(
    synset = vertices$name,
    instances = vertices$instances,
    outdegree = vertices$outdegree,
    neighb.density = get.neighborhood.density(ego.graphs = ego.graphs.o5),
    neighb.transitivity = get.neighborhood.transitivity(ego.graphs = ego.graphs.o5),
    neighb.diameter = get.neighborhood.diameter(ego.graphs = ego.graphs.o5),
    neighb.average.indegree = get.neighborhood.aggregate(
      ego.graphs = ego.graphs.o2,
      attribute.name = "indegree"
    )
  )
})
# Inspect the first 100 rows.
semantic.properties.dutch[seq_len(100)]
# ----------------------------------------------------------------------------------------------------
# Exploratory plotting and export.
# NOTE(review): several names used below (voorbeeld_outdeg2, egograph,
# graph.hyperonyms.dutch, graph.hypernym) are never assigned in the visible
# part of this script, and the workspace is cleared at the top. Unless the
# sourced helper files define them, these statements fail -- confirm before
# running this section non-interactively.
# ----------------------------------------------------------------------------------------------------
voorbeeld <- voorbeeld_outdeg2
# Tree-layout plot of an ego graph, labelling each vertex with its instances.
plot(egograph, layout= layout_as_tree(egograph, flip.y = F),
vertex.label = V(egograph)$instances,
edge.arrow.size =.4,
vertex.label.cex = .6)
# NOTE(review): at this point `edges` has the columns sourceid, targetid,
# relType, sourceinstance and targetinstance, and was restricted to
# relType == "hypernym" earlier. The columns instances.x / instances.y and
# the relation types "hyponym" / "holonym" used in the next two statements
# therefore look like leftovers from an earlier version -- verify.
edges[relType == "hyponym", .(instances.x, instances.y)][instances.x %like% "kopen",]
edges.hyperonym <- edges[relType == "holonym",][, uniqueX := .N, by = c("instances.x")][uniqueX == 1]
# NOTE(review): same assignment as the one made on graph.semantic.dutch, but
# on a graph object with a different name; possibly an older name for the
# same graph -- confirm.
V(graph.hyperonyms.dutch)$instances <- instances$instances
# Subgraph: outgoing ego graph of order 2 around the vertices whose instance
# label is exactly "[onderdeel]"; the first resulting graph is plotted.
g.ego <- graph.hyperonyms.dutch %>%
make_ego_graph(.,order = 2,
nodes = V(.)[V(.)$instances == "[onderdeel]"],
mode = "out")
plot(g.ego[[1]], vertex.label = V(g.ego[[1]])$instances)
# Write the graph object to the working directory.
save(graph.hyperonyms.dutch, file="graph.hyperonyms.dutch.Rda")
subgraph <- graph.hyperonyms.dutch
V(graph.hypernym)
# Add the following code to your website.
# For more information on customizing the embed code, read Embedding Snippets.