# Show each chunk's source code in the rendered notebook by default.
knitr::opts_chunk$set(echo = TRUE)

Purpose

The purpose of this notebook is to compare the entities extracted by the Keras model with the entities generated by regex splitting.

Import

Libraries

  # Install pacman on first use. requireNamespace() only checks availability;
  # unlike require(), it never leaves us with a freshly installed but
  # unattached package.
  if (!requireNamespace("pacman", quietly = TRUE)) {
    install.packages("pacman")
  }

  # Attach pacman explicitly so p_load() is on the search path whether pacman
  # was already installed or was installed just above.
  library(pacman)

  # Load (installing if needed) the packages this notebook uses.
  p_load(
    dplyr
  )

Data

The input text will be our training data hypotheses.

# Read the comparison data set; text columns stay character vectors rather
# than being converted to factors.
df_raw <- read.csv(
  file = "entity_extraction_comparison.csv",
  stringsAsFactors = FALSE
)

Generate Nodes

Rename column name

# Quick structural overview of the raw input before any renaming.
glimpse(df_raw)

Extract Nodes

# Swap the hypothesis columns: the original `hypothesis` is kept as
# `hypothesis_training`, and `hypothesis_pr` takes over the name `hypothesis`.
# NOTE(review): presumably entity_extraction() reads the `hypothesis` column,
# which is why `hypothesis_pr` is renamed to it - confirm.
df <- rename(
  df_raw,
  hypothesis_training = hypothesis,
  hypothesis = hypothesis_pr
)

# Confirm the renamed columns.
glimpse(df)

# Run entity extraction on the renamed data frame.
# NOTE(review): entity_extraction() is neither defined nor attached anywhere in
# this notebook; presumably it comes from the CausalityExtraction package -
# confirm it is loaded before this chunk runs.
nodes <- entity_extraction(df)

# Display the extraction result.
# NOTE(review): the export step later renames `cause` and `effect` in the
# combined table, so `nodes` presumably supplies those columns - confirm.
nodes

Export CSV

# Build the export table. The steps run inside local() so the intermediate
# tables do not leak into the notebook's environment.
output <- local({
  # Place the model's extracted entities next to the input columns.
  combined <- cbind(df, nodes)

  # Label the model output so it can be compared with the regex split columns.
  labelled <- rename(combined, node1_model = cause, node2_model = effect)

  # Keep the two column ranges wanted in the export.
  trimmed <- select(labelled, file_name:node2, node1_split:node2_model)

  # Restore the training hypothesis to its original column name. This must stay
  # after select(): `df` still carries a column named `hypothesis` (created by
  # the earlier rename), so renaming any sooner could collide with it.
  rename(trimmed, hypothesis = hypothesis_training)
})

glimpse(output)
write.csv(output, file = "training_data_w_extracted_entities.csv")


canfielder/CausalityExtraction documentation built on Jan. 5, 2022, 10:55 a.m.