# Chunk setup: echo the source of every chunk in the rendered output.
knitr::opts_chunk$set(echo = TRUE)

# Install pacman on first use, then attach it explicitly. `require()` only
# returns FALSE when a package is missing, so after a fresh install the
# unqualified `p_load()` call below would not be found. `library()` attaches
# the package and fails loudly if the install did not succeed.
if (!requireNamespace("pacman", quietly = TRUE)) {
  install.packages("pacman")
}
library(pacman)

# Attach dplyr; later chunks use `%>%` and `mutate()` unqualified.
p_load(dplyr)
# Relative path of the directory holding the sample documents.
folder_path <- "./../../../inst/extdata/sample_documents/"

# Collect the PDF files directly inside `folder_path` (no recursion), with
# full paths. `pattern` is a regular expression: the dot is escaped and the
# match is anchored at the end so only names ending in ".pdf" are returned
# (a bare ".pdf" would also match names such as "notes_pdf.txt").
pdf_paths <- list.files(
  path = folder_path,
  pattern = "\\.pdf$",
  recursive = FALSE,
  full.names = TRUE
)
print(pdf_paths)
# Index (into `pdf_paths`) of the sample document to process.
n <- 19

# Fail early with a clear message when the index is out of range; indexing
# past the end of `pdf_paths` would otherwise silently yield NA.
if (n < 1 || n > length(pdf_paths)) {
  stop(
    "Document index ", n, " is out of range: found ",
    length(pdf_paths), " PDF file(s) in ", folder_path,
    call. = FALSE
  )
}

input_path <- pdf_paths[n]
input_path

# Pre-process the selected document with the project's process_text().
text_processed <- process_text(input_path)
# inspect(text_processed.v, m = 180)
# Extract hypotheses from the processed text, calling the extractor with
# `apply_model = FALSE`, and display the result.
hypothesis.df <- hypothesis_extraction(
  text_processed,
  apply_model = FALSE
)
hypothesis.df

# Keep only the `hypothesis` column for the steps that follow.
hypothesis <- dplyr::select(hypothesis.df, hypothesis)
hypothesis
# Strip every "hypo <label>:" prefix, together with the whitespace that
# follows the colon, from the hypothesis text. `(.*?)` is lazy, so each
# match stops at the first colon after "hypo ".
hypothesis <- hypothesis %>%
  dplyr::mutate(
    hypothesis = gsub("hypo (.*?):\\s*", "", hypothesis)
  )
hypothesis
# Extract entities for the hypotheses and display them.
entities <- entity_extraction(hypothesis.df)
entities

# Place the cleaned hypothesis text side by side with the extracted
# entities, number the rows, and move `row_id` to the first column.
model_input.df <- dplyr::bind_cols(hypothesis, entities) %>%
  dplyr::mutate(row_id = dplyr::row_number()) %>%
  dplyr::select(row_id, dplyr::everything())
model_input.df
# POSIX character class matching any single punctuation character.
pattern_punct <- "[[:punct:]]"

# Remove all punctuation from the three text columns in one pass.
model_input.df <- model_input.df %>%
  dplyr::mutate(
    dplyr::across(
      c(hypothesis, cause, effect),
      function(text) {
        stringr::str_remove_all(string = text, pattern = pattern_punct)
      }
    )
  )
model_input.df
# Placeholder written wherever no cause / effect entity was found.
missing_tag <- "<missing>"

# Replace empty `cause` and `effect` strings with the placeholder; all other
# values are kept unchanged.
model_input.df <- model_input.df %>%
  dplyr::mutate(
    cause = dplyr::if_else(cause == "", missing_tag, cause),
    effect = dplyr::if_else(effect == "", missing_tag, effect)
  )
model_input.df
# Switch: when TRUE, the extracted entities are replaced by the generic
# tokens "node1" / "node2" inside the hypothesis text.
# NOTE(review): this flag shares its name with the entity_extraction()
# function called earlier in this file; consider a distinct name.
entity_extraction <- FALSE

if (entity_extraction) {
  print("if")
  model_input.df_001 <- model_input.df %>%
    dplyr::mutate(
      # First occurrence of the cause becomes "node1", unless the cause is
      # the missing-value placeholder.
      causal_statement = dplyr::if_else(
        cause != missing_tag,
        stringr::str_replace(hypothesis, cause, "node1"),
        hypothesis
      ),
      # Same for the effect, applied to the statement built just above.
      causal_statement = dplyr::if_else(
        effect != missing_tag,
        stringr::str_replace(causal_statement, effect, "node2"),
        causal_statement
      )
    )
} else {
  print("else")
  # No substitution: the causal statement is the hypothesis text itself.
  model_input.df_001 <- dplyr::mutate(
    model_input.df,
    causal_statement = hypothesis
  )
}
model_input.df_001
## Remove stopwords
# Split each causal statement into one row per token (column `word`), drop
# the tokens that appear in tidytext's stop-word list, and keep only the row
# identifier alongside each remaining word.
model_input.df_002 <- model_input.df_001 %>%
  tidytext::unnest_tokens(output = word, input = causal_statement) %>%
  dplyr::anti_join(tidytext::get_stopwords(), by = "word") %>%
  dplyr::select(row_id, word)
# Build the model input with entity substitution enabled and the "stem"
# token method.
gen_causality_direction_model_input(
  hypothesis.df,
  entity_extraction = TRUE,
  token_method = "stem"
)

# Same call with entity substitution disabled.
gen_causality_direction_model_input(
  hypothesis.df,
  entity_extraction = FALSE,
  token_method = "stem"
)

# Call with token_method = "error".
# NOTE(review): presumably demonstrates how an unsupported token method is
# handled -- confirm against the function's definition.
gen_causality_direction_model_input(
  hypothesis.df,
  token_method = "error"
)
# Causality classification
# Classify the hypotheses, then wrap the result in a data frame so it can be
# column-bound into the output table later on.
causality_class <- causality_classification(hypothesis.df)
causality_class <- data.frame(causality_class)
causality_class

# Direction class
# Same two steps for the direction classifier.
direction_class <- direction_classification(hypothesis.df)
direction_class <- data.frame(direction_class)
direction_class
# File name of the processed document, without its directory part.
file_name <- basename(input_path)

# Compile table
# Join hypotheses with their entities and drop every row containing an NA.
iter.df <- tidyr::drop_na(cbind(hypothesis.df, entities))

# Append both classification results column-wise.
# NOTE(review): rows were dropped above but the classification outputs were
# computed from the full `hypothesis.df`; verify the row counts still line up.
iter.df <- cbind(iter.df, causality_class, direction_class)

# Record which document each row came from.
iter.df[["file_name"]] <- file_name
iter.df
# Reduce the table to the output columns, in output order, renaming
# `h_id`, `direction_pred` and `causality_pred` on the way; any factor
# column is then converted to character.
iter.df <- iter.df %>%
  dplyr::select(
    file_name,
    hypothesis_num = h_id,
    hypothesis,
    cause,
    effect,
    direction = direction_pred,
    causal_relationship = causality_pred
  ) %>%
  purrr::modify_if(is.factor, as.character)
iter.df
# Remove a trailing comma from each `effect` value.
# Two fixes compared with the earlier version of this step:
#   * the pattern was "$,", i.e. the end-of-string anchor placed before the
#     comma, which can never match; ",$" matches a comma at the end;
#   * the result of the pipeline was not assigned, so the `iter.df` printed
#     afterwards was unchanged.
iter.df <- iter.df %>%
  dplyr::mutate(
    effect = stringr::str_remove_all(
      string = effect,
      pattern = stringr::regex(",$")
    )
  )
iter.df
# Blank out the predicted causal relationship on rows where either the
# cause or the effect is an empty string; other rows keep their prediction
# (as character). The result is assigned back so that the following steps
# operate on the updated table, and then displayed.
iter.df <- iter.df %>%
  dplyr::mutate(
    causal_relationship = dplyr::if_else(
      condition = (cause == "") | (effect == ""),
      true = "",
      false = as.character(causal_relationship)
    )
  )
iter.df
# Strip every "hypo <label>:" prefix, together with the whitespace that
# follows the colon, from the hypothesis text of the output table.
iter.df <- iter.df %>%
  dplyr::mutate(
    hypothesis = gsub("hypo (.*?):\\s*", "", hypothesis)
  )
iter.df
# Call CausalityExtraction() on the same input file used in the steps above.
# NOTE(review): presumably runs the whole pipeline end to end -- confirm.
CausalityExtraction(file_path = input_path)
# Add the following code to your website.
# For more information on customizing the embed code, read Embedding Snippets.