knitr::opts_chunk$set(echo = TRUE)
library(reticulate)

# Name of the virtualenv to use and the Python interpreter intended for
# creating it; both are read from environment variables.
virtualenv_dir <- Sys.getenv("VIRTUALENV_NAME")
python_path <- Sys.getenv("PYTHON_PATH")

# Sys.getenv() returns "" for an unset variable. Fail early with a clear
# message rather than handing an empty environment name to reticulate.
if (!nzchar(virtualenv_dir)) {
  stop("Environment variable VIRTUALENV_NAME is not set.", call. = FALSE)
}

# Virtualenvs currently known to reticulate (only referenced by the disabled
# creation logic below).
virtualenv_list <- reticulate::virtualenv_list()

# Python packages installed into the virtualenv.
PYTHON_DEPENDENCIES <- c("numpy", "joblib", "scikit-learn", "tensorflow",
                         "tika", "pdfminer.six", "nltk", "scipy")

# Create New Virtual Environment if Needed (currently disabled)
# if (!(virtualenv_dir %in% virtualenv_list)){
#   
# }
# 
# reticulate::virtualenv_create(envname = virtualenv_dir, python = python_path)

# ignore_installed = TRUE reinstalls every package even when it is already
# present in the environment.
reticulate::virtualenv_install(virtualenv_dir, packages = PYTHON_DEPENDENCIES,
                               ignore_installed = TRUE)

reticulate::use_virtualenv(virtualenv_dir, required = TRUE)

# Report the active interpreter only after the virtualenv has been selected.
# py_config() forces Python to initialize, and reticulate binds to a single
# interpreter per R session, so calling it before use_virtualenv() could lock
# the session onto a different Python and make the required virtualenv fail.
reticulate::py_config()

# Download WordNet from NLTK

# Bind the Python NLTK package and fetch the WordNet corpus through its
# downloader.
nltk <- reticulate::import("nltk")
nltk$download("wordnet")

# Show the Python configuration reticulate is using.
reticulate::py_config()
# Python module handles ----

# General-purpose numerics and deep learning
np <- reticulate::import("numpy")
tf <- reticulate::import("tensorflow")

# scikit-learn modeling modules
skl_ms <- reticulate::import("sklearn.model_selection")  # model selection
skl_lm <- reticulate::import("sklearn.linear_model")     # linear models
skl_svm <- reticulate::import("sklearn.svm")             # support vector machines
skl_nb <- reticulate::import("sklearn.naive_bayes")      # naive Bayes
# Bind pdfminer's high-level API and extract the text of one sample paper.
# The path is relative, so it is resolved against the current working
# directory; the extracted text is printed as the top-level result.
pdfminer <- reticulate::import("pdfminer.high_level")
pdfminer$extract_text("./../data/sample_papers/jv04amj.pdf")


# canfielder/CausalityExtraction documentation built on Jan. 5, 2022, 10:55 a.m.