################################################################################
#
# Load libraries
#
################################################################################
library(pdftools)
library(tm)
library(stringr)
library(tidyr)
library(dplyr)
library(tidytext)
################################################################################
# Read the 2008 national nutrition policy PDF and split the text of every
# page on newlines, giving a list with one character vector of lines per page.
x <- pdf_text(pdf = "data-raw/documents/natNutPolicy2008.pdf")
x <- str_split(x, pattern = "\n")
#
# Build one tibble per page (the text lines plus the page they came from) and
# stack them in a single bind_rows() call rather than growing the result with
# rbind() inside a loop. seq_along() yields no iterations for an empty page
# list, whereas 1:length(x) would iterate over c(1, 0).
#
policy2008 <- bind_rows(
  lapply(
    seq_along(x),
    function(i) tibble(text = x[[i]], page = rep(i, length(x[[i]])))
  )
)
#
# add linenumber and chapters
#
# linenumber is the running row index over the whole document. section is
# derived from the page and, on pages where one section ends and the next
# begins (12 and 15), from hard-coded linenumber ranges.
#
# The previous code called tibble::add_column() without assigning its result,
# so that call had no effect; the section column is now created explicitly.
# The conditions below are mutually exclusive, so the first-match rule of
# case_when() gives the same labels as the former sequential assignments.
# Rows matching no condition stay NA.
#
# NOTE(review): the linenumber ranges depend on how pdf_text() splits this
# particular PDF into lines - re-check them if the source document or the
# pdftools version changes.
#
policy2008 <- mutate(
  .data = policy2008,
  linenumber = row_number(),
  section = case_when(
    page == 1 ~ "front",
    page %in% 2:3 ~ "toc",
    page == 4 ~ "foreword",
    page == 5 ~ "abbreviations",
    page %in% 6:7 ~ "executive_summary",
    page %in% 8:11 ~ "introduction",
    page == 12 & linenumber %in% 314:331 ~ "introduction",
    page == 12 & linenumber %in% 332:341 ~ "policy_foundations",
    page == 12 & linenumber %in% 342:345 ~ "guiding_principles",
    page %in% 13:14 ~ "guiding_principles",
    page == 15 & linenumber %in% 418:444 ~ "guiding_principles",
    page == 15 & linenumber %in% 445:453 ~ "priority_policy_issues",
    page %in% 16:30 ~ "priority_policy_issues",
    page == 31 ~ "references",
    TRUE ~ NA_character_
  )
)
#
# Store the labelled text as the nutPolicy2008 dataset of the package.
#
nutPolicy2008 <- policy2008
usethis::use_data(nutPolicy2008, overwrite = TRUE)
################################################################################
# Read the 2019 national nutrition policy PDF and split the text of every
# page on newlines, giving a list with one character vector of lines per page.
x <- pdf_text(pdf = "data-raw/documents/natNutPolicy2019.pdf")
x <- str_split(x, pattern = "\n")
#
# Build one tibble per page (the text lines plus the page they came from) and
# stack them in a single bind_rows() call rather than growing the result with
# rbind() inside a loop. seq_along() yields no iterations for an empty page
# list, whereas 1:length(x) would iterate over c(1, 0).
#
policy2019 <- bind_rows(
  lapply(
    seq_along(x),
    function(i) tibble(text = x[[i]], page = rep(i, length(x[[i]])))
  )
)
#
# add linenumber and chapters
#
# linenumber is the running row index over the whole document. section is
# derived from the page and, on pages where one section ends and the next
# begins, from hard-coded linenumber ranges.
#
# The previous code called tibble::add_column() without assigning its result,
# so that call had no effect; the section column is now created explicitly.
# The conditions below are mutually exclusive, so the first-match rule of
# case_when() gives the same labels as the former sequential assignments.
# Rows matching no condition stay NA.
#
# NOTE(review): page 28 is only labelled for linenumbers 833:848 and page 40
# only for 1206:1211; any other lines on those pages remain NA. This mirrors
# the original assignments - TODO confirm that the gaps are intended.
# NOTE(review): the linenumber ranges depend on how pdf_text() splits this
# particular PDF into lines - re-check them if the source document or the
# pdftools version changes.
#
policy2019 <- mutate(
  .data = policy2019,
  linenumber = row_number(),
  section = case_when(
    page == 1 ~ "front",
    page %in% 2:3 ~ "toc",
    page %in% 4:5 ~ "foreword",
    page == 6 ~ "acknowledgement",
    page %in% 7:8 ~ "abbreviations",
    page %in% 9:20 ~ "introduction",
    page == 21 & linenumber %in% 597:604 ~ "introduction",
    page == 21 & linenumber %in% 605:625 ~ "policy_development_process",
    page == 22 & linenumber %in% 626:645 ~ "policy_development_process",
    page == 22 & linenumber %in% 646:657 ~ "guiding_principles",
    page %in% 23:27 ~ "guiding_principles",
    page == 28 & linenumber %in% 833:848 ~ "policy_orientation",
    page %in% 29:36 ~ "policy_orientation",
    page == 37 & linenumber %in% 1093:1102 ~ "policy_orientation",
    page == 37 & linenumber %in% 1103:1120 ~ "roles_and_responsibilities",
    page %in% 38:39 ~ "roles_and_responsibilities",
    page == 40 & linenumber %in% 1206:1211 ~
      "implementation_arrangement_monitoring_evaluation",
    page == 41 ~
      "implementation_arrangement_monitoring_evaluation",
    page == 42 & linenumber %in% 1240:1246 ~
      "implementation_arrangement_monitoring_evaluation",
    page == 42 & linenumber %in% 1247:1267 ~ "policy_enabling_environment",
    page == 43 & linenumber %in% 1268:1275 ~ "policy_enabling_environment",
    page == 43 & linenumber %in% 1276:1299 ~ "risks_assumptions",
    page == 44 & linenumber %in% 1300:1303 ~ "risks_assumptions",
    page == 44 & linenumber %in% 1304:1310 ~ "references",
    page == 45 ~ "references",
    TRUE ~ NA_character_
  )
)
#
# Store the labelled text as the nutPolicy2019 dataset of the package.
#
nutPolicy2019 <- policy2019
usethis::use_data(nutPolicy2019, overwrite = TRUE)
# Add the following code to your website.
# For more information on customizing the embed code, read Embedding Snippets.