# data-raw/porcessNutPolicy.R

################################################################################
#
# Load libraries
#
################################################################################

library(pdftools)
library(tm)
library(stringr)
library(tidyr)
library(dplyr)
library(tidytext)

################################################################################

#
# Read the 2008 policy document. pdf_text() returns one string per page;
# splitting on newlines yields one character vector of text lines per page.
#
x <- pdf_text(pdf = "data-raw/documents/natNutPolicy2008.pdf")
x <- str_split(x, pattern = "\n")
#
# Build one tibble per page (text line + page number) and stack them in a
# single call rather than growing the result with rbind() inside a loop.
# seq_along() is used so an empty page list does not iterate over 1:0.
#
policy2008 <- bind_rows(
  lapply(
    seq_along(x),
    function(i) tibble(text = x[[i]], page = rep(i, length(x[[i]])))
  )
)
#
# Add a running line number and an empty section column. The section column
# is created here (and the result assigned) so the sub-assignments below
# write into an existing character column instead of an uninitialised one.
#
policy2008 <- mutate(.data = policy2008,
                     linenumber = row_number(),
                     section = NA_character_)
#
# Label each line with its section. Whole pages are labelled by page number;
# pages on which a section changes are split by line number.
# NOTE(review): the line-number ranges are hard-coded against the line split
# of this specific PDF extraction - confirm they still match if the PDF or
# the extraction changes.
#
policy2008$section[policy2008$page == 1] <- "front"
policy2008$section[policy2008$page %in% 2:3] <- "toc"
policy2008$section[policy2008$page == 4] <- "foreword"
policy2008$section[policy2008$page == 5] <- "abbreviations"
policy2008$section[policy2008$page %in% 6:7] <- "executive_summary"
policy2008$section[policy2008$page %in% 8:11] <- "introduction"
policy2008$section[policy2008$page == 12 & policy2008$linenumber %in% 314:331] <- "introduction"
policy2008$section[policy2008$page == 12 & policy2008$linenumber %in% 332:341] <- "policy_foundations"
policy2008$section[policy2008$page == 12 & policy2008$linenumber %in% 342:345] <- "guiding_principles"
policy2008$section[policy2008$page %in% 13:14] <- "guiding_principles"
policy2008$section[policy2008$page == 15 & policy2008$linenumber %in% 418:444] <- "guiding_principles"
policy2008$section[policy2008$page == 15 & policy2008$linenumber %in% 445:453] <- "priority_policy_issues"
policy2008$section[policy2008$page %in% 16:30] <- "priority_policy_issues"
policy2008$section[policy2008$page == 31] <- "references"
#
# Save the labelled lines as the nutPolicy2008 package dataset
#
nutPolicy2008 <- policy2008
usethis::use_data(nutPolicy2008, overwrite = TRUE)

################################################################################

#
# Read the 2019 policy document. pdf_text() returns one string per page;
# splitting on newlines yields one character vector of text lines per page.
#
x <- pdf_text(pdf = "data-raw/documents/natNutPolicy2019.pdf")
x <- str_split(x, pattern = "\n")
#
# Build one tibble per page (text line + page number) and stack them in a
# single call rather than growing the result with rbind() inside a loop.
# seq_along() is used so an empty page list does not iterate over 1:0.
#
policy2019 <- bind_rows(
  lapply(
    seq_along(x),
    function(i) tibble(text = x[[i]], page = rep(i, length(x[[i]])))
  )
)
#
# Add a running line number and an empty section column. The section column
# is created here (and the result assigned) so the sub-assignments below
# write into an existing character column instead of an uninitialised one.
#
policy2019 <- mutate(.data = policy2019,
                     linenumber = row_number(),
                     section = NA_character_)
#
# Label each line with its section. Whole pages are labelled by page number;
# pages on which a section changes are split by line number.
# NOTE(review): the line-number ranges are hard-coded against the line split
# of this specific PDF extraction - confirm they still match if the PDF or
# the extraction changes.
#
policy2019$section[policy2019$page == 1] <- "front"
policy2019$section[policy2019$page %in% 2:3] <- "toc"
policy2019$section[policy2019$page %in% 4:5] <- "foreword"
policy2019$section[policy2019$page == 6] <- "acknowledgement"
policy2019$section[policy2019$page %in% 7:8] <- "abbreviations"
policy2019$section[policy2019$page %in% 9:20] <- "introduction"
policy2019$section[policy2019$page == 21 & policy2019$linenumber %in% 597:604] <- "introduction"
policy2019$section[policy2019$page == 21 & policy2019$linenumber %in% 605:625] <- "policy_development_process"
policy2019$section[policy2019$page == 22 & policy2019$linenumber %in% 626:645] <- "policy_development_process"
policy2019$section[policy2019$page == 22 & policy2019$linenumber %in% 646:657] <- "guiding_principles"
policy2019$section[policy2019$page %in% 23:27] <- "guiding_principles"
# NOTE(review): only lines 833:848 of page 28 are labelled; any other lines
# on that page keep section = NA - confirm this is intended.
policy2019$section[policy2019$page == 28 & policy2019$linenumber %in% 833:848] <- "policy_orientation"
policy2019$section[policy2019$page %in% 29:36] <- "policy_orientation"
policy2019$section[policy2019$page == 37 & policy2019$linenumber %in% 1093:1102] <- "policy_orientation"
policy2019$section[policy2019$page == 37 & policy2019$linenumber %in% 1103:1120] <- "roles_and_responsibilities"
policy2019$section[policy2019$page %in% 38:39] <- "roles_and_responsibilities"
# NOTE(review): only lines 1206:1211 of page 40 are labelled; any other lines
# on that page keep section = NA - confirm this is intended.
policy2019$section[policy2019$page == 40 & policy2019$linenumber %in% 1206:1211] <- "implementation_arrangement_monitoring_evaluation"
policy2019$section[policy2019$page == 41] <- "implementation_arrangement_monitoring_evaluation"
policy2019$section[policy2019$page == 42 & policy2019$linenumber %in% 1240:1246] <- "implementation_arrangement_monitoring_evaluation"
policy2019$section[policy2019$page == 42 & policy2019$linenumber %in% 1247:1267] <- "policy_enabling_environment"
policy2019$section[policy2019$page == 43 & policy2019$linenumber %in% 1268:1275] <- "policy_enabling_environment"
policy2019$section[policy2019$page == 43 & policy2019$linenumber %in% 1276:1299] <- "risks_assumptions"
policy2019$section[policy2019$page == 44 & policy2019$linenumber %in% 1300:1303] <- "risks_assumptions"
policy2019$section[policy2019$page == 44 & policy2019$linenumber %in% 1304:1310] <- "references"
policy2019$section[policy2019$page == 45] <- "references"
#
# Save the labelled lines as the nutPolicy2019 package dataset
#
nutPolicy2019 <- policy2019
usethis::use_data(nutPolicy2019, overwrite = TRUE)
# validmeasures/liberiaNutriBudget documentation built on June 4, 2019, 5:45 p.m.