################################################################################
#
# Load required libraries
#
################################################################################
library(pdftools)
library(tm)
library(tabulizer)
library(stringr)
library(tidyverse)
library(tidytext)
################################################################################
#
# Function to process tables
#
################################################################################
# Turn a list of tokenised table rows into a data frame of labels and numbers.
#
# Arguments:
#   tab   A list; each element holds the text tokens of one table row.
#   nrow  Number of rows in the result. Defaults to length(tab). Rows beyond
#         length(tab) are filled with NA.
#   ncol  Number of numeric columns in the result. Defaults to the largest
#         count of numeric tokens found in any row. Rows with fewer numeric
#         tokens are padded with NA on the right.
#
# Returns:
#   A data frame whose first column, `labs`, is the row label (all purely
#   alphabetic tokens joined by single spaces, "" when there are none),
#   followed by `ncol` numeric columns holding the 6- to 8-digit tokens of
#   the row in their original order.
#
# Errors if `tab` has more elements than `nrow`, or if any row holds more
# numeric tokens than `ncol`.
get_table <- function(tab, nrow = NULL, ncol = NULL) {
  n_rows <- length(tab)
  if (is.null(nrow)) {
    nrow <- n_rows
  }
  if (n_rows > nrow) {
    stop("`tab` has ", n_rows, " rows but `nrow` is ", nrow, ".",
         call. = FALSE)
  }

  # as.character() guards against factor input, where as.numeric() would
  # otherwise return level codes instead of the printed numbers.
  cells <- lapply(tab, as.character)
  label_parts <- lapply(cells, function(x) x[grepl("^[a-zA-Z]+$", x)])
  values <- lapply(
    cells,
    function(x) as.numeric(x[grepl("^[0-9]{6,8}$", x)])
  )

  n_values <- lengths(values)
  if (is.null(ncol)) {
    # max(..., 0L) keeps this well defined when `tab` is empty.
    ncol <- max(n_values, 0L)
  }
  if (any(n_values > ncol)) {
    stop("A row holds more numeric tokens than `ncol` (", ncol, ").",
         call. = FALSE)
  }

  labs <- rep(NA_character_, nrow)
  df <- matrix(data = NA_real_, nrow = nrow, ncol = ncol)
  for (i in seq_len(n_rows)) {
    labs[i] <- paste(label_parts[[i]], collapse = " ")
    df[i, seq_len(n_values[i])] <- values[[i]]
  }

  data.frame(labs = labs, df, stringsAsFactors = FALSE)
}
################################################################################
#
# Create list for information and tables
#
################################################################################
## Extract the tables found on pages 15-21 of the recast budget PDF
health1 <- extract_tables(
  file = "data-raw/budget/2018 Ministry of Finance and Development Planning Recast.pdf",
  pages = 15:21,
  method = "decide"
)
## Table 1: keep rows 8-36 of the first extracted table
tab1 <- health1[[1]][8:36, ]
## Rows 3 and 9: append the column-1 text found two rows below, then take
## columns 2-8 from the row directly below. The rows read (4, 5, 10, 11) are
## never written, so both merges can be done in one vectorised step.
tab1[c(3, 9), 1] <- paste(tab1[c(3, 9), 1], tab1[c(5, 11), 1], sep = " ")
tab1[c(3, 9), 2:8] <- tab1[c(4, 10), 2:8]
## Table 2 is kept exactly as extracted
tab2 <- health1[[2]]
################################################################################
## Table 3: keep rows 24-43 of the third extracted table
tab3 <- health1[[3]][24:43, ]
## For rows 2, 5, 8, 11, 15 and 18: append the column-1 text found two rows
## below, then take columns 2-8 from the row directly below. None of the rows
## read here is also written, so the merges are done in one vectorised step.
tab3[c(2, 5, 8, 11, 15, 18), 1] <- paste(
  tab3[c(2, 5, 8, 11, 15, 18), 1],
  tab3[c(4, 7, 10, 13, 17, 20), 1],
  sep = " "
)
tab3[c(2, 5, 8, 11, 15, 18), 2:8] <- tab3[c(3, 6, 9, 12, 16, 19), 2:8]
## Keep rows 1 and 14 plus the six merged rows; drop the rows consumed above
tab3 <- tab3[c(1:2, 5, 8, 11, 14:15, 18), ]
## Remove every comma from columns 1-8
tab3[, 1:8] <- apply(
  tab3[, 1:8],
  2,
  str_replace_all,
  pattern = ",",
  replacement = ""
)
## Split column 1 at the first " - " into two columns and bind them to
## columns 2-8, giving nine columns in total
tab3 <- data.frame(
  str_split_fixed(string = tab3[, 1], pattern = " - ", n = 2),
  tab3[, 2:8]
)
## Column 7 (named netAdjust below): strip "-", "(" and ")".
## NOTE(review): this also discards any sign carried by a leading "-" or by
## parentheses, so every value converts as non-negative - confirm intended.
tab3[, 7] <- tab3[, 7] %>%
  str_replace_all(pattern = "-", replacement = "") %>%
  str_remove_all(pattern = "\\(") %>%
  str_remove_all(pattern = "\\)")
names(tab3) <- c(
  "code", "item",
  "original", "revised", "allot", "balance",
  "netAdjust", "recast", "adjRecast"
)
## Column 2 (item) stays text; every other column is converted to numeric.
## Going through as.character() first keeps this correct for factor columns.
tab3[c(1, 3:9)] <- lapply(
  tab3[c(1, 3:9)],
  function(column) as.numeric(as.character(column))
)
tab3[[2]] <- as.character(tab3[[2]])
################################################################################
## Tables 4-7 are taken from the extraction result as they are; no row
## merging or type conversion is applied to them in the code shown here.
tab4 <- health1[[4]]
tab5 <- health1[[5]]
tab6 <- health1[[6]]
tab7 <- health1[[7]]
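# NOTE: the two lines below are web-page residue, not R code; they are kept
# as comments so that the script parses.
# Add the following code to your website.
# For more information on customizing the embed code, read Embedding Snippets.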