################################################################################
#
# Load required libraries
#
################################################################################
library(pdftools)
library(tm)
library(tabulizer)
library(stringr)
library(tidyverse)
library(tidytext)
################################################################################
#
# Function to process tables
#
################################################################################
## Rebuild a table from tokenised text rows.
##
## Arguments:
##   tab   list (or vector) with one character vector of tokens per table row
##   nrow  number of rows of the numeric part; defaults to length(tab)
##   ncol  number of numeric columns; defaults to the largest number of
##         amounts found in any single row
##
## Returns a data.frame whose first column `labs` holds the row labels (all
## purely alphabetic tokens of a row joined by single spaces) followed by one
## numeric column per amount (tokens made of 6 to 8 digits). Rows without any
## amount keep NA in the numeric columns.
get_table <- function(tab, nrow = NULL, ncol = NULL) {
  ## Label: the alphabetic tokens of each row; "" when a row has none
  labs <- vapply(
    tab,
    function(tokens) {
      paste(tokens[grepl("^[a-zA-Z]+$", tokens)], collapse = " ")
    },
    character(1),
    USE.NAMES = FALSE
  )
  ## Amounts: the 6 to 8 digit tokens of each row, converted to numeric
  amounts <- lapply(
    tab,
    function(tokens) {
      as.numeric(tokens[grepl("^\\d{6,8}$", tokens, perl = TRUE)])
    }
  )
  ## Fall back to dimensions derived from the data when none are supplied
  if (is.null(nrow)) {
    nrow <- length(tab)
  }
  if (is.null(ncol)) {
    ncol <- max(0L, lengths(amounts))
  }
  df <- matrix(data = NA_real_, nrow = nrow, ncol = ncol)
  for (i in seq_along(amounts)) {
    ## A zero-length replacement is an error in R, so skip rows with no amounts
    if (length(amounts[[i]]) > 0) {
      df[i, ] <- amounts[[i]]
    }
  }
  ## Return the assembled table; previously nothing was returned
  data.frame(labs, df, stringsAsFactors = FALSE)
}
################################################################################
#
# Create list for information and tables in page 255 (2016)
#
################################################################################
## Extract tables from page 255
health1 <- extract_tables(
  file = "data-raw/budget/2016 Ministry of Finance and Development Planning.pdf",
  pages = 255,
  method = "decide"
)
## Row 3 of the first extracted table is kept as the goal; rows 5 to 7 are
## joined with single spaces to form the strategic objective
goal <- health1[[1]][3, ]
strategicObjective <- do.call(
  str_c,
  c(lapply(5:7, function(r) health1[[1]][r, ]), list(sep = " "))
)
## Extract first table in page 255
## Rows 11 to 15 of the extracted page; all punctuation is stripped and every
## row is split into space-delimited tokens
tab1 <- health1[[1]][11:15, ]
tab1 <- str_replace_all(string = tab1, pattern = "[[:punct:]]", replacement = "")
tab1 <- str_split(string = tab1, pattern = " ")
## Extract relevant fields to re-create first table in page 255 and create data.frame
economic_classification <- character(length(tab1))
df <- matrix(data = NA, nrow = 5, ncol = 6)
for (i in seq_along(tab1)) {
  ## Purely alphabetic tokens make up the row label. paste(collapse = ) gives
  ## "" for a row without such tokens, where looping over 1:length(words)
  ## would have produced the label "NA "
  words <- tab1[[i]][str_detect(string = tab1[[i]], pattern = "^[a-zA-Z]+$")]
  phrase <- paste(words, collapse = " ")
  economic_classification[i] <- phrase
  ## Tokens of 6 to 8 digits are the amounts that fill the six numeric columns
  numbers <- tab1[[i]][str_detect(string = tab1[[i]],
                                  pattern = "^(\\d{6}|\\d{7}|\\d{8})$")]
  numbers <- as.numeric(numbers)
  df[i, ] <- numbers
}
df <- data.frame(economic_classification, df)
names(df) <- c("economic_classification",
               "actual_2014_2015",
               "budget_2015_2016", "actual_2015_2016",
               "budget_2016_2017", "projection_2017_2018",
               "projection_2018_2019")
df$economic_classification <- str_to_sentence(df$economic_classification)
## Prepend the hard-coded codes; the last row gets no code
df <- data.frame(code = c(21, 22, 23, 26, NA), df)
summaryEconHealth2016 <- df
## Extract second table in page 255
## Rows 19 to 31 of the extracted page; all punctuation is stripped and every
## row is split into space-delimited tokens
tab2 <- health1[[1]][19:31, ]
tab2 <- str_replace_all(string = tab2, pattern = "[[:punct:]]", replacement = "")
tab2 <- str_split(string = tab2, pattern = " ")
## The tokens of row 10 are appended to row 9 and row 10 is then dropped,
## leaving 12 rows
tab2[[9]] <- c(tab2[[9]], tab2[[10]])
tab2 <- tab2[c(1:9, 11:13)]
## Extract relevant fields to re-create second table in page 255 and create data.frame
spending_entity <- character(length(tab2))
df <- matrix(data = NA, nrow = 12, ncol = 6)
for (i in seq_along(tab2)) {
  ## Purely alphabetic tokens make up the row label. paste(collapse = ) gives
  ## "" for a row without such tokens, where looping over 1:length(words)
  ## would have produced the label "NA "
  words <- tab2[[i]][str_detect(string = tab2[[i]], pattern = "^[a-zA-Z]+$")]
  phrase <- paste(words, collapse = " ")
  spending_entity[i] <- phrase
  ## Tokens of 6 to 8 digits are the amounts that fill the six numeric columns
  numbers <- tab2[[i]][str_detect(string = tab2[[i]],
                                  pattern = "^(\\d{6}|\\d{7}|\\d{8})$")]
  numbers <- as.numeric(numbers)
  df[i, ] <- numbers
}
df <- data.frame(spending_entity, df)
names(df) <- c("spending_entity",
               "actual_2014_2015",
               "budget_2015_2016", "actual_2015_2016",
               "budget_2016_2017", "projection_2017_2018",
               "projection_2018_2019")
df$spending_entity <- str_to_title(df$spending_entity)
## Prepend the hard-coded codes; the last row gets no code
df <- data.frame(code = c(310, 311, 312, 313, 336, 337, 338, 339, 434, 435, 436, NA), df)
summarySpendingHealth2016 <- df
## Bundle the goal, the strategic objective and both page-255 tables into one
## named list and save it with usethis::use_data()
summaryHealth2016 <- list(
  goal = goal,
  strategicObjective = strategicObjective,
  summaryEconHealth2016 = summaryEconHealth2016,
  summarySpendingHealth2016 = summarySpendingHealth2016
)
usethis::use_data(summaryHealth2016, overwrite = TRUE)
## Tidy-up
rm(list = c("words", "phrase", "numbers", "economic_classification",
            "tab1", "tab2", "df",
            "summaryEconHealth2016", "summarySpendingHealth2016", "health1"))
################################################################################
#
# Process the tables in pages 256-268 (2016)
#
################################################################################
## Extract tables from pages 256-268
health2 <- extract_tables(
  file = "data-raw/budget/2016 Ministry of Finance and Development Planning.pdf",
  pages = 256:268,
  method = "decide"
)
## Rows 5 to 10 of the first extracted table: strip all punctuation, then
## split every row into space-delimited tokens
tab1 <- health2[[1]][5:10, ] %>%
  str_replace_all(pattern = "[[:punct:]]", replacement = "") %>%
  str_split(pattern = " ")
## Replace the first row with the first run of digits extracted from it
tab1[1] <- str_extract(tab1[1], "[0-9]+")
df <- get_table(tab = tab1, nrow = 6, ncol = 6)
## Rows 6 to 48 of the second extracted table
tab2 <- health2[[2]][6:48, ]
## For each listed row, append the first-column text of the following row
## (presumably a label that wraps onto a second line), then drop the rows that
## were appended
wrapped_rows <- c(8, 12, 15, 25, 33, 36)
tab2[wrapped_rows, 1] <- paste(tab2[wrapped_rows, 1], tab2[wrapped_rows + 1, 1], sep = " ")
tab2 <- tab2[-(wrapped_rows + 1), ]
## Turn hyphens and en dashes in the first column into spaces
tab2[ , 1] <- str_replace_all(string = tab2[ , 1], pattern = "-", replacement = " ")
tab2[ , 1] <- str_replace_all(string = tab2[ , 1], pattern = "–", replacement = " ")
## NOTE(review): this replacement cannot match any more because every en dash
## was already replaced on the previous line; kept so the tokenisation below
## stays unchanged
tab2[ , 1] <- str_replace_all(string = tab2[ , 1], pattern = " – ", replacement = " ")
## Strip all punctuation from columns 2 to 7
tab2[ , 2:7] <- str_replace_all(string = tab2[ , 2:7], pattern = "[[:punct:]]", replacement = "")
## Split the first column into at most 7 space-delimited pieces; pieces made
## of 1 to 8 digits are collected as codes
x <- str_split_fixed(string = tab2[ , 1], pattern = " ", n = 7)
y <- x[str_detect(string = x, pattern = "^(\\d{1}|\\d{2}|\\d{3}|\\d{4}|\\d{5}|\\d{6}|\\d{7}|\\d{8})$")]
## Row label: the purely alphabetic pieces joined by single spaces.
## paste(collapse = ) gives "" for a row without such pieces, where looping
## over 1:length(words) would have produced the label "NA "
labs <- character(nrow(x))
for (i in seq_len(nrow(x))) {
  words <- x[i, ][str_detect(string = x[i, ], pattern = "^[a-zA-Z]+$")]
  phrase <- paste(words, collapse = " ")
  labs[i] <- phrase
}
tab2[ , 1] <- labs
tab2 <- data.frame(code = y, tab2)
## Hard-coded category code and name for each of the 37 rows
categoryCode <- c(rep(20, 2), rep(21, 3), rep(22, 29), rep(23, 2), 26)
category <- c(rep("Capital Investment", 2), rep("Compensation of Employees", 3), rep("Use of Goods and Services", 29), rep("Consumption of Fixed Capital", 2), "Grants")
itemCode <- tab2$code
item <- tab2[ , 2]
tab2 <- data.frame(categoryCode, category, itemCode, item, tab2[ , 3:8])
names(tab2) <- c("categoryCode", "category", "itemCode", "item",
                 "actual_2014_2015",
                 "budget_2015_2016", "actual_2015_2016",
                 "budget_2016_2017", "projection_2017_2018",
                 "projection_2018_2019")
## Keep only the listed rows
tab2 <- tab2[c(2, 4:5, 7:34, 36), ]
## Column types: category and item as character, codes and amounts as numeric
tab2[c(2, 4)] <- lapply(tab2[c(2, 4)], as.character)
tab2[c(3, 5:10)] <- lapply(tab2[c(3, 5:10)], function(v) as.numeric(as.character(v)))
################################################################################
## Rows 5 to 49 of the third extracted table
tab3 <- health2[[3]][5:49, ]
## For each listed row, append the first-column text of the following row
## (presumably a label that wraps onto a second line), then drop the rows that
## were appended
wrapped_rows <- c(1, 7, 23, 28, 36, 40)
tab3[wrapped_rows, 1] <- paste(tab3[wrapped_rows, 1], tab3[wrapped_rows + 1, 1], sep = " ")
tab3 <- tab3[-(wrapped_rows + 1), ]
## Hyphens in the first column become spaces; columns 2 to 7 lose all
## punctuation
tab3[ , 1] <- str_replace_all(string = tab3[ , 1], pattern = "-", replacement = " ")
tab3[ , 2:7] <- str_replace_all(string = tab3[ , 2:7], pattern = "[[:punct:]]", replacement = "")
## Split the first column into at most 7 space-delimited pieces; pieces made
## of 1 to 8 digits are collected as codes
x <- str_split_fixed(string = tab3[ , 1], pattern = " ", n = 7)
y <- x[str_detect(string = x, pattern = "^(\\d{1}|\\d{2}|\\d{3}|\\d{4}|\\d{5}|\\d{6}|\\d{7}|\\d{8})$")]
## Row label: the purely alphabetic pieces joined by single spaces.
## paste(collapse = ) gives "" for a row without such pieces, where looping
## over 1:length(words) would have produced the label "NA "
labs <- character(nrow(x))
for (i in seq_len(nrow(x))) {
  words <- x[i, ][str_detect(string = x[i, ], pattern = "^[a-zA-Z]+$")]
  phrase <- paste(words, collapse = " ")
  labs[i] <- phrase
}
tab3[ , 1] <- labs
tab3 <- data.frame(code = y, tab3)
## Every row of this table is assigned category 26, "Grants"
categoryCode <- rep(26, nrow(tab3))
category <- rep("Grants", nrow(tab3))
tab3 <- data.frame(categoryCode, category, tab3)
names(tab3) <- c("categoryCode", "category", "itemCode", "item",
                 "actual_2014_2015",
                 "budget_2015_2016", "actual_2015_2016",
                 "budget_2016_2017", "projection_2017_2018",
                 "projection_2018_2019")
## Column types: category and item as character, codes and amounts as numeric
tab3[c(2, 4)] <- lapply(tab3[c(2, 4)], as.character)
tab3[c(3, 5:10)] <- lapply(tab3[c(3, 5:10)], function(v) as.numeric(as.character(v)))
################################################################################
## Rows 5 to 49 of the fourth extracted table
tab4 <- health2[[4]][5:49, ]
## For each listed row, append the first-column text of the following row
## (presumably a label that wraps onto a second line), then drop the rows that
## were appended. Each row is merged exactly once: merging row 4 with row 5 a
## second time would append the text of row 5 to the label twice
wrapped_rows <- c(4, 16, 22, 35, 41)
tab4[wrapped_rows, 1] <- paste(tab4[wrapped_rows, 1], tab4[wrapped_rows + 1, 1], sep = " ")
tab4 <- tab4[-(wrapped_rows + 1), ]
## Hyphens in the first column become spaces; columns 2 to 7 lose all
## punctuation
tab4[ , 1] <- str_replace_all(string = tab4[ , 1], pattern = "-", replacement = " ")
tab4[ , 2:7] <- str_replace_all(string = tab4[ , 2:7], pattern = "[[:punct:]]", replacement = "")
## Split the first column into at most 8 space-delimited pieces; pieces made
## of 1 to 8 digits are collected as codes
x <- str_split_fixed(string = tab4[ , 1], pattern = " ", n = 8)
y <- x[str_detect(string = x, pattern = "^(\\d{1}|\\d{2}|\\d{3}|\\d{4}|\\d{5}|\\d{6}|\\d{7}|\\d{8})$")]
## Row label: the purely alphabetic pieces joined by single spaces.
## paste(collapse = ) gives "" for a row without such pieces, where looping
## over 1:length(words) would have produced the label "NA "
labs <- character(nrow(x))
for (i in seq_len(nrow(x))) {
  words <- x[i, ][str_detect(string = x[i, ], pattern = "^[a-zA-Z]+$")]
  phrase <- paste(words, collapse = " ")
  labs[i] <- phrase
}
tab4[ , 1] <- labs
tab4 <- data.frame(code = y, tab4)
## Every row of this table is assigned category 26, "Grants"
categoryCode <- rep(26, nrow(tab4))
category <- rep("Grants", nrow(tab4))
tab4 <- data.frame(categoryCode, category, tab4)
names(tab4) <- c("categoryCode", "category", "itemCode", "item",
                 "actual_2014_2015",
                 "budget_2015_2016", "actual_2015_2016",
                 "budget_2016_2017", "projection_2017_2018",
                 "projection_2018_2019")
## Column types: category and item as character, codes and amounts as numeric
tab4[c(2, 4)] <- lapply(tab4[c(2, 4)], as.character)
tab4[c(3, 5:10)] <- lapply(tab4[c(3, 5:10)], function(v) as.numeric(as.character(v)))
################################################################################
## Rows 5 to 36 of the fifth extracted table
tab5 <- health2[[5]][5:36, ]
## For each listed row, append the first-column text of the following row
## (presumably a label that wraps onto a second line), then drop the rows that
## were appended. Each row is merged exactly once: merging row 3 with row 4 a
## second time would append the text of row 4 to the label twice
wrapped_rows <- c(3, 5, 13)
tab5[wrapped_rows, 1] <- paste(tab5[wrapped_rows, 1], tab5[wrapped_rows + 1, 1], sep = " ")
tab5 <- tab5[-(wrapped_rows + 1), ]
## Hyphens in the first column become spaces; columns 2 to 7 lose all
## punctuation
tab5[ , 1] <- str_replace_all(string = tab5[ , 1], pattern = "-", replacement = " ")
tab5[ , 2:7] <- str_replace_all(string = tab5[ , 2:7], pattern = "[[:punct:]]", replacement = "")
## Tokenise a punctuation-free copy of the first column into at most 8
## space-delimited pieces; pieces made of 1 to 8 digits are collected as codes
w <- str_replace_all(string = tab5[ , 1], pattern = "[[:punct:]]", replacement = "")
x <- str_split_fixed(string = w, pattern = " ", n = 8)
y <- x[str_detect(string = x, pattern = "^(\\d{1}|\\d{2}|\\d{3}|\\d{4}|\\d{5}|\\d{6}|\\d{7}|\\d{8})$")]
## Column 2 is overwritten with hard-coded values for the 29 remaining rows
tab5[ , 2] <- c(149998, 88701, 74999, rep(0, 2), 69997, rep(0, 22), 54938322)
## Row label: the purely alphabetic pieces joined by single spaces.
## paste(collapse = ) gives "" for a row without such pieces, where looping
## over 1:length(words) would have produced the label "NA "
labs <- character(nrow(x))
for (i in seq_len(nrow(x))) {
  words <- x[i, ][str_detect(string = x[i, ], pattern = "^[a-zA-Z]+$")]
  phrase <- paste(words, collapse = " ")
  labs[i] <- phrase
}
tab5[ , 1] <- labs
## The first 28 extracted codes are used; the last row gets code 0
tab5 <- data.frame(code = c(y[1:28], 0), tab5)
## Every row of this table is assigned category 26, "Grants"
categoryCode <- rep(26, nrow(tab5))
category <- rep("Grants", nrow(tab5))
tab5 <- data.frame(categoryCode, category, tab5)
names(tab5) <- c("categoryCode", "category", "itemCode", "item",
                 "actual_2014_2015",
                 "budget_2015_2016", "actual_2015_2016",
                 "budget_2016_2017", "projection_2017_2018",
                 "projection_2018_2019")
## Drop the last row
tab5 <- tab5[1:(nrow(tab5) - 1), ]
## Column types: category and item as character, codes and amounts as numeric
tab5[c(2, 4)] <- lapply(tab5[c(2, 4)], as.character)
tab5[c(3, 5:10)] <- lapply(tab5[c(3, 5:10)], function(v) as.numeric(as.character(v)))
################################################################################
## Stack tab2 to tab5 row-wise into a single data.frame and save it with
## usethis::use_data()
mohHealthEcon2016 <- data.frame(do.call(rbind, list(tab2, tab3, tab4, tab5)))
usethis::use_data(mohHealthEcon2016, overwrite = TRUE)
################################################################################
## Rows 40 to 49 of the fifth extracted table; used below as the first part of
## the county table
tab6 <- health2[[5]][40:49, ]
## Hyphens in the first column become spaces; columns 2 to 7 lose all
## punctuation
tab6[ , 1] <- str_replace_all(string = tab6[ , 1], pattern = "-", replacement = " ")
tab6[ , 2:7] <- str_replace_all(string = tab6[ , 2:7], pattern = "[[:punct:]]", replacement = "")
## Split the first column into at most 5 space-delimited pieces; pieces made
## of 1 to 8 digits are collected as codes
x <- str_split_fixed(string = tab6[ , 1], pattern = " ", n = 5)
y <- x[str_detect(string = x, pattern = "^(\\d{1}|\\d{2}|\\d{3}|\\d{4}|\\d{5}|\\d{6}|\\d{7}|\\d{8})$")]
## Row label: the purely alphabetic pieces joined by single spaces.
## paste(collapse = ) gives "" for a row without such pieces, where looping
## over 1:length(words) would have produced the label "NA "
labs <- character(nrow(x))
for (i in seq_len(nrow(x))) {
  words <- x[i, ][str_detect(string = x[i, ], pattern = "^[a-zA-Z]+$")]
  phrase <- paste(words, collapse = " ")
  labs[i] <- phrase
}
tab6[ , 1] <- labs
tab6 <- data.frame(code = y, tab6)
## Column 3 is overwritten with hard-coded values, and the extracted codes are
## replaced by the sequence 0 to 9
tab6[ , 3] <- c(44573266, 327481, 242373, 239995, 414420, 489984, 765989, 507991, 1059971, 424676)
tab6$code <- 0:9 #str_pad(string = 0:15, width = 2, side = "left", pad = "0")
county <- tab6$X1
tab6 <- data.frame(countyCode = tab6$code, county, tab6[ , 3:8])
names(tab6) <- c("countyCode", "county",
                 "actual_2014_2015",
                 "budget_2015_2016", "actual_2015_2016",
                 "budget_2016_2017", "projection_2017_2018",
                 "projection_2018_2019")
## Column types: county as character, the six amount columns as numeric
tab6[[2]] <- as.character(tab6[[2]])
tab6[3:8] <- lapply(tab6[3:8], function(v) as.numeric(as.character(v)))
################################################################################
## Rows 5 to 10 of the sixth extracted table; used below as the second part of
## the county table
tab7 <- health2[[6]][5:10, ]
## Shift columns 2 and 3 one position to the right and blank column 2
tab7[ , 3:4] <- tab7[ , 2:3]
tab7[ , 2] <- ""
## Hyphens in the first column become spaces; columns 1 to 7 lose all
## punctuation
tab7[ , 1] <- str_replace_all(string = tab7[ , 1], pattern = "-", replacement = " ")
tab7[ , 1:7] <- str_replace_all(string = tab7[ , 1:7], pattern = "[[:punct:]]", replacement = "")
## Split the first column into at most 5 space-delimited pieces and discard
## the first piece
x <- str_split_fixed(string = tab7[ , 1], pattern = " ", n = 5)
x <- x[ , 2:5]
## For rows 1 to 3 and row 6, copy the second remaining piece into the third;
## the third piece is used further down as the first amount column
x[1:3, 3] <- x[1:3, 2]
x[6, 3] <- x[6, 2]
## Row label: the purely alphabetic pieces joined by single spaces.
## paste(collapse = ) gives "" for a row without such pieces, where looping
## over 1:length(words) would have produced the label "NA "
labs <- character(nrow(x))
for (i in seq_len(nrow(x))) {
  words <- x[i, ][str_detect(string = x[i, ], pattern = "^[a-zA-Z]+$")]
  phrase <- paste(words, collapse = " ")
  labs[i] <- phrase
}
#y <- x[str_detect(string = x, pattern = "^(\\d{1}|\\d{2}|\\d{3}|\\d{4}|\\d{5}|\\d{6}|\\d{7}|\\d{8})$")]
## County codes are hard-coded as 10 to 15
countyCode <- 10:15
county <- labs
tab7 <- data.frame(countyCode, county, x[ , 3], tab7[ , 3:7])
names(tab7) <- c("countyCode", "county",
                 "actual_2014_2015",
                 "budget_2015_2016", "actual_2015_2016",
                 "budget_2016_2017", "projection_2017_2018",
                 "projection_2018_2019")
## Column types: county as character, the six amount columns as numeric
tab7[[2]] <- as.character(tab7[[2]])
tab7[3:8] <- lapply(tab7[3:8], function(v) as.numeric(as.character(v)))
## Stack tab6 (without its first row) on top of tab7, title-case the county
## names, remove the text " County" from them and save the result with
## usethis::use_data()
df <- data.frame(rbind(tab6[-1, ], tab7))
df$county <- df$county %>%
  str_to_title() %>%
  str_remove_all(pattern = " County")
mohHealthCounty2016 <- df
usethis::use_data(mohHealthCounty2016, overwrite = TRUE)
################################################################################
## Rows 24 to 47 of the sixth extracted table
tab8 <- health2[[6]][24:47, ]
## Shift columns 2 and 3 one position to the right and blank column 2
tab8[ , 3:4] <- tab8[ , 2:3]
tab8[ , 2] <- ""
## Hyphens in the first column become spaces; columns 1 to 7 lose all
## punctuation
tab8[ , 1] <- str_replace_all(string = tab8[ , 1], pattern = "-", replacement = " ")
tab8[ , 1:7] <- str_replace_all(string = tab8[ , 1:7], pattern = "[[:punct:]]", replacement = "")
## For each listed row, append the first-column text of the following row
## (presumably a label that wraps onto a second line), then drop the rows that
## were appended
wrapped_rows <- c(9, 13, 15, 21)
tab8[wrapped_rows, 1] <- paste(tab8[wrapped_rows, 1], tab8[wrapped_rows + 1, 1], sep = " ")
tab8 <- tab8[-(wrapped_rows + 1), ]
## Split the first column into at most 8 space-delimited pieces
x <- str_split_fixed(string = tab8[ , 1], pattern = " ", n = 8)
labs <- character(nrow(x))
numDF <- matrix(data = NA, nrow = nrow(x), ncol = 2)
for (i in seq_len(nrow(x))) {
  ## Row label: the purely alphabetic pieces joined by single spaces.
  ## paste(collapse = ) gives "" for a row without such pieces, where looping
  ## over 1:length(words) would have produced the label "NA "
  words <- x[i, ][str_detect(string = x[i, ], pattern = "^[a-zA-Z]+$")]
  phrase <- paste(words, collapse = " ")
  labs[i] <- phrase
  ## The pieces made of 1 to 8 digits fill the two columns of numDF
  numDF[i, ] <- x[i, ][str_detect(string = x[i, ], pattern = "^(\\d{1}|\\d{2}|\\d{3}|\\d{4}|\\d{5}|\\d{6}|\\d{7}|\\d{8})$")]
}
tab8 <- data.frame(numDF[ , 1], labs, numDF[ , 2], tab8[ , 3:7])
## Keep only the listed rows; the labels of rows 1, 2, 4 and 7 are reused
## below as the department and category names
tab8 <- tab8[c(3, 5:6, 8:20), ]
departmentCode <- rep("0100", 16)
department <- rep(labs[1], 16)
categoryCode <- c(20, rep(21, 2), rep(22, 13))
category <- c(labs[2], rep(labs[4], 2), rep(labs[7], 13))
tab8 <- data.frame(departmentCode, department, categoryCode, category, tab8)
tab8$department <- str_to_title(tab8$department)
tab8$category <- str_to_title(tab8$category)
names(tab8) <- c("departmentCode", "department",
                 "categoryCode", "category",
                 "itemCode", "item",
                 "actual_2014_2015",
                 "budget_2015_2016", "actual_2015_2016",
                 "budget_2016_2017", "projection_2017_2018",
                 "projection_2018_2019")
## Column types: item as character; department code, item code and amounts as
## numeric
tab8[[6]] <- as.character(tab8[[6]])
tab8[c(1, 5, 7:12)] <- lapply(tab8[c(1, 5, 7:12)], function(v) as.numeric(as.character(v)))
################################################################################
## Rows 5 to 49 of the seventh extracted table
tab9 <- health2[[7]][5:49, ]
## In the first column, " - " and any remaining hyphen become a single space;
## columns 1 to 7 then lose all punctuation
tab9[ , 1] <- str_replace_all(string = tab9[ , 1], pattern = " - ", replacement = " ")
tab9[ , 1] <- str_replace_all(string = tab9[ , 1], pattern = "-", replacement = " ")
tab9[ , 1:7] <- str_replace_all(string = tab9[ , 1:7], pattern = "[[:punct:]]", replacement = "")
## For each listed row, append the first-column text of the following row
## (presumably a label that wraps onto a second line), then drop the rows that
## were appended
wrapped_rows <- c(2, 7, 18, 24, 29, 37, 44)
tab9[wrapped_rows, 1] <- paste(tab9[wrapped_rows, 1], tab9[wrapped_rows + 1, 1], sep = " ")
tab9 <- tab9[-(wrapped_rows + 1), ]
## Split the first column into at most 7 space-delimited pieces
x <- str_split_fixed(string = tab9[ , 1], pattern = " ", n = 7)
labs <- character(nrow(x))
numDF <- NULL
for (i in seq_len(nrow(x))) {
  ## Row label: the purely alphabetic pieces joined by single spaces.
  ## paste(collapse = ) gives "" for a row without such pieces, where looping
  ## over 1:length(words) would have produced the label "NA "
  words <- x[i, ][str_detect(string = x[i, ], pattern = "^[a-zA-Z]+$")]
  phrase <- paste(words, collapse = " ")
  labs[i] <- phrase
  ## The pieces made of 1 to 8 digits are collected as codes
  numDF <- c(numDF, x[i, ][str_detect(string = x[i, ], pattern = "^(\\d{1}|\\d{2}|\\d{3}|\\d{4}|\\d{5}|\\d{6}|\\d{7}|\\d{8})$")])
}
tab9 <- data.frame(numDF, labs, tab9[ , 2:7])
## Drop the first row; its label is reused below as the category name
tab9 <- tab9[2:38, ]
departmentCode <- rep("0100", 37)
department <- rep("Curative Services", 37)
categoryCode <- rep(26, 37)
category <- rep(labs[1], 37)
tab9 <- data.frame(departmentCode, department, categoryCode, category, tab9)
tab9$category <- str_to_title(tab9$category)
names(tab9) <- c("departmentCode", "department",
                 "categoryCode", "category",
                 "itemCode", "item",
                 "actual_2014_2015",
                 "budget_2015_2016", "actual_2015_2016",
                 "budget_2016_2017", "projection_2017_2018",
                 "projection_2018_2019")
## Column types: names as character, codes and amounts as numeric
tab9[c(2, 4, 6)] <- lapply(tab9[c(2, 4, 6)], as.character)
tab9[c(1, 3, 5, 7:12)] <- lapply(tab9[c(1, 3, 5, 7:12)], function(v) as.numeric(as.character(v)))
################################################################################
## Rows 5 to 50 of the eighth extracted table
tab10 <- health2[[8]][5:50, ]
## In the first column, " - " and any remaining hyphen become a single space;
## columns 1 to 7 then lose all punctuation
tab10[ , 1] <- str_replace_all(string = tab10[ , 1], pattern = " - ", replacement = " ")
tab10[ , 1] <- str_replace_all(string = tab10[ , 1], pattern = "-", replacement = " ")
tab10[ , 1:7] <- str_replace_all(string = tab10[ , 1:7], pattern = "[[:punct:]]", replacement = "")
## For each listed row, append the first-column text of the following row
## (presumably a label that wraps onto a second line), then drop the rows that
## were appended
wrapped_rows <- c(11, 13, 16, 25, 30, 32, 37, 39)
tab10[wrapped_rows, 1] <- paste(tab10[wrapped_rows, 1], tab10[wrapped_rows + 1, 1], sep = " ")
tab10 <- tab10[-(wrapped_rows + 1), ]
## Split the first column into at most 7 space-delimited pieces
x <- str_split_fixed(string = tab10[ , 1], pattern = " ", n = 7)
labs <- character(nrow(x))
numDF <- NULL
for (i in seq_len(nrow(x))) {
  ## Row label: the purely alphabetic pieces joined by single spaces.
  ## paste(collapse = ) gives "" for a row without such pieces, where looping
  ## over 1:length(words) would have produced the label "NA "
  words <- x[i, ][str_detect(string = x[i, ], pattern = "^[a-zA-Z]+$")]
  phrase <- paste(words, collapse = " ")
  labs[i] <- phrase
  ## The pieces made of 1 to 8 digits are collected as codes
  numDF <- c(numDF, x[i, ][str_detect(string = x[i, ], pattern = "^(\\d{1}|\\d{2}|\\d{3}|\\d{4}|\\d{5}|\\d{6}|\\d{7}|\\d{8})$")])
}
tab10 <- data.frame(numDF, labs, tab10[ , 2:7])
## Hard-coded department and category for all 38 rows
departmentCode <- rep("0100", 38)
department <- rep("Curative Services", 38)
categoryCode <- rep(26, 38)
category <- rep("Grants", 38)
tab10 <- data.frame(departmentCode, department, categoryCode, category, tab10)
tab10$category <- str_to_title(tab10$category)
names(tab10) <- c("departmentCode", "department",
                  "categoryCode", "category",
                  "itemCode", "item",
                  "actual_2014_2015",
                  "budget_2015_2016", "actual_2015_2016",
                  "budget_2016_2017", "projection_2017_2018",
                  "projection_2018_2019")
## Column types: names as character, codes and amounts as numeric
tab10[c(2, 4, 6)] <- lapply(tab10[c(2, 4, 6)], as.character)
tab10[c(1, 3, 5, 7:12)] <- lapply(tab10[c(1, 3, 5, 7:12)], function(v) as.numeric(as.character(v)))
################################################################################
## Rows 5 to 23 of the ninth extracted table
tab11 <- health2[[9]][5:23, ]
## Copy column 2 into column 3 and set column 2 to 0
tab11[ , 3] <- tab11[ , 2]
tab11[ , 2] <- 0
## In the first column, " - " and any remaining hyphen become a single space;
## columns 1 to 7 then lose all punctuation
tab11[ , 1] <- str_replace_all(string = tab11[ , 1], pattern = " - ", replacement = " ")
tab11[ , 1] <- str_replace_all(string = tab11[ , 1], pattern = "-", replacement = " ")
tab11[ , 1:7] <- str_replace_all(string = tab11[ , 1:7], pattern = "[[:punct:]]", replacement = "")
## Append the first-column text of row 2 to row 1 (presumably a label that
## wraps onto a second line), then drop row 2
wrapped_rows <- 1
tab11[wrapped_rows, 1] <- paste(tab11[wrapped_rows, 1], tab11[wrapped_rows + 1, 1], sep = " ")
tab11 <- tab11[-(wrapped_rows + 1), ]
## Split the first column into at most 7 space-delimited pieces
x <- str_split_fixed(string = tab11[ , 1], pattern = " ", n = 7)
labs <- character(nrow(x))
numDF <- matrix(data = NA, nrow = nrow(x), ncol = 2)
for (i in seq_len(nrow(x))) {
  ## Row label: the purely alphabetic pieces joined by single spaces.
  ## paste(collapse = ) gives "" for a row without such pieces, where looping
  ## over 1:length(words) would have produced the label "NA "
  words <- x[i, ][str_detect(string = x[i, ], pattern = "^[a-zA-Z]+$")]
  phrase <- paste(words, collapse = " ")
  labs[i] <- phrase
  ## The pieces made of 1 to 8 digits fill the two columns of numDF
  numDF[i, ] <- x[i, ][str_detect(string = x[i, ], pattern = "^(\\d{1}|\\d{2}|\\d{3}|\\d{4}|\\d{5}|\\d{6}|\\d{7}|\\d{8})$")]
}
## Only the first numeric piece of each row is kept, as the item code
tab11 <- data.frame(numDF[ , 1], labs, tab11[ , 2:7])
## Hard-coded department and category for all 18 rows
departmentCode <- rep("0100", 18)
department <- rep("Curative Services", 18)
categoryCode <- rep(26, 18)
category <- rep("Grants", 18)
tab11 <- data.frame(departmentCode, department, categoryCode, category, tab11)
tab11$category <- str_to_title(tab11$category)
names(tab11) <- c("departmentCode", "department",
                  "categoryCode", "category",
                  "itemCode", "item",
                  "actual_2014_2015",
                  "budget_2015_2016", "actual_2015_2016",
                  "budget_2016_2017", "projection_2017_2018",
                  "projection_2018_2019")
## Column types: names as character, codes and amounts as numeric
tab11[c(2, 4, 6)] <- lapply(tab11[c(2, 4, 6)], as.character)
tab11[c(1, 3, 5, 7:12)] <- lapply(tab11[c(1, 3, 5, 7:12)], function(v) as.numeric(as.character(v)))
################################################################################
## Stack tab8 to tab11 row-wise into a single data.frame and save it with
## usethis::use_data()
df <- data.frame(do.call(rbind, list(tab8, tab9, tab10, tab11)))
mohHealthCurative2016 <- df
usethis::use_data(mohHealthCurative2016, overwrite = TRUE)
################################################################################
## Rebuild the table held in rows 35-47 of health2[[9]] (department code 0200).
## Column 1 of the raw character matrix carries several space-separated tokens
## per row: purely alphabetic tokens are joined into a label, and tokens made of
## 1-8 digits fill the two columns of numDF.
tab12 <- health2[[9]][35:47, ]
## Copy column 2 over column 3, then blank column 2.
tab12[ , 3] <- tab12[ , 2]
tab12[ , 2] <- ""
## Hyphens in column 1 become spaces so both sides split into separate tokens;
## any remaining punctuation is then removed from columns 1-7.
tab12[ , 1] <- str_replace_all(string = tab12[ , 1], pattern = " - ", replacement = " ")
tab12[ , 1] <- str_replace_all(string = tab12[ , 1], pattern = "-", replacement = " ")
for (k in 1:7) {
  tab12[ , k] <- str_replace_all(string = tab12[ , k], pattern = "[[:punct:]]", replacement = "")
}
## Rows 7, 11 and 13 are appended to the row directly above them and dropped.
wrapped <- c(6, 10, 12)
tab12[wrapped, 1] <- paste(tab12[wrapped, 1], tab12[wrapped + 1, 1], sep = " ")
tab12 <- tab12[-(wrapped + 1), ]
x <- str_split_fixed(string = tab12[ , 1], pattern = " ", n = 7)
labs <- character(nrow(x))
numDF <- matrix(data = NA, nrow = nrow(x), ncol = 2)
for (i in seq_len(nrow(x))) {
  words <- x[i, ][str_detect(string = x[i, ], pattern = "^[a-zA-Z]+$")]
  ## paste(collapse = ) yields "" for a row without alphabetic tokens, where
  ## the former 1:length(words) loop produced the bogus label "NA ".
  labs[i] <- paste(words, collapse = " ")
  ## NOTE(review): a row with a single numeric token is recycled into both
  ## columns of numDF; other counts do not fit the two-column row.
  numDF[i, ] <- x[i, ][str_detect(string = x[i, ], pattern = "^\\d{1,8}$")]
}
## Columns: item code, item label, amount parsed out of column 1, then the
## amounts held in columns 3-7 of the matrix.
tab12 <- data.frame(numDF[ , 1], labs, numDF[ , 2], tab12[ , 3:7])
## Rows 1, 2 and 4 only supply the department/category labels used below.
tab12 <- tab12[c(3, 5:10), ]
departmentCode <- rep("0200", 7)
department <- rep(labs[1], 7)
categoryCode <- c(21, rep(22, 6))
category <- c(labs[2], rep(labs[4], 6))
tab12 <- data.frame(departmentCode, department, categoryCode, category, tab12)
tab12$department <- str_to_title(tab12$department)
tab12$category <- str_to_title(tab12$category)
names(tab12) <- c(
  "departmentCode", "department", "categoryCode", "category",
  "itemCode", "item", "actual_2014_2015", "budget_2015_2016",
  "actual_2015_2016", "budget_2016_2017", "projection_2017_2018",
  "projection_2018_2019"
)
## Codes and amounts become numeric, descriptive columns become character.
## NOTE(review): as.numeric() turns departmentCode "0200" into 200, so the
## leading zero is lost.
tab12[c(1, 3, 5, 7:12)] <- lapply(
  tab12[c(1, 3, 5, 7:12)],
  function(col) as.numeric(as.character(col))
)
tab12[c(2, 4, 6)] <- lapply(tab12[c(2, 4, 6)], as.character)
################################################################################
## Rebuild the table held in rows 5-16 of health2[[10]] (department code 0200,
## "Preventive Services"). Column 1 of the raw character matrix carries several
## space-separated tokens per row: purely alphabetic tokens are joined into a
## label, and tokens made of 1-8 digits fill the two columns of numDF.
tab13 <- health2[[10]][5:16, ]
## Copy column 2 over column 3, then blank column 2.
tab13[ , 3] <- tab13[ , 2]
tab13[ , 2] <- ""
## Hyphens in column 1 become spaces so both sides split into separate tokens;
## any remaining punctuation is then removed from columns 1-7.
tab13[ , 1] <- str_replace_all(string = tab13[ , 1], pattern = " - ", replacement = " ")
tab13[ , 1] <- str_replace_all(string = tab13[ , 1], pattern = "-", replacement = " ")
for (k in 1:7) {
  tab13[ , k] <- str_replace_all(string = tab13[ , k], pattern = "[[:punct:]]", replacement = "")
}
## Row 5 is appended to row 4 and dropped.
tab13[4, 1] <- paste(tab13[4, 1], tab13[5, 1], sep = " ")
tab13 <- tab13[-5, ]
x <- str_split_fixed(string = tab13[ , 1], pattern = " ", n = 8)
labs <- character(nrow(x))
numDF <- matrix(data = NA, nrow = nrow(x), ncol = 2)
for (i in seq_len(nrow(x))) {
  words <- x[i, ][str_detect(string = x[i, ], pattern = "^[a-zA-Z]+$")]
  ## paste(collapse = ) yields "" for a row without alphabetic tokens, where
  ## the former 1:length(words) loop produced the bogus label "NA ".
  labs[i] <- paste(words, collapse = " ")
  ## NOTE(review): a row with a single numeric token is recycled into both
  ## columns of numDF; other counts do not fit the two-column row.
  numDF[i, ] <- x[i, ][str_detect(string = x[i, ], pattern = "^\\d{1,8}$")]
}
## Columns: item code, item label, amount parsed out of column 1, then the
## amounts held in columns 3-7 of the matrix.
tab13 <- data.frame(numDF[ , 1], labs, numDF[ , 2], tab13[ , 3:7])
## Row 10 is discarded; department and category names are hard-coded here
## rather than taken from labs.
tab13 <- tab13[c(1:9, 11), ]
departmentCode <- rep("0200", 10)
department <- rep("Preventive Services", 10)
categoryCode <- c(rep(22, 9), 26)
category <- c(rep("Use Of Goods And Services", 9), "Grants")
tab13 <- data.frame(departmentCode, department, categoryCode, category, tab13)
tab13$department <- str_to_title(tab13$department)
tab13$category <- str_to_title(tab13$category)
names(tab13) <- c(
  "departmentCode", "department", "categoryCode", "category",
  "itemCode", "item", "actual_2014_2015", "budget_2015_2016",
  "actual_2015_2016", "budget_2016_2017", "projection_2017_2018",
  "projection_2018_2019"
)
## Codes and amounts become numeric, descriptive columns become character.
## NOTE(review): as.numeric() turns departmentCode "0200" into 200, so the
## leading zero is lost.
tab13[c(1, 3, 5, 7:12)] <- lapply(
  tab13[c(1, 3, 5, 7:12)],
  function(col) as.numeric(as.character(col))
)
tab13[c(2, 4, 6)] <- lapply(tab13[c(2, 4, 6)], as.character)
################################################################################
## Stack tab12 and tab13 row-wise (kept in `df` as well) and save the result as
## the mohHealthPreventive2016 dataset, replacing any previously saved copy.
mohHealthPreventive2016 <- df <- data.frame(rbind(tab12, tab13))
usethis::use_data(mohHealthPreventive2016, overwrite = TRUE)
################################################################################
## Rebuild the table held in rows 28-44 of health2[[10]] (department code 0300)
## and save it as mohHealthSocial2016. Column 1 of the raw character matrix
## carries several space-separated tokens per row: purely alphabetic tokens are
## joined into a label, and tokens made of 1-8 digits fill the two columns of
## numDF.
tab14 <- health2[[10]][28:44, ]
## Copy column 2 over column 3, then blank column 2.
tab14[ , 3] <- tab14[ , 2]
tab14[ , 2] <- ""
## Hyphens in column 1 become spaces so both sides split into separate tokens;
## any remaining punctuation is then removed from columns 1-7.
tab14[ , 1] <- str_replace_all(string = tab14[ , 1], pattern = " - ", replacement = " ")
tab14[ , 1] <- str_replace_all(string = tab14[ , 1], pattern = "-", replacement = " ")
for (k in 1:7) {
  tab14[ , k] <- str_replace_all(string = tab14[ , k], pattern = "[[:punct:]]", replacement = "")
}
## Row 8 is appended to row 7 and dropped.
tab14[7, 1] <- paste(tab14[7, 1], tab14[8, 1], sep = " ")
tab14 <- tab14[-8, ]
x <- str_split_fixed(string = tab14[ , 1], pattern = " ", n = 8)
labs <- character(nrow(x))
numDF <- matrix(data = NA, nrow = nrow(x), ncol = 2)
for (i in seq_len(nrow(x))) {
  words <- x[i, ][str_detect(string = x[i, ], pattern = "^[a-zA-Z]+$")]
  ## paste(collapse = ) yields "" for a row without alphabetic tokens, where
  ## the former 1:length(words) loop produced the bogus label "NA ".
  labs[i] <- paste(words, collapse = " ")
  ## NOTE(review): a row with a single numeric token is recycled into both
  ## columns of numDF; other counts do not fit the two-column row.
  numDF[i, ] <- x[i, ][str_detect(string = x[i, ], pattern = "^\\d{1,8}$")]
}
## Columns: item code, item label, amount parsed out of column 1, then the
## amounts held in columns 3-7 of the matrix.
tab14 <- data.frame(numDF[ , 1], labs, numDF[ , 2], tab14[ , 3:7])
## Rows 1, 2, 4 and 6 only supply the department/category labels used below.
tab14 <- tab14[c(3, 5, 7:16), ]
departmentCode <- rep("0300", 12)
department <- rep(labs[1], 12)
categoryCode <- c(21, 22, rep(26, 10))
category <- c(labs[2], labs[4], rep(labs[6], 10))
tab14 <- data.frame(departmentCode, department, categoryCode, category, tab14)
tab14$department <- str_to_title(tab14$department)
tab14$category <- str_to_title(tab14$category)
names(tab14) <- c(
  "departmentCode", "department", "categoryCode", "category",
  "itemCode", "item", "actual_2014_2015", "budget_2015_2016",
  "actual_2015_2016", "budget_2016_2017", "projection_2017_2018",
  "projection_2018_2019"
)
## Codes and amounts become numeric, descriptive columns become character.
## NOTE(review): as.numeric() turns departmentCode "0300" into 300, so the
## leading zero is lost.
tab14[c(1, 3, 5, 7:12)] <- lapply(
  tab14[c(1, 3, 5, 7:12)],
  function(col) as.numeric(as.character(col))
)
tab14[c(2, 4, 6)] <- lapply(tab14[c(2, 4, 6)], as.character)
################################################################################
mohHealthSocial2016 <- tab14
usethis::use_data(mohHealthSocial2016, overwrite = TRUE)
################################################################################
## Rebuild the table held in rows 11-29 of health2[[11]] (department code 0400)
## and save it as mohHealthPlanning2016. Column 1 of the raw character matrix
## carries several space-separated tokens per row: purely alphabetic tokens are
## joined into a label, and tokens made of 1-8 digits fill the two columns of
## numDF.
tab15 <- health2[[11]][11:29, ]
## Copy column 2 over column 3, then blank column 2.
tab15[ , 3] <- tab15[ , 2]
tab15[ , 2] <- ""
## Hyphens in column 1 become spaces so both sides split into separate tokens;
## any remaining punctuation is then removed from columns 1-7.
tab15[ , 1] <- str_replace_all(string = tab15[ , 1], pattern = " - ", replacement = " ")
tab15[ , 1] <- str_replace_all(string = tab15[ , 1], pattern = "-", replacement = " ")
for (k in 1:7) {
  tab15[ , k] <- str_replace_all(string = tab15[ , k], pattern = "[[:punct:]]", replacement = "")
}
## Rows 2, 8 and 11 are appended to the row directly above them and dropped.
wrapped <- c(1, 7, 10)
tab15[wrapped, 1] <- paste(tab15[wrapped, 1], tab15[wrapped + 1, 1], sep = " ")
tab15 <- tab15[-(wrapped + 1), ]
x <- str_split_fixed(string = tab15[ , 1], pattern = " ", n = 8)
labs <- character(nrow(x))
numDF <- matrix(data = NA, nrow = nrow(x), ncol = 2)
for (i in seq_len(nrow(x))) {
  words <- x[i, ][str_detect(string = x[i, ], pattern = "^[a-zA-Z]+$")]
  ## paste(collapse = ) yields "" for a row without alphabetic tokens, where
  ## the former 1:length(words) loop produced the bogus label "NA ".
  labs[i] <- paste(words, collapse = " ")
  ## NOTE(review): a row with a single numeric token is recycled into both
  ## columns of numDF; other counts do not fit the two-column row.
  numDF[i, ] <- x[i, ][str_detect(string = x[i, ], pattern = "^\\d{1,8}$")]
}
## Columns: item code, item label, amount parsed out of column 1, then the
## amounts held in columns 3-7 of the matrix.
tab15 <- data.frame(numDF[ , 1], labs, numDF[ , 2], tab15[ , 3:7])
## Rows 1, 2, 4 and 14 only supply the department/category labels used below.
tab15 <- tab15[c(3, 5:13, 15:16), ]
departmentCode <- rep("0400", 12)
department <- rep(labs[1], 12)
categoryCode <- c(21, rep(22, 9), rep(26, 2))
category <- c(labs[2], rep(labs[4], 9), rep(labs[14], 2))
tab15 <- data.frame(departmentCode, department, categoryCode, category, tab15)
tab15$department <- str_to_title(tab15$department)
tab15$category <- str_to_title(tab15$category)
names(tab15) <- c(
  "departmentCode", "department", "categoryCode", "category",
  "itemCode", "item", "actual_2014_2015", "budget_2015_2016",
  "actual_2015_2016", "budget_2016_2017", "projection_2017_2018",
  "projection_2018_2019"
)
## Codes and amounts become numeric, descriptive columns become character.
## NOTE(review): as.numeric() turns departmentCode "0400" into 400, so the
## leading zero is lost.
tab15[c(1, 3, 5, 7:12)] <- lapply(
  tab15[c(1, 3, 5, 7:12)],
  function(col) as.numeric(as.character(col))
)
tab15[c(2, 4, 6)] <- lapply(tab15[c(2, 4, 6)], as.character)
################################################################################
mohHealthPlanning2016 <- tab15
usethis::use_data(mohHealthPlanning2016, overwrite = TRUE)
################################################################################
## Rebuild the table that spans rows 40-47 of health2[[11]] and rows 5-9 of
## health2[[12]] (department code 0500) and save it as mohHealthVital2016.
## Column 1 of the raw character matrix carries several space-separated tokens
## per row: purely alphabetic tokens are joined into a label, and tokens made of
## 1-8 digits fill the two columns of numDF.
tab16 <- rbind(health2[[11]][40:47, ], health2[[12]][5:9, ])
## Copy column 2 over column 3, then blank column 2.
tab16[ , 3] <- tab16[ , 2]
tab16[ , 2] <- ""
## Hyphens in column 1 become spaces so both sides split into separate tokens;
## any remaining punctuation is then removed from columns 1-7.
tab16[ , 1] <- str_replace_all(string = tab16[ , 1], pattern = " - ", replacement = " ")
tab16[ , 1] <- str_replace_all(string = tab16[ , 1], pattern = "-", replacement = " ")
for (k in 1:7) {
  tab16[ , k] <- str_replace_all(string = tab16[ , k], pattern = "[[:punct:]]", replacement = "")
}
## Row 8 is appended to row 7 and dropped.
tab16[7, 1] <- paste(tab16[7, 1], tab16[8, 1], sep = " ")
tab16 <- tab16[-8, ]
x <- str_split_fixed(string = tab16[ , 1], pattern = " ", n = 8)
labs <- character(nrow(x))
numDF <- matrix(data = NA, nrow = nrow(x), ncol = 2)
for (i in seq_len(nrow(x))) {
  words <- x[i, ][str_detect(string = x[i, ], pattern = "^[a-zA-Z]+$")]
  ## paste(collapse = ) yields "" for a row without alphabetic tokens, where
  ## the former 1:length(words) loop produced the bogus label "NA ".
  labs[i] <- paste(words, collapse = " ")
  ## NOTE(review): a row with a single numeric token is recycled into both
  ## columns of numDF; other counts do not fit the two-column row.
  numDF[i, ] <- x[i, ][str_detect(string = x[i, ], pattern = "^\\d{1,8}$")]
}
## Columns: item code, item label, amount parsed out of column 1, then the
## amounts held in columns 3-7 of the matrix.
tab16 <- data.frame(numDF[ , 1], labs, numDF[ , 2], tab16[ , 3:7])
## Rows 1, 2 and 5 only supply the department/category labels used below.
tab16 <- tab16[c(3:4, 6:12), ]
departmentCode <- rep("0500", 9)
department <- rep(labs[1], 9)
categoryCode <- c(rep(21, 2), rep(22, 7))
category <- c(rep(labs[2], 2), rep(labs[5], 7))
tab16 <- data.frame(departmentCode, department, categoryCode, category, tab16)
tab16$department <- str_to_title(tab16$department)
tab16$category <- str_to_title(tab16$category)
names(tab16) <- c(
  "departmentCode", "department", "categoryCode", "category",
  "itemCode", "item", "actual_2014_2015", "budget_2015_2016",
  "actual_2015_2016", "budget_2016_2017", "projection_2017_2018",
  "projection_2018_2019"
)
## Codes and amounts become numeric, descriptive columns become character.
## NOTE(review): as.numeric() turns departmentCode "0500" into 500, so the
## leading zero is lost.
tab16[c(1, 3, 5, 7:12)] <- lapply(
  tab16[c(1, 3, 5, 7:12)],
  function(col) as.numeric(as.character(col))
)
tab16[c(2, 4, 6)] <- lapply(tab16[c(2, 4, 6)], as.character)
################################################################################
mohHealthVital2016 <- tab16
usethis::use_data(mohHealthVital2016, overwrite = TRUE)
################################################################################
## Rebuild the table that spans rows 21-47 of health2[[12]] and rows 5-11 of
## health2[[13]] (department code 0600) and save it as mohHealthAdmin2016.
## Column 1 of the raw character matrix carries several space-separated tokens
## per row: purely alphabetic tokens are joined into a label, and tokens made of
## 1-8 digits fill the two columns of numDF.
tab17 <- rbind(health2[[12]][21:47, ], health2[[13]][5:11, ])
## Copy column 2 over column 3, then blank column 2.
tab17[ , 3] <- tab17[ , 2]
tab17[ , 2] <- ""
## Hyphens in column 1 become spaces so both sides split into separate tokens;
## any remaining punctuation is then removed from columns 1-7.
tab17[ , 1] <- str_replace_all(string = tab17[ , 1], pattern = " - ", replacement = " ")
tab17[ , 1] <- str_replace_all(string = tab17[ , 1], pattern = "-", replacement = " ")
for (k in 1:7) {
  tab17[ , k] <- str_replace_all(string = tab17[ , k], pattern = "[[:punct:]]", replacement = "")
}
## Rows 8, 12, 22, 27 and 30 are appended to the row directly above them and
## dropped.
wrapped <- c(7, 11, 21, 26, 29)
tab17[wrapped, 1] <- paste(tab17[wrapped, 1], tab17[wrapped + 1, 1], sep = " ")
tab17 <- tab17[-(wrapped + 1), ]
x <- str_split_fixed(string = tab17[ , 1], pattern = " ", n = 8)
labs <- character(nrow(x))
numDF <- matrix(data = NA, nrow = nrow(x), ncol = 2)
for (i in seq_len(nrow(x))) {
  words <- x[i, ][str_detect(string = x[i, ], pattern = "^[a-zA-Z]+$")]
  ## paste(collapse = ) yields "" for a row without alphabetic tokens, where
  ## the former 1:length(words) loop produced the bogus label "NA ".
  labs[i] <- paste(words, collapse = " ")
  ## NOTE(review): a row with a single numeric token is recycled into both
  ## columns of numDF; other counts do not fit the two-column row.
  numDF[i, ] <- x[i, ][str_detect(string = x[i, ], pattern = "^\\d{1,8}$")]
}
## Columns: item code, item label, amount parsed out of column 1, then the
## amounts held in columns 3-7 of the matrix.
tab17 <- data.frame(numDF[ , 1], labs, numDF[ , 2], tab17[ , 3:7])
## Rows 1, 2, 5 and 28 only supply the department/category labels used below.
tab17 <- tab17[c(3:4, 6:27, 29), ]
departmentCode <- rep("0600", 25)
department <- rep(labs[1], 25)
categoryCode <- c(rep(21, 2), rep(22, 22), 23)
category <- c(rep(labs[2], 2), rep(labs[5], 22), labs[28])
tab17 <- data.frame(departmentCode, department, categoryCode, category, tab17)
tab17$department <- str_to_title(tab17$department)
tab17$category <- str_to_title(tab17$category)
names(tab17) <- c(
  "departmentCode", "department", "categoryCode", "category",
  "itemCode", "item", "actual_2014_2015", "budget_2015_2016",
  "actual_2015_2016", "budget_2016_2017", "projection_2017_2018",
  "projection_2018_2019"
)
## Codes and amounts become numeric, descriptive columns become character.
## NOTE(review): as.numeric() turns departmentCode "0600" into 600, so the
## leading zero is lost.
tab17[c(1, 3, 5, 7:12)] <- lapply(
  tab17[c(1, 3, 5, 7:12)],
  function(col) as.numeric(as.character(col))
)
tab17[c(2, 4, 6)] <- lapply(tab17[c(2, 4, 6)], as.character)
################################################################################
mohHealthAdmin2016 <- tab17
usethis::use_data(mohHealthAdmin2016, overwrite = TRUE)
################################################################################
## Rebuild the table held in rows 22-26 of health2[[13]] (department code 5500)
## and save it as mohHealthClaims2016. Column 1 of the raw character matrix
## carries several space-separated tokens per row: purely alphabetic tokens are
## joined into a label, and tokens made of 1-8 digits fill the two columns of
## numDF.
tab18 <- health2[[13]][22:26, ]
## Copy column 2 over column 3, then blank column 2.
tab18[ , 3] <- tab18[ , 2]
tab18[ , 2] <- ""
## Hyphens in column 1 become spaces so both sides split into separate tokens;
## any remaining punctuation is then removed from columns 1-7.
tab18[ , 1] <- str_replace_all(string = tab18[ , 1], pattern = " - ", replacement = " ")
tab18[ , 1] <- str_replace_all(string = tab18[ , 1], pattern = "-", replacement = " ")
for (k in 1:7) {
  tab18[ , k] <- str_replace_all(string = tab18[ , k], pattern = "[[:punct:]]", replacement = "")
}
x <- str_split_fixed(string = tab18[ , 1], pattern = " ", n = 8)
labs <- character(nrow(x))
numDF <- matrix(data = NA, nrow = nrow(x), ncol = 2)
for (i in seq_len(nrow(x))) {
  words <- x[i, ][str_detect(string = x[i, ], pattern = "^[a-zA-Z]+$")]
  ## paste(collapse = ) yields "" for a row without alphabetic tokens, where
  ## the former 1:length(words) loop produced the bogus label "NA ".
  labs[i] <- paste(words, collapse = " ")
  ## NOTE(review): a row with a single numeric token is recycled into both
  ## columns of numDF; other counts do not fit the two-column row.
  numDF[i, ] <- x[i, ][str_detect(string = x[i, ], pattern = "^\\d{1,8}$")]
}
## Columns: item code, item label, amount parsed out of column 1, then the
## amounts held in columns 3-7 of the matrix.
tab18 <- data.frame(numDF[ , 1], labs, numDF[ , 2], tab18[ , 3:7])
## Rows 1, 2 and 4 only supply the department/category labels used below.
tab18 <- tab18[c(3, 5), ]
departmentCode <- rep("5500", 2)
department <- rep(labs[1], 2)
categoryCode <- c(22, 26)
category <- c(labs[2], labs[4])
tab18 <- data.frame(departmentCode, department, categoryCode, category, tab18)
tab18$department <- str_to_title(tab18$department)
tab18$category <- str_to_title(tab18$category)
names(tab18) <- c(
  "departmentCode", "department", "categoryCode", "category",
  "itemCode", "item", "actual_2014_2015", "budget_2015_2016",
  "actual_2015_2016", "budget_2016_2017", "projection_2017_2018",
  "projection_2018_2019"
)
## Codes and amounts (columns 1, 3, 5, 7-12) become numeric; the descriptive
## columns (2, 4, 6) become character.
tab18[c(1, 3, 5, 7:12)] <- lapply(
  tab18[c(1, 3, 5, 7:12)],
  function(col) as.numeric(as.character(col))
)
tab18[c(2, 4, 6)] <- lapply(tab18[c(2, 4, 6)], as.character)
################################################################################
mohHealthClaims2016 <- tab18
usethis::use_data(mohHealthClaims2016, overwrite = TRUE)
################################################################################
## Stack the seven per-department datasets row-wise into a single data.frame
## and save it as mohHealthDepartment2016, replacing any previously saved copy.
mohHealthDepartment2016 <- data.frame(
  do.call(
    rbind,
    list(
      mohHealthCurative2016,
      mohHealthPreventive2016,
      mohHealthSocial2016,
      mohHealthPlanning2016,
      mohHealthVital2016,
      mohHealthAdmin2016,
      mohHealthClaims2016
    )
  )
)
usethis::use_data(mohHealthDepartment2016, overwrite = TRUE)
################################################################################
## Build natBudgetSummary2016 from the tables extracted from pages 28-30 of the
## 2016 budget PDF: one row per spending entity, tagged with a sector code and
## sector name.
tab <- extract_tables(
  file = "data-raw/budget/2016 Ministry of Finance and Development Planning.pdf",
  pages = 28:30,
  method = "decide"
)
## Stack the three extracted matrices and keep the selected row ranges.
tab <- rbind(tab[[1]], tab[[2]], tab[[3]])
tab <- tab[c(6:50, 56:100, 106:145), ]
## Each listed row has the text of the row below appended to its first column;
## the row selection that follows then leaves those rows below out.
wrapped <- c(7, 11, 15, 31, 33, 40, 60, 66, 70, 72, 83, 108)
tab[wrapped, 1] <- paste(tab[wrapped, 1], tab[wrapped + 1, 1], sep = " ")
tab <- tab[c(1:7, 9:11, 13:15, 17:31, 33, 35:40, 42:60, 62:66, 68:70, 72, 74:83, 85:108), ]
## Sector code and name for each of the 88 rows kept below; the counts give
## the number of rows in each sector.
sectorCode <- rep(
  c("01", "02", "03", "04", "05", "06", "07", "08", "09"),
  times = c(16, 5, 11, 8, 11, 6, 19, 7, 5)
)
sector <- rep(
  c("Public Administration",
    "Municipal Government",
    "Transparency and Accountability",
    "Security and Rule of Law",
    "Health",
    "Social Development Services",
    "Education",
    "Energy and Environment",
    "Agriculture"),
  times = c(16, 5, 11, 8, 11, 6, 19, 7, 5)
)
## Rows 1, 18, 24, 36, 45, 57, 64, 84 and 92 are left out.
tab <- tab[c(2:17, 19:23, 25:35, 37:44, 46:56, 58:63, 65:83, 85:91, 93:97), ]
## The entity code is what remains of column 1 once letters, blanks and
## punctuation are removed.
spendingEntityCode <- str_remove_all(tab[ , 1], pattern = "[[:alpha:]]")
spendingEntityCode <- str_remove_all(spendingEntityCode, pattern = "[[:blank:]]")
spendingEntityCode <- str_remove_all(spendingEntityCode, pattern = "[[:punct:]]")
## The entity name is column 1 with the code, right-padded to width 4, removed.
spendingEntity <- str_remove_all(
  tab[ , 1],
  pattern = str_pad(spendingEntityCode, width = 4, side = "right", pad = " ")
)
## Strip punctuation from columns 2-6.
for (k in 2:6) {
  tab[ , k] <- str_remove_all(string = tab[ , k], pattern = "[[:punct:]]")
}
## Column 5 is split at its first space into two separate columns.
tab <- data.frame(
  sectorCode, sector,
  spendingEntityCode, spendingEntity,
  tab[ , 2:4],
  str_split_fixed(string = tab[ , 5], pattern = " ", n = 2),
  tab[ , 6]
)
## Columns 5-10 are converted to numeric.
tab[5:10] <- lapply(tab[5:10], function(col) as.numeric(as.character(col)))
names(tab) <- c(
  "sectorCode", "sector", "spendingEntityCode", "spendingEntity",
  "actual_2014_2015", "budget_2015_2016", "actual_2015_2016",
  "budget_2016_2017", "projection_2017_2018", "projection_2018_2019"
)
natBudgetSummary2016 <- tab
usethis::use_data(natBudgetSummary2016, overwrite = TRUE)
# Add the following code to your website.
# For more information on customizing the embed code, read Embedding Snippets.