################################################################################
#
# Load required libraries
#
################################################################################
library(pdftools)
library(tm)
library(tabulizer)
library(stringr)
library(tidyverse)
library(tidytext)
options(scipen = 999)
################################################################################
#
# Function to process tables
#
################################################################################
#' Split tokenised table rows into text labels and numeric values
#'
#' For every element of `tab`, the purely alphabetic tokens are joined with
#' single spaces to form the row label, and the tokens made of 6 to 8 digits
#' are converted to numbers and stored in one row of a numeric matrix.
#'
#' @param tab A list of character vectors, one vector of tokens per table row.
#' @param nrow Number of rows of the result. Defaults to `length(tab)`; must
#'   not be smaller than `length(tab)`. Extra rows are filled with `NA`.
#' @param ncol Number of numeric columns. Defaults to the largest number of
#'   numeric tokens found in any row. Rows with fewer numeric tokens are
#'   padded with `NA` on the right.
#' @return A data.frame whose first column `labs` holds the row labels (an
#'   empty string when a row has no alphabetic token) followed by `ncol`
#'   numeric columns.
get_table <- function(tab, nrow = NULL, ncol = NULL) {
  n_rows <- length(tab)
  if (is.null(nrow)) {
    nrow <- n_rows
  }
  if (nrow < n_rows) {
    stop("`nrow` (", nrow, ") is smaller than the number of rows in `tab` (",
         n_rows, ")", call. = FALSE)
  }

  tokens <- lapply(tab, as.character)

  ## paste(collapse = ) returns "" for a row without words, where an index
  ## loop over 1:length(words) would have produced the bogus label "NA "
  labs <- vapply(
    tokens,
    function(row) paste(row[grepl("^[a-zA-Z]+$", row)], collapse = " "),
    character(1),
    USE.NAMES = FALSE
  )

  numbers <- lapply(
    tokens,
    function(row) as.numeric(row[grepl("^[0-9]{6,8}$", row)])
  )
  n_found <- lengths(numbers)

  if (is.null(ncol)) {
    ncol <- max(0L, n_found)
  }
  if (any(n_found > ncol)) {
    stop("a row holds more numeric tokens than `ncol` (", ncol, ")",
         call. = FALSE)
  }

  ## Fill row by row so that short rows are padded with NA instead of being
  ## silently recycled
  df <- matrix(data = NA_real_, nrow = nrow, ncol = ncol)
  for (i in seq_along(numbers)) {
    df[i, seq_len(n_found[i])] <- numbers[[i]]
  }

  data.frame(
    labs = c(labs, rep(NA_character_, nrow - n_rows)),
    df,
    stringsAsFactors = FALSE
  )
}
################################################################################
#
# Create list for information and tables in page 253 (2018)
#
################################################################################
## Extract tables from page 253
health1 <- extract_tables(
  file = "data-raw/budget/2018 Ministry of Finance and Development Planning.pdf",
  pages = 253,
  method = "decide"
)
#goal <- health1[[1]][3, ]
#strategicObjective <- str_c(health1[[1]][5, ], health1[[1]][6, ], health1[[1]][7, ], sep = " ")

## First table in page 253 (rows 27-31 of the extracted matrix): join the two
## text columns, strip punctuation and split each row into at most 12 tokens
tab1 <- health1[[1]][27:31, ]
tab1 <- paste(tab1[, 1], tab1[, 2], sep = " ")
tab1 <- str_replace_all(string = tab1, pattern = "[[:punct:]]", replacement = "")
tab1 <- str_split_fixed(string = tab1, pattern = " ", n = 12)

## Alphabetic tokens form the row label; all-digit tokens (1 to 10 digits) are
## the code followed by the six amounts named below.
## paste(collapse = ) gives "" (not "NA ") for a row without any word.
economic_classification <- character(nrow(tab1))
df <- matrix(data = NA, nrow = nrow(tab1), ncol = 7)
for (i in seq_len(nrow(tab1))) {
  tokens <- tab1[i, ]
  economic_classification[i] <- paste(
    tokens[str_detect(string = tokens, pattern = "^[a-zA-Z]+$")],
    collapse = " "
  )
  df[i, ] <- as.numeric(
    tokens[str_detect(string = tokens, pattern = "^\\d{1,10}$")]
  )
}

df <- data.frame(df[, 1], economic_classification, df[, 2:7])
names(df) <- c("code", "economic_classification",
               "actual_2016_2017",
               "budget_2017_2018", "outturn_2017_2018",
               "budget_2018_2019", "projection_2019_2020",
               "projection_2020_2021")
df$economic_classification <- str_to_sentence(df$economic_classification)
summaryEconHealth2018 <- df

## Second table in page 253 (rows 37-46 of the extracted matrix), processed
## the same way as the first one
tab2 <- health1[[1]][37:46, ]
tab2 <- paste(tab2[, 1], tab2[, 2], sep = " ")
tab2 <- str_replace_all(string = tab2, pattern = "[[:punct:]]", replacement = "")
tab2 <- str_split_fixed(string = tab2, pattern = " ", n = 12)

spending_entity <- character(nrow(tab2))
df <- matrix(data = NA, nrow = nrow(tab2), ncol = 7)
for (i in seq_len(nrow(tab2))) {
  tokens <- tab2[i, ]
  spending_entity[i] <- paste(
    tokens[str_detect(string = tokens, pattern = "^[a-zA-Z]+$")],
    collapse = " "
  )
  df[i, ] <- as.numeric(
    tokens[str_detect(string = tokens, pattern = "^\\d{1,10}$")]
  )
}

df <- data.frame(df[, 1], spending_entity, df[, 2:7])
names(df) <- c("code", "spending_entity",
               "actual_2016_2017",
               "budget_2017_2018", "outturn_2017_2018",
               "budget_2018_2019", "projection_2019_2020",
               "projection_2020_2021")
df$spending_entity <- str_to_title(df$spending_entity)
summarySpendingHealth2018 <- df

## Bundle both summary tables into one named list and save it
summaryHealth2018 <- list(summaryEconHealth2018, summarySpendingHealth2018)
names(summaryHealth2018) <- c("summaryEconHealth2018", "summarySpendingHealth2018")
usethis::use_data(summaryHealth2018, overwrite = TRUE)

## Tidy-up
rm(tokens, i, economic_classification, spending_entity, tab1, tab2, df,
   summaryEconHealth2018, summarySpendingHealth2018, health1)
################################################################################
#
# Extract and process tables in pages 254-266 (2018)
#
################################################################################
## Extract tables from pages 254-266
health2 <- extract_tables(
  file = "data-raw/budget/2018 Ministry of Finance and Development Planning.pdf",
  pages = 254:266,
  method = "decide"
)

## First extracted table, rows 14-48
tab1 <- health2[[1]][14:48, ]

## Column 1: turn dashes into spaces first, so that the punctuation removal
## below does not glue the neighbouring tokens together
for (dash in c(" – ", "–", "-")) {
  tab1[, 1] <- str_replace_all(string = tab1[, 1], pattern = dash,
                               replacement = " ")
}
for (k in 1:6) {
  tab1[, k] <- str_replace_all(string = tab1[, k], pattern = "[[:punct:]]",
                               replacement = "")
}

## Rows 7 and 11 have column-1 text that continues on the next row
## (presumably wrapped labels): append the continuation, then drop rows 8, 12
wrapped <- c(7, 11)
tab1[wrapped, 1] <- paste(tab1[wrapped, 1], tab1[wrapped + 1, 1], sep = " ")
tab1 <- tab1[-(wrapped + 1), ]

## Split column 6 at its first space into two columns
y <- str_split_fixed(string = tab1[, 6], pattern = " ", n = 2)
tab1 <- cbind(tab1[, 1:5], y)

## Column 1 tokens: alphabetic tokens form the label, all-digit tokens
## (1 to 10 digits) fill the two numeric columns.
## paste(collapse = ) gives "" (not "NA ") for a row without any word.
x <- str_split_fixed(string = tab1[, 1], pattern = " ", n = 8)
labs <- character(nrow(x))
df <- matrix(data = NA, nrow = nrow(x), ncol = 2)
for (i in seq_len(nrow(x))) {
  tokens <- x[i, ]
  labs[i] <- paste(
    tokens[str_detect(string = tokens, pattern = "^[a-zA-Z]+$")],
    collapse = " "
  )
  df[i, ] <- as.numeric(
    tokens[str_detect(string = tokens, pattern = "^\\d{1,10}$")]
  )
}

tab1 <- data.frame(df[, 1], labs, df[, 2], tab1[, 3:7])

## Rows 1, 5 and 27 are dropped as item rows; their labels are reused as the
## category of the rows that follow them
tab1 <- tab1[c(2:4, 6:26, 28:33), ]
categoryCode <- c(rep(21, 3), rep(22, 21), rep(25, 6))
category <- c(rep(labs[1], 3), rep(labs[5], 21), rep(labs[27], 6))
tab1 <- data.frame(categoryCode, category, tab1)
tab1$category <- str_to_title(tab1$category)
names(tab1) <- c("categoryCode", "category",
                 "itemCode", "item",
                 "actual_2016_2017",
                 "budget_2017_2018", "outturn_2017_2018",
                 "budget_2018_2019", "projection_2019_2020",
                 "projection_2020_2021")

## Item names as character, the six amount columns as numeric
tab1[, 4] <- as.character(tab1[, 4])
tab1[, 5:10] <- lapply(tab1[, 5:10], function(v) as.numeric(as.character(v)))
################################################################################
## Second extracted table, rows 6-48
tab2 <- health2[[2]][6:48, ]

## Column 1: turn dashes into spaces first, so that the punctuation removal
## below does not glue the neighbouring tokens together
for (dash in c(" – ", "–", "-")) {
  tab2[, 1] <- str_replace_all(string = tab2[, 1], pattern = dash,
                               replacement = " ")
}
for (k in 1:6) {
  tab2[, k] <- str_replace_all(string = tab2[, k], pattern = "[[:punct:]]",
                               replacement = "")
}

## Rows whose column-1 text continues on the next row (presumably wrapped
## labels): append the continuation, then drop the continuation rows
wrapped <- c(9, 15, 31, 36)
tab2[wrapped, 1] <- paste(tab2[wrapped, 1], tab2[wrapped + 1, 1], sep = " ")
tab2 <- tab2[-(wrapped + 1), ]

## Split column 6 at its first space into two columns
y <- str_split_fixed(string = tab2[, 6], pattern = " ", n = 2)
tab2 <- cbind(tab2[, 1:5], y)

## Column 1 tokens: alphabetic tokens form the label, all-digit tokens
## (1 to 10 digits) fill the two numeric columns.
## paste(collapse = ) gives "" (not "NA ") for a row without any word.
x <- str_split_fixed(string = tab2[, 1], pattern = " ", n = 8)
labs <- character(nrow(x))
df <- matrix(data = NA, nrow = nrow(x), ncol = 2)
for (i in seq_len(nrow(x))) {
  tokens <- x[i, ]
  labs[i] <- paste(
    tokens[str_detect(string = tokens, pattern = "^[a-zA-Z]+$")],
    collapse = " "
  )
  df[i, ] <- as.numeric(
    tokens[str_detect(string = tokens, pattern = "^\\d{1,10}$")]
  )
}

tab2 <- data.frame(df[, 1], labs, df[, 2], tab2[, 3:7])

## Row 8 is dropped as an item row; its label is the category of the 31 rows
## after it, while the first 7 rows are labelled "Subsidy"
tab2 <- tab2[c(1:7, 9:39), ]
categoryCode <- c(rep(25, 7), rep(26, 31))
category <- c(rep("Subsidy", 7), rep(labs[8], 31))
tab2 <- data.frame(categoryCode, category, tab2)
tab2$category <- str_to_title(tab2$category)
names(tab2) <- c("categoryCode", "category",
                 "itemCode", "item",
                 "actual_2016_2017",
                 "budget_2017_2018", "outturn_2017_2018",
                 "budget_2018_2019", "projection_2019_2020",
                 "projection_2020_2021")

## Item names as character, the six amount columns as numeric
tab2[, 4] <- as.character(tab2[, 4])
tab2[, 5:10] <- lapply(tab2[, 5:10], function(v) as.numeric(as.character(v)))
################################################################################
## Third extracted table, rows 6-46
tab3 <- health2[[3]][6:46, ]

## Column 1: turn dashes into spaces first, so that the punctuation removal
## below does not glue the neighbouring tokens together
for (dash in c(" – ", "–", "-")) {
  tab3[, 1] <- str_replace_all(string = tab3[, 1], pattern = dash,
                               replacement = " ")
}
for (k in 1:6) {
  tab3[, k] <- str_replace_all(string = tab3[, k], pattern = "[[:punct:]]",
                               replacement = "")
}

## Rows whose column-1 text continues on the next row (presumably wrapped
## labels): append the continuation, then drop the continuation rows.
## Row 41 gets the literal word "County" appended instead.
wrapped <- c(1, 14, 23)
tab3[wrapped, 1] <- paste(tab3[wrapped, 1], tab3[wrapped + 1, 1], sep = " ")
tab3[41, 1] <- paste(tab3[41, 1], "County", sep = " ")
tab3 <- tab3[-(wrapped + 1), ]

## Split column 6 at its first space into two columns
y <- str_split_fixed(string = tab3[, 6], pattern = " ", n = 2)
tab3 <- cbind(tab3[, 1:5], y)

## Column 1 tokens: alphabetic tokens form the label, all-digit tokens
## (1 to 10 digits) fill the two numeric columns.
## paste(collapse = ) gives "" (not "NA ") for a row without any word.
x <- str_split_fixed(string = tab3[, 1], pattern = " ", n = 8)
labs <- character(nrow(x))
df <- matrix(data = NA, nrow = nrow(x), ncol = 2)
for (i in seq_len(nrow(x))) {
  tokens <- x[i, ]
  labs[i] <- paste(
    tokens[str_detect(string = tokens, pattern = "^[a-zA-Z]+$")],
    collapse = " "
  )
  df[i, ] <- as.numeric(
    tokens[str_detect(string = tokens, pattern = "^\\d{1,10}$")]
  )
}

tab3 <- data.frame(df[, 1], labs, df[, 2], tab3[, 3:7])

## Every row of this table is labelled as category 26, "Grants"
categoryCode <- rep(26, nrow(tab3))
category <- rep("Grants", nrow(tab3))
tab3 <- data.frame(categoryCode, category, tab3)
tab3$category <- str_to_title(tab3$category)
names(tab3) <- c("categoryCode", "category",
                 "itemCode", "item",
                 "actual_2016_2017",
                 "budget_2017_2018", "outturn_2017_2018",
                 "budget_2018_2019", "projection_2019_2020",
                 "projection_2020_2021")

## Item names as character, the six amount columns as numeric
tab3[, 4] <- as.character(tab3[, 4])
tab3[, 5:10] <- lapply(tab3[, 5:10], function(v) as.numeric(as.character(v)))
################################################################################
## Fourth extracted table, rows 6-24
tab4 <- health2[[4]][6:24, ]

## Row 1 has column-1 text that continues on row 2 (presumably a wrapped
## label): append the continuation, then drop row 2
tab4[1, 1] <- paste(tab4[1, 1], tab4[2, 1], sep = " ")
tab4 <- tab4[-2, ]

## Column 1: turn dashes into spaces first, so that the punctuation removal
## below does not glue the neighbouring tokens together
for (dash in c(" – ", "–", "-")) {
  tab4[, 1] <- str_replace_all(string = tab4[, 1], pattern = dash,
                               replacement = " ")
}
for (k in 1:6) {
  tab4[, k] <- str_replace_all(string = tab4[, k], pattern = "[[:punct:]]",
                               replacement = "")
}

## Split column 6 at its first space into two columns
y <- str_split_fixed(string = tab4[, 6], pattern = " ", n = 2)
tab4 <- cbind(tab4[, 1:5], y)

## Column 1 tokens: alphabetic tokens form the label, all-digit tokens
## (1 to 10 digits) fill the two numeric columns.
## paste(collapse = ) gives "" (not "NA ") for a row without any word.
x <- str_split_fixed(string = tab4[, 1], pattern = " ", n = 8)
labs <- character(nrow(x))
df <- matrix(data = NA, nrow = nrow(x), ncol = 2)
for (i in seq_len(nrow(x))) {
  tokens <- x[i, ]
  labs[i] <- paste(
    tokens[str_detect(string = tokens, pattern = "^[a-zA-Z]+$")],
    collapse = " "
  )
  df[i, ] <- as.numeric(
    tokens[str_detect(string = tokens, pattern = "^\\d{1,10}$")]
  )
}

tab4 <- data.frame(df[, 1], labs, df[, 2], tab4[, 3:7])

## Row 17 is dropped as an item row; its label is the category of the last
## row, while the first 16 rows are labelled "Grants"
tab4 <- tab4[c(1:16, 18), ]
categoryCode <- c(rep(26, 16), 31)
category <- c(rep("Grants", 16), labs[17])
tab4 <- data.frame(categoryCode, category, tab4)
tab4$category <- str_to_title(tab4$category)
names(tab4) <- c("categoryCode", "category",
                 "itemCode", "item",
                 "actual_2016_2017",
                 "budget_2017_2018", "outturn_2017_2018",
                 "budget_2018_2019", "projection_2019_2020",
                 "projection_2020_2021")

## Item names as character, the six amount columns as numeric
tab4[, 4] <- as.character(tab4[, 4])
tab4[, 5:10] <- lapply(tab4[, 5:10], function(v) as.numeric(as.character(v)))
################################################################################
## Stack the four processed tables into one data.frame and save it
mohHealthEcon2018 <- data.frame(rbind(tab1, tab2, tab3, tab4))
usethis::use_data(mohHealthEcon2018, overwrite = TRUE)
################################################################################
## Rows 30-44 of the fourth extracted table
tab5 <- health2[[4]][30:44, ]

## Column 1: turn hyphens into spaces first, so that the punctuation removal
## below does not glue the neighbouring tokens together
tab5[, 1] <- str_replace_all(string = tab5[, 1], pattern = "-",
                             replacement = " ")
for (k in 1:6) {
  tab5[, k] <- str_replace_all(string = tab5[, k], pattern = "[[:punct:]]",
                               replacement = "")
}

## Tokenise column 1 and split column 6 at its first space into two columns
x <- str_split_fixed(string = tab5[, 1], pattern = " ", n = 5)
y <- str_split_fixed(string = tab5[, 6], pattern = " ", n = 2)
tab5 <- cbind(tab5[, 1:5], y)

## Column 1 tokens: alphabetic tokens form the label, all-digit tokens
## (1 to 10 digits) fill the two numeric columns.
## paste(collapse = ) gives "" (not "NA ") for a row without any word.
labs <- character(nrow(x))
df <- matrix(data = NA, nrow = nrow(x), ncol = 2)
for (i in seq_len(nrow(x))) {
  tokens <- x[i, ]
  labs[i] <- paste(
    tokens[str_detect(string = tokens, pattern = "^[a-zA-Z]+$")],
    collapse = " "
  )
  df[i, ] <- as.numeric(
    tokens[str_detect(string = tokens, pattern = "^\\d{1,10}$")]
  )
}

tab5 <- data.frame(df[, 1], labs, df[, 2], tab5[, 3:7])
tab5$labs <- str_to_title(tab5$labs)
names(tab5) <- c("countyCode", "county",
                 "actual_2016_2017",
                 "budget_2017_2018", "outturn_2017_2018",
                 "budget_2018_2019", "projection_2019_2020",
                 "projection_2020_2021")

## Column 3 is already numeric (it comes from `df`); convert the remaining
## five amount columns
tab5[, 4:8] <- lapply(tab5[, 4:8], function(v) as.numeric(as.character(v)))
################################################################################
mohHealthCounty2018 <- tab5
usethis::use_data(mohHealthCounty2018, overwrite = TRUE)
################################################################################
## Helper: clean one extracted page and return one string per table row.
##   page    - character matrix taken from `health2`
##   n_cols  - number of leading columns to strip of punctuation and join
##   wrapped - rows whose column-1 text continues on the following row
##             (presumably wrapped labels); the continuation rows are dropped
join_curative_rows <- function(page, n_cols, wrapped) {
  ## Hyphens become spaces before punctuation is removed so that the
  ## neighbouring tokens are not glued together
  for (dash in c(" - ", "-")) {
    page[, 1] <- str_replace_all(string = page[, 1], pattern = dash,
                                 replacement = " ")
  }
  for (k in seq_len(n_cols)) {
    page[, k] <- str_replace_all(string = page[, k], pattern = "[[:punct:]]",
                                 replacement = "")
  }
  page[wrapped, 1] <- paste(page[wrapped, 1], page[wrapped + 1, 1], sep = " ")
  page <- page[-(wrapped + 1), ]
  apply(page[, seq_len(n_cols)], 1, paste, collapse = " ")
}

## Helper: split row strings into tokens; alphabetic tokens form the label and
## all-digit tokens (1 to 10 digits) fill seven numeric columns. Returns a
## character matrix: first number, label, remaining six numbers.
## paste(collapse = ) gives "" (not "NA ") for a row without any word.
split_curative_rows <- function(rows, n_tokens) {
  x <- str_split_fixed(string = rows, pattern = " ", n = n_tokens)
  labs <- character(nrow(x))
  df <- matrix(data = NA, nrow = nrow(x), ncol = 7)
  for (i in seq_len(nrow(x))) {
    tokens <- x[i, ]
    labs[i] <- paste(
      tokens[str_detect(string = tokens, pattern = "^[a-zA-Z]+$")],
      collapse = " "
    )
    df[i, ] <- as.numeric(
      tokens[str_detect(string = tokens, pattern = "^\\d{1,10}$")]
    )
  }
  cbind(df[, 1], labs, df[, 2:7])
}

## Fifth extracted table, rows 14-48 (only the first four columns are used)
tab6a <- split_curative_rows(
  join_curative_rows(health2[[5]][14:48, ], n_cols = 4,
                     wrapped = c(7, 9, 11, 16, 23, 28, 31)),
  n_tokens = 13
)

## Sixth extracted table, rows 5-50
tab6b <- split_curative_rows(
  join_curative_rows(health2[[6]][5:50, ], n_cols = 6,
                     wrapped = c(4, 11, 14, 19, 22, 24, 27, 33, 35, 37, 40, 44)),
  n_tokens = 13
)

## Seventh extracted table, rows 5-48
tab6c <- split_curative_rows(
  join_curative_rows(health2[[7]][5:48, ], n_cols = 6,
                     wrapped = c(1, 3, 6, 12, 16, 19, 28, 30, 36)),
  n_tokens = 14
)

## Stack the three parts and keep only the item rows (rows 1, 2, 5, 17 and 31
## are dropped)
tab6 <- rbind(tab6a, tab6b, tab6c)
tab6 <- tab6[c(3:4, 6:16, 18:30, 32:97), ]
departmentCode <- rep(100, nrow(tab6))
department <- rep("Curative Services", nrow(tab6))
categoryCode <- c(rep(21, 2), rep(22, 11), rep(25, 13), rep(26, 66))
category <- c(rep("Compensation Of Employees", 2),
              rep("Use Of Goods And Services", 11),
              rep("Subsidy", 13),
              rep("Grants", 66))
tab6 <- data.frame(departmentCode, department, categoryCode, category, tab6)
tab6$department <- str_to_title(string = tab6$department)
tab6$category <- str_to_title(string = tab6$category)
names(tab6) <- c("departmentCode", "department",
                 "categoryCode", "category",
                 "itemCode", "item",
                 "actual_2016_2017",
                 "budget_2017_2018", "outturn_2017_2018",
                 "budget_2018_2019", "projection_2019_2020",
                 "projection_2020_2021")

## The stacked matrix is character: restore numeric codes and amounts and
## keep the item names as character
tab6[, 6] <- as.character(tab6[, 6])
tab6[, c(5, 7:12)] <- lapply(tab6[, c(5, 7:12)],
                             function(v) as.numeric(as.character(v)))
################################################################################
mohHealthCurative2018 <- tab6
usethis::use_data(mohHealthCurative2018, overwrite = TRUE)
################################################################################
## Eighth extracted table, rows 12-28
tab7 <- health2[[8]][12:28, ]

## Column 1: turn hyphens into spaces first, so that the punctuation removal
## below does not glue the neighbouring tokens together
for (dash in c(" - ", "-")) {
  tab7[, 1] <- str_replace_all(string = tab7[, 1], pattern = dash,
                               replacement = " ")
}
for (k in 1:6) {
  tab7[, k] <- str_replace_all(string = tab7[, k], pattern = "[[:punct:]]",
                               replacement = "")
}

## Rows whose column-1 text continues on the next row (presumably wrapped
## labels): append the continuation, then drop the continuation rows
wrapped <- c(5, 10, 16)
tab7[wrapped, 1] <- paste(tab7[wrapped, 1], tab7[wrapped + 1, 1], sep = " ")
tab7 <- tab7[-(wrapped + 1), ]

## Join the six columns into one string per row and tokenise it
tab7 <- paste(tab7[, 1], tab7[, 2], tab7[, 3], tab7[, 4], tab7[, 5], tab7[, 6],
              sep = " ")
x <- str_split_fixed(string = tab7, pattern = " ", n = 13)

## Alphabetic tokens form the label; all-digit tokens (1 to 10 digits) fill
## the seven value columns. The digit tokens are stored as character here and
## converted to numeric once the final data.frame is built.
## paste(collapse = ) gives "" (not "NA ") for a row without any word.
labs <- character(nrow(x))
df <- matrix(data = NA, nrow = nrow(x), ncol = 7)
for (i in seq_len(nrow(x))) {
  tokens <- x[i, ]
  labs[i] <- paste(
    tokens[str_detect(string = tokens, pattern = "^[a-zA-Z]+$")],
    collapse = " "
  )
  df[i, ] <- tokens[str_detect(string = tokens, pattern = "^\\d{1,10}$")]
}

tab7 <- cbind(df[, 1], labs, df[, 2:7])

## Keep the item rows only; the labels of the dropped rows 1, 2, 4 and 13
## are reused as department and category names
tab7 <- tab7[c(3, 5:12, 14), ]
departmentCode <- rep(200, nrow(tab7))
department <- rep(labs[1], nrow(tab7))
categoryCode <- c(21, rep(22, 8), 26)
category <- c(labs[2], rep(labs[4], 8), labs[13])
tab7 <- data.frame(departmentCode, department, categoryCode, category, tab7)
tab7$department <- str_to_title(string = tab7$department)
tab7$category <- str_to_title(string = tab7$category)
names(tab7) <- c("departmentCode", "department",
                 "categoryCode", "category",
                 "itemCode", "item",
                 "actual_2016_2017",
                 "budget_2017_2018", "outturn_2017_2018",
                 "budget_2018_2019", "projection_2019_2020",
                 "projection_2020_2021")

## Numeric codes and amounts, item names as character
tab7[, 6] <- as.character(tab7[, 6])
tab7[, c(5, 7:12)] <- lapply(tab7[, c(5, 7:12)],
                             function(v) as.numeric(as.character(v)))
################################################################################
mohHealthPreventive2018 <- tab7
usethis::use_data(mohHealthPreventive2018, overwrite = TRUE)
################################################################################
## Planning table: rows 10-30 of the ninth element of `health2`
tab8 <- health2[[9]][10:30, ]
## Rows 2, 8, 10, 12 and 17 are appended to the label of the row above them
## (presumably labels that wrapped onto a second line) and then dropped
tab8[c(1, 7, 9, 11, 16), 1] <- paste(tab8[c(1, 7, 9, 11, 16), 1],
                                     tab8[c(2, 8, 10, 12, 17), 1], sep = " ")
tab8 <- tab8[c(1, 3:7, 9, 11, 13:16, 18:21), ]
## Replace hyphens in the label column with spaces before punctuation is
## stripped, so hyphen-joined words remain separate tokens
tab8[ , 1] <- str_replace_all(string = tab8[ , 1], pattern = "-", replacement = " ")
## Remove all punctuation from the six columns used below
for (k in 1:6) {
  tab8[ , k] <- str_replace_all(string = tab8[ , k], pattern = "[[:punct:]]", replacement = "")
}
## Collapse every row into one string, then split it into single tokens
tab8 <- apply(tab8[ , 1:6], 1, paste, collapse = " ")
x <- str_split_fixed(string = tab8, pattern = " ", n = 14)
## For each row: purely alphabetic tokens form the label, tokens of 1-10
## digits are the code followed by the six amounts
labs <- character(nrow(x))
df <- matrix(data = NA, nrow = nrow(x), ncol = 7)
for (i in seq_len(nrow(x))) {
  tokens <- x[i, ]
  words <- tokens[str_detect(string = tokens, pattern = "^[a-zA-Z]+$")]
  ## paste(collapse) yields "" for a row without words; the former
  ## 1:length(words) loop produced the bogus label "NA " in that case
  phrase <- paste(words, collapse = " ")
  labs[i] <- phrase
  numbers <- as.numeric(tokens[str_detect(string = tokens, pattern = "^\\d{1,10}$")])
  df[i, ] <- numbers
}
tab8 <- cbind(df[ , 1], labs, df[ , 2:7])
## Rows 3, 5-14 and 16 are the line items; the labels of rows 1, 2, 4 and 15
## provide the department and category names
tab8 <- tab8[c(3, 5:14, 16), ]
departmentCode <- rep(400, nrow(tab8))
department <- rep(labs[1], nrow(tab8))
categoryCode <- c(21, rep(22, 10), 26)
category <- c(labs[2], rep(labs[4], 10), labs[15])
tab8 <- data.frame(departmentCode, department, categoryCode, category, tab8)
tab8$department <- str_to_title(string = tab8$department)
tab8$category <- str_to_title(string = tab8$category)
names(tab8) <- c("departmentCode", "department",
                 "categoryCode", "category",
                 "itemCode", "item",
                 "actual_2016_2017",
                 "budget_2017_2018", "outturn_2017_2018",
                 "budget_2018_2019", "projection_2019_2020",
                 "projection_2020_2021")
## Item code and the six amount columns become numeric; the item stays text
for (k in c(5, 7:12)) {
  tab8[ , k] <- as.numeric(as.character(tab8[ , k]))
}
tab8[ , 6] <- as.character(tab8[ , 6])
################################################################################
mohHealthPlanning2018 <- tab8
usethis::use_data(mohHealthPlanning2018, overwrite = TRUE)
################################################################################
## Vital table: rows 5-20 of the tenth element of `health2`
tab9 <- health2[[10]][5:20, ]
## Rows 8, 10 and 14 are appended to the label of the row above them
## (presumably labels that wrapped onto a second line) and then dropped
tab9[c(7, 9, 13), 1] <- paste(tab9[c(7, 9, 13), 1], tab9[c(8, 10, 14), 1], sep = " ")
tab9 <- tab9[c(1:7, 9, 11:13, 15:16), ]
## Replace hyphens and en dashes in the label column with spaces before
## punctuation is stripped, so joined words remain separate tokens
tab9[ , 1] <- str_replace_all(string = tab9[ , 1], pattern = " - ", replacement = " ")
tab9[ , 1] <- str_replace_all(string = tab9[ , 1], pattern = "-", replacement = " ")
tab9[ , 1] <- str_replace_all(string = tab9[ , 1], pattern = "–", replacement = " ")
## Remove all punctuation from the six columns used below
for (k in 1:6) {
  tab9[ , k] <- str_replace_all(string = tab9[ , k], pattern = "[[:punct:]]", replacement = "")
}
## Collapse every row into one string, then split it into single tokens
tab9 <- apply(tab9[ , 1:6], 1, paste, collapse = " ")
x <- str_split_fixed(string = tab9, pattern = " ", n = 13)
## For each row: purely alphabetic tokens form the label, tokens of 1-10
## digits are the code followed by the six amounts
labs <- character(nrow(x))
df <- matrix(data = NA, nrow = nrow(x), ncol = 7)
for (i in seq_len(nrow(x))) {
  tokens <- x[i, ]
  words <- tokens[str_detect(string = tokens, pattern = "^[a-zA-Z]+$")]
  ## paste(collapse) yields "" for a row without words; the former
  ## 1:length(words) loop produced the bogus label "NA " in that case
  phrase <- paste(words, collapse = " ")
  labs[i] <- phrase
  numbers <- as.numeric(tokens[str_detect(string = tokens, pattern = "^\\d{1,10}$")])
  df[i, ] <- numbers
}
tab9 <- cbind(df[ , 1], labs, df[ , 2:7])
## Rows 3-4 and 6-13 are the line items; the labels of rows 1, 2 and 5
## provide the department and category names
tab9 <- tab9[c(3:4, 6:13), ]
departmentCode <- rep(500, nrow(tab9))
department <- rep(labs[1], nrow(tab9))
categoryCode <- c(rep(21, 2), rep(22, 8))
category <- c(rep(labs[2], 2), rep(labs[5], 8))
tab9 <- data.frame(departmentCode, department, categoryCode, category, tab9)
tab9$department <- str_to_title(string = tab9$department)
tab9$category <- str_to_title(string = tab9$category)
names(tab9) <- c("departmentCode", "department",
                 "categoryCode", "category",
                 "itemCode", "item",
                 "actual_2016_2017",
                 "budget_2017_2018", "outturn_2017_2018",
                 "budget_2018_2019", "projection_2019_2020",
                 "projection_2020_2021")
## Item code and the six amount columns become numeric; the item stays text
for (k in c(5, 7:12)) {
  tab9[ , k] <- as.numeric(as.character(tab9[ , k]))
}
tab9[ , 6] <- as.character(tab9[ , 6])
################################################################################
mohHealthVital2018 <- tab9
usethis::use_data(mohHealthVital2018, overwrite = TRUE)
################################################################################
## Administration table: rows 32-48 of the tenth element of `health2`
## followed by rows 5-17 of the eleventh element
tab10 <- rbind(health2[[10]][32:48, ], health2[[11]][5:17, ])
## Rows 2, 10, 12, 15 and 22 are appended to the label of the row above them
## (presumably labels that wrapped onto a second line) and then dropped
tab10[c(1, 9, 11, 14, 21), 1] <- paste(tab10[c(1, 9, 11, 14, 21), 1],
                                       tab10[c(2, 10, 12, 15, 22), 1], sep = " ")
tab10 <- tab10[c(1, 3:9, 11, 13:14, 16:21, 23:30), ]
## Replace hyphens and en dashes in the label column with spaces before
## punctuation is stripped, so joined words remain separate tokens
tab10[ , 1] <- str_replace_all(string = tab10[ , 1], pattern = " - ", replacement = " ")
tab10[ , 1] <- str_replace_all(string = tab10[ , 1], pattern = "-", replacement = " ")
tab10[ , 1] <- str_replace_all(string = tab10[ , 1], pattern = "–", replacement = " ")
## Remove all punctuation from the six columns used below
for (k in 1:6) {
  tab10[ , k] <- str_replace_all(string = tab10[ , k], pattern = "[[:punct:]]", replacement = "")
}
## Collapse every row into one string, then split it into single tokens
tab10 <- apply(tab10[ , 1:6], 1, paste, collapse = " ")
x <- str_split_fixed(string = tab10, pattern = " ", n = 14)
## For each row: purely alphabetic tokens form the label, tokens of 1-10
## digits are the code followed by the six amounts
labs <- character(nrow(x))
df <- matrix(data = NA, nrow = nrow(x), ncol = 7)
for (i in seq_len(nrow(x))) {
  tokens <- x[i, ]
  words <- tokens[str_detect(string = tokens, pattern = "^[a-zA-Z]+$")]
  ## paste(collapse) yields "" for a row without words; the former
  ## 1:length(words) loop produced the bogus label "NA " in that case
  phrase <- paste(words, collapse = " ")
  labs[i] <- phrase
  numbers <- as.numeric(tokens[str_detect(string = tokens, pattern = "^\\d{1,10}$")])
  df[i, ] <- numbers
}
tab10 <- cbind(df[ , 1], labs, df[ , 2:7])
## Rows 3-5, 7-23 and 25 are the line items; the labels of rows 1, 2, 6 and
## 24 provide the department and category names
tab10 <- tab10[c(3:5, 7:23, 25), ]
departmentCode <- rep(600, nrow(tab10))
department <- rep(labs[1], nrow(tab10))
categoryCode <- c(rep(21, 3), rep(22, 17), 31)
category <- c(rep(labs[2], 3), rep(labs[6], 17), labs[24])
tab10 <- data.frame(departmentCode, department, categoryCode, category, tab10)
tab10$department <- str_to_title(string = tab10$department)
tab10$category <- str_to_title(string = tab10$category)
names(tab10) <- c("departmentCode", "department",
                  "categoryCode", "category",
                  "itemCode", "item",
                  "actual_2016_2017",
                  "budget_2017_2018", "outturn_2017_2018",
                  "budget_2018_2019", "projection_2019_2020",
                  "projection_2020_2021")
## Item code and the six amount columns become numeric; the item stays text
for (k in c(5, 7:12)) {
  tab10[ , k] <- as.numeric(as.character(tab10[ , k]))
}
tab10[ , 6] <- as.character(tab10[ , 6])
################################################################################
mohHealthAdmin2018 <- tab10
usethis::use_data(mohHealthAdmin2018, overwrite = TRUE)
################################################################################
## Stack the pieces of one table that is spread over three elements of
## `health2`: rows 27-30 of element 11, three row ranges of element 12 and
## rows 5-8 of element 13 (31 rows in total)
tab11 <- rbind(health2[[11]][27:30, ], health2[[12]][c(5:10, 20:27, 37:45), ], health2[[13]][5:8, ])
## Append the first-column text of rows 8, 10, 16, 25 and 29 to the row above
## (presumably labels that wrapped onto a second line -- TODO confirm)
tab11[7, 1] <- paste(tab11[7, 1], tab11[8, 1], sep = " ")
tab11[9, 1] <- paste(tab11[9, 1], tab11[10, 1], sep = " ")
tab11[15, 1] <- paste(tab11[15, 1], tab11[16, 1], sep = " ")
tab11[24, 1] <- paste(tab11[24, 1], tab11[25, 1], sep = " ")
tab11[28, 1] <- paste(tab11[28, 1], tab11[29, 1], sep = " ")
## Drop the rows that were just merged into their predecessor
tab11 <- tab11[c(1:7, 9, 11:15, 17:24, 26:28, 30:31), ]
## Replace hyphens and en dashes in the first column with spaces so they are
## not simply deleted by the punctuation removal below
tab11[ , 1] <- str_replace_all(string = tab11[ , 1], pattern = " - ", replacement = " ")
tab11[ , 1] <- str_replace_all(string = tab11[ , 1], pattern = "-", replacement = " ")
tab11[ , 1] <- str_replace_all(string = tab11[ , 1], pattern = "–", replacement = " ")
## Remove all punctuation from columns 1-6
tab11[ , 1] <- str_replace_all(string = tab11[ , 1], pattern = "[[:punct:]]", replacement = "")
tab11[ , 2] <- str_replace_all(string = tab11[ , 2], pattern = "[[:punct:]]", replacement = "")
tab11[ , 3] <- str_replace_all(string = tab11[ , 3], pattern = "[[:punct:]]", replacement = "")
tab11[ , 4] <- str_replace_all(string = tab11[ , 4], pattern = "[[:punct:]]", replacement = "")
tab11[ , 5] <- str_replace_all(string = tab11[ , 5], pattern = "[[:punct:]]", replacement = "")
tab11[ , 6] <- str_replace_all(string = tab11[ , 6], pattern = "[[:punct:]]", replacement = "")
## Collapse each row into one space-separated string, then split it into at
## most 14 tokens per row
tab11 <- paste(tab11[ , 1], tab11[ , 2], tab11[ , 3], tab11[ , 4], tab11[ , 5], tab11[ , 6], sep = " ")
x <- str_split_fixed(string = tab11, pattern = " ", n = 14)
labs <- NULL
df <- matrix(data = NA, nrow = nrow(x), ncol = 7)
for(i in 1:(nrow(x))) {
words <- x[i, ][str_detect(string = x[i, ], pattern = "^[a-zA-Z]+$")]
phrase <- ""
for(j in 1:length(words)){
phrase <- paste(phrase, words[j], sep = " ")
}
phrase <- str_remove(string = phrase, pattern = " ")
labs <- c(labs, phrase)################################################################################
#
# Load required libraries
#
################################################################################
library(pdftools)
library(tm)
library(tabulizer)
library(stringr)
library(tidyverse)
library(tidytext)
options(scipen = 999)
################################################################################
#
# Function to process tables
#
################################################################################
#' Split tokenised table rows into text labels and numeric values
#'
#' For every element of `tab`, the purely alphabetic tokens are joined into a
#' label and the tokens consisting of six to eight digits are converted to
#' numbers and stored in one row of a matrix.
#'
#' @param tab A list whose elements are character vectors of tokens, one
#'   element per table row.
#' @param nrow Number of rows of the numeric matrix. Defaults to
#'   `length(tab)`.
#' @param ncol Number of columns of the numeric matrix. Defaults to the
#'   largest count of numeric tokens found in any row.
#' @return A list with two elements: `labs`, a character vector with one label
#'   per row (`""` when a row has no alphabetic token), and `df`, a numeric
#'   matrix with one row per element of `tab`; rows with fewer numbers than
#'   `ncol` are padded with `NA`.
get_table <- function(tab, nrow = NULL, ncol = NULL) {
  n_rows <- length(tab)
  labs <- character(n_rows)
  values <- vector("list", n_rows)
  for (i in seq_len(n_rows)) {
    tokens <- as.character(tab[[i]])
    ## paste(collapse) gives "" for a row without words; looping over
    ## 1:length(words) used to create the bogus label "NA " instead
    labs[i] <- paste(tokens[grepl("^[a-zA-Z]+$", tokens)], collapse = " ")
    values[[i]] <- as.numeric(tokens[grepl("^[0-9]{6,8}$", tokens)])
  }
  ## The NULL defaults previously made matrix() fail; derive them from the data
  if (is.null(nrow)) {
    nrow <- n_rows
  }
  if (is.null(ncol)) {
    ncol <- max(0L, lengths(values))
  }
  if (nrow < n_rows) {
    stop("`nrow` is smaller than the number of rows in `tab`", call. = FALSE)
  }
  df <- matrix(data = NA_real_, nrow = nrow, ncol = ncol)
  for (i in seq_len(n_rows)) {
    row_values <- values[[i]]
    if (length(row_values) > ncol) {
      stop("row ", i, " holds more numeric values than `ncol`", call. = FALSE)
    }
    ## Fill from the left and leave the remaining cells NA instead of
    ## recycling or aborting on a length mismatch
    df[i, seq_along(row_values)] <- row_values
  }
  ## The results used to be discarded; hand them back to the caller
  list(labs = labs, df = df)
}
################################################################################
#
# Create list for information and tables in page 253 (2018)
#
################################################################################
## Extract tables from page 253
health1 <- extract_tables(file = "data-raw/budget/2018 Ministry of Finance and Development Planning.pdf",
                          pages = 253,
                          method = "decide")
#goal <- health1[[1]][3, ]
#strategicObjective <- str_c(health1[[1]][5, ], health1[[1]][6, ], health1[[1]][7, ], sep = " ")
## First summary table on page 253 (rows 27-31): by economic classification.
## Join the two columns, strip punctuation and split into single tokens.
tab1 <- health1[[1]][27:31, ]
tab1 <- paste(tab1[ , 1], tab1[ , 2], sep = " ")
## `replacement` spelled out; `replace` only worked through partial matching
tab1 <- str_replace_all(string = tab1, pattern = "[[:punct:]]", replacement = "")
tab1 <- str_split_fixed(string = tab1, pattern = " ", n = 12)
## For each row: purely alphabetic tokens form the label, tokens of 1-10
## digits are the code followed by the six amounts
economic_classification <- character(nrow(tab1))
df <- matrix(data = NA, nrow = nrow(tab1), ncol = 7)
for (i in seq_len(nrow(tab1))) {
  tokens <- tab1[i, ]
  words <- tokens[str_detect(string = tokens, pattern = "^[a-zA-Z]+$")]
  ## paste(collapse) yields "" for a row without words; the former
  ## 1:length(words) loop produced the bogus label "NA " in that case
  phrase <- paste(words, collapse = " ")
  economic_classification[i] <- phrase
  numbers <- as.numeric(tokens[str_detect(string = tokens, pattern = "^\\d{1,10}$")])
  df[i, ] <- numbers
}
df <- data.frame(df[ , 1], economic_classification, df[ , 2:7])
names(df) <- c("code", "economic_classification",
               "actual_2016_2017",
               "budget_2017_2018", "outturn_2017_2018",
               "budget_2018_2019", "projection_2019_2020",
               "projection_2020_2021")
df$economic_classification <- str_to_sentence(df$economic_classification)
summaryEconHealth2018 <- df
## Second summary table on page 253 (rows 37-46): by spending entity,
## processed the same way
tab2 <- health1[[1]][37:46, ]
tab2 <- paste(tab2[ , 1], tab2[ , 2], sep = " ")
tab2 <- str_replace_all(string = tab2, pattern = "[[:punct:]]", replacement = "")
tab2 <- str_split_fixed(string = tab2, pattern = " ", n = 12)
spending_entity <- character(nrow(tab2))
df <- matrix(data = NA, nrow = nrow(tab2), ncol = 7)
for (i in seq_len(nrow(tab2))) {
  tokens <- tab2[i, ]
  words <- tokens[str_detect(string = tokens, pattern = "^[a-zA-Z]+$")]
  phrase <- paste(words, collapse = " ")
  spending_entity[i] <- phrase
  numbers <- as.numeric(tokens[str_detect(string = tokens, pattern = "^\\d{1,10}$")])
  df[i, ] <- numbers
}
df <- data.frame(df[ , 1], spending_entity, df[ , 2:7])
names(df) <- c("code", "spending_entity",
               "actual_2016_2017",
               "budget_2017_2018", "outturn_2017_2018",
               "budget_2018_2019", "projection_2019_2020",
               "projection_2020_2021")
df$spending_entity <- str_to_title(df$spending_entity)
summarySpendingHealth2018 <- df
## Bundle both summaries into one named list and save it as package data
summaryHealth2018 <- list(summaryEconHealth2018 = summaryEconHealth2018,
                          summarySpendingHealth2018 = summarySpendingHealth2018)
usethis::use_data(summaryHealth2018, overwrite = TRUE)
## Tidy-up
rm(words, phrase, numbers, economic_classification, tab1, tab2, df,
   summaryEconHealth2018, summarySpendingHealth2018, health1)
################################################################################
#
# Create data.frames for the tables in pages 254-266 (2018)
#
################################################################################
## Extract tables from pages 254-266
health2 <- extract_tables(file = "data-raw/budget/2018 Ministry of Finance and Development Planning.pdf",
                          pages = 254:266,
                          method = "decide")
## First part of the economic table: rows 14-48 of the first element
tab1 <- health2[[1]][14:48, ]
## Replace en dashes and hyphens in the label column with spaces before
## punctuation is stripped, so joined words remain separate tokens
tab1[ , 1] <- str_replace_all(string = tab1[ , 1], pattern = " – ", replacement = " ")
tab1[ , 1] <- str_replace_all(string = tab1[ , 1], pattern = "–", replacement = " ")
tab1[ , 1] <- str_replace_all(string = tab1[ , 1], pattern = "-", replacement = " ")
## Remove all punctuation from columns 1-6
for (k in 1:6) {
  tab1[ , k] <- str_replace_all(string = tab1[ , k], pattern = "[[:punct:]]", replacement = "")
}
## Rows 8 and 12 are appended to the label of the row above them (presumably
## labels that wrapped onto a second line) and then dropped
tab1[c(7, 11), 1] <- paste(tab1[c(7, 11), 1], tab1[c(8, 12), 1], sep = " ")
tab1 <- tab1[c(1:7, 9:11, 13:35), ]
## Column 6 holds two space-separated values; split it into two columns
y <- str_split_fixed(string = tab1[ , 6], pattern = " ", n = 2)
tab1 <- cbind(tab1[ , 1:5], y)
## Tokenise column 1: alphabetic tokens form the label, tokens of 1-10 digits
## fill the two numeric columns of `df`
x <- str_split_fixed(string = tab1[ , 1], pattern = " ", n = 8)
labs <- character(nrow(x))
df <- matrix(data = NA, nrow = nrow(x), ncol = 2)
for (i in seq_len(nrow(x))) {
  tokens <- x[i, ]
  words <- tokens[str_detect(string = tokens, pattern = "^[a-zA-Z]+$")]
  ## paste(collapse) yields "" for a row without words; the former
  ## 1:length(words) loop produced the bogus label "NA " in that case
  phrase <- paste(words, collapse = " ")
  labs[i] <- phrase
  numbers <- as.numeric(tokens[str_detect(string = tokens, pattern = "^\\d{1,10}$")])
  df[i, ] <- numbers
}
tab1 <- data.frame(df[ , 1], labs, df[ , 2], tab1[ , 3:7])
## Rows 2-4, 6-26 and 28-33 are the line items; the labels of rows 1, 5 and 27
## provide the category names
tab1 <- tab1[c(2:4, 6:26, 28:33), ]
categoryCode <- c(rep(21, 3), rep(22, 21), rep(25, 6))
category <- c(rep(labs[1], 3), rep(labs[5], 21), rep(labs[27], 6))
tab1 <- data.frame(categoryCode, category, tab1)
tab1$category <- str_to_title(tab1$category)
names(tab1) <- c("categoryCode", "category",
                 "itemCode", "item",
                 "actual_2016_2017",
                 "budget_2017_2018", "outturn_2017_2018",
                 "budget_2018_2019", "projection_2019_2020",
                 "projection_2020_2021")
## The item stays text; the six amount columns become numeric
tab1[ , 4] <- as.character(tab1[ , 4])
for (k in 5:10) {
  tab1[ , k] <- as.numeric(as.character(tab1[ , k]))
}
################################################################################
## Second part of the economic table: rows 6-48 of the second element
tab2 <- health2[[2]][6:48, ]
## Replace en dashes and hyphens in the label column with spaces before
## punctuation is stripped, so joined words remain separate tokens
tab2[ , 1] <- str_replace_all(string = tab2[ , 1], pattern = " – ", replacement = " ")
tab2[ , 1] <- str_replace_all(string = tab2[ , 1], pattern = "–", replacement = " ")
tab2[ , 1] <- str_replace_all(string = tab2[ , 1], pattern = "-", replacement = " ")
## Remove all punctuation from columns 1-6
for (k in 1:6) {
  tab2[ , k] <- str_replace_all(string = tab2[ , k], pattern = "[[:punct:]]", replacement = "")
}
## Rows 10, 16, 32 and 37 are appended to the label of the row above them
## (presumably labels that wrapped onto a second line) and then dropped
tab2[c(9, 15, 31, 36), 1] <- paste(tab2[c(9, 15, 31, 36), 1],
                                   tab2[c(10, 16, 32, 37), 1], sep = " ")
tab2 <- tab2[c(1:9, 11:15, 17:31, 33:36, 38:43), ]
## Column 6 holds two space-separated values; split it into two columns
y <- str_split_fixed(string = tab2[ , 6], pattern = " ", n = 2)
tab2 <- cbind(tab2[ , 1:5], y)
## Tokenise column 1: alphabetic tokens form the label, tokens of 1-10 digits
## fill the two numeric columns of `df`
x <- str_split_fixed(string = tab2[ , 1], pattern = " ", n = 8)
labs <- character(nrow(x))
df <- matrix(data = NA, nrow = nrow(x), ncol = 2)
for (i in seq_len(nrow(x))) {
  tokens <- x[i, ]
  words <- tokens[str_detect(string = tokens, pattern = "^[a-zA-Z]+$")]
  ## paste(collapse) yields "" for a row without words; the former
  ## 1:length(words) loop produced the bogus label "NA " in that case
  phrase <- paste(words, collapse = " ")
  labs[i] <- phrase
  numbers <- as.numeric(tokens[str_detect(string = tokens, pattern = "^\\d{1,10}$")])
  df[i, ] <- numbers
}
tab2 <- data.frame(df[ , 1], labs, df[ , 2], tab2[ , 3:7])
## Row 8 only supplies the name of the second category and is dropped
tab2 <- tab2[c(1:7, 9:39), ]
categoryCode <- c(rep(25, 7), rep(26, 31))
category <- c(rep("Subsidy", 7), rep(labs[8], 31))
tab2 <- data.frame(categoryCode, category, tab2)
tab2$category <- str_to_title(tab2$category)
names(tab2) <- c("categoryCode", "category",
                 "itemCode", "item",
                 "actual_2016_2017",
                 "budget_2017_2018", "outturn_2017_2018",
                 "budget_2018_2019", "projection_2019_2020",
                 "projection_2020_2021")
## The item stays text; the six amount columns become numeric
tab2[ , 4] <- as.character(tab2[ , 4])
for (k in 5:10) {
  tab2[ , k] <- as.numeric(as.character(tab2[ , k]))
}
################################################################################
## Third part of the economic table: rows 6-46 of the third element
tab3 <- health2[[3]][6:46, ]
## Replace en dashes and hyphens in the label column with spaces before
## punctuation is stripped, so joined words remain separate tokens
tab3[ , 1] <- str_replace_all(string = tab3[ , 1], pattern = " – ", replacement = " ")
tab3[ , 1] <- str_replace_all(string = tab3[ , 1], pattern = "–", replacement = " ")
tab3[ , 1] <- str_replace_all(string = tab3[ , 1], pattern = "-", replacement = " ")
## Remove all punctuation from columns 1-6
for (k in 1:6) {
  tab3[ , k] <- str_replace_all(string = tab3[ , k], pattern = "[[:punct:]]", replacement = "")
}
## Rows 2, 15 and 24 are appended to the label of the row above them
## (presumably labels that wrapped onto a second line) and then dropped
tab3[c(1, 14, 23), 1] <- paste(tab3[c(1, 14, 23), 1], tab3[c(2, 15, 24), 1], sep = " ")
## The label of the last row is completed by hand with the word "County"
tab3[41, 1] <- paste(tab3[41, 1], "County", sep = " ")
tab3 <- tab3[c(1, 3:14, 16:23, 25:41), ]
## Column 6 holds two space-separated values; split it into two columns
y <- str_split_fixed(string = tab3[ , 6], pattern = " ", n = 2)
tab3 <- cbind(tab3[ , 1:5], y)
## Tokenise column 1: alphabetic tokens form the label, tokens of 1-10 digits
## fill the two numeric columns of `df`
x <- str_split_fixed(string = tab3[ , 1], pattern = " ", n = 8)
labs <- character(nrow(x))
df <- matrix(data = NA, nrow = nrow(x), ncol = 2)
for (i in seq_len(nrow(x))) {
  tokens <- x[i, ]
  words <- tokens[str_detect(string = tokens, pattern = "^[a-zA-Z]+$")]
  ## paste(collapse) yields "" for a row without words; the former
  ## 1:length(words) loop produced the bogus label "NA " in that case
  phrase <- paste(words, collapse = " ")
  labs[i] <- phrase
  numbers <- as.numeric(tokens[str_detect(string = tokens, pattern = "^\\d{1,10}$")])
  df[i, ] <- numbers
}
tab3 <- data.frame(df[ , 1], labs, df[ , 2], tab3[ , 3:7])
## Every row of this part belongs to category 26, "Grants"
categoryCode <- rep(26, nrow(tab3))
category <- rep("Grants", nrow(tab3))
tab3 <- data.frame(categoryCode, category, tab3)
tab3$category <- str_to_title(tab3$category)
names(tab3) <- c("categoryCode", "category",
                 "itemCode", "item",
                 "actual_2016_2017",
                 "budget_2017_2018", "outturn_2017_2018",
                 "budget_2018_2019", "projection_2019_2020",
                 "projection_2020_2021")
## The item stays text; the six amount columns become numeric
tab3[ , 4] <- as.character(tab3[ , 4])
for (k in 5:10) {
  tab3[ , k] <- as.numeric(as.character(tab3[ , k]))
}
################################################################################
## Fourth part of the economic table: rows 6-24 of the fourth element
tab4 <- health2[[4]][6:24, ]
## Row 2 is appended to the label of row 1 (presumably a label that wrapped
## onto a second line) and then dropped
tab4[1, 1] <- paste(tab4[1, 1], tab4[2, 1], sep = " ")
tab4 <- tab4[c(1, 3:19), ]
## Replace en dashes and hyphens in the label column with spaces before
## punctuation is stripped, so joined words remain separate tokens
tab4[ , 1] <- str_replace_all(string = tab4[ , 1], pattern = " – ", replacement = " ")
tab4[ , 1] <- str_replace_all(string = tab4[ , 1], pattern = "–", replacement = " ")
tab4[ , 1] <- str_replace_all(string = tab4[ , 1], pattern = "-", replacement = " ")
## Remove all punctuation from columns 1-6
for (k in 1:6) {
  tab4[ , k] <- str_replace_all(string = tab4[ , k], pattern = "[[:punct:]]", replacement = "")
}
## Column 6 holds two space-separated values; split it into two columns
y <- str_split_fixed(string = tab4[ , 6], pattern = " ", n = 2)
tab4 <- cbind(tab4[ , 1:5], y)
## Tokenise column 1: alphabetic tokens form the label, tokens of 1-10 digits
## fill the two numeric columns of `df`
x <- str_split_fixed(string = tab4[ , 1], pattern = " ", n = 8)
labs <- character(nrow(x))
df <- matrix(data = NA, nrow = nrow(x), ncol = 2)
for (i in seq_len(nrow(x))) {
  tokens <- x[i, ]
  words <- tokens[str_detect(string = tokens, pattern = "^[a-zA-Z]+$")]
  ## paste(collapse) yields "" for a row without words; the former
  ## 1:length(words) loop produced the bogus label "NA " in that case
  phrase <- paste(words, collapse = " ")
  labs[i] <- phrase
  numbers <- as.numeric(tokens[str_detect(string = tokens, pattern = "^\\d{1,10}$")])
  df[i, ] <- numbers
}
tab4 <- data.frame(df[ , 1], labs, df[ , 2], tab4[ , 3:7])
## Row 17 only supplies the name of the last category and is dropped
tab4 <- tab4[c(1:16, 18), ]
categoryCode <- c(rep(26, 16), 31)
category <- c(rep("Grants", 16), labs[17])
tab4 <- data.frame(categoryCode, category, tab4)
tab4$category <- str_to_title(tab4$category)
names(tab4) <- c("categoryCode", "category",
                 "itemCode", "item",
                 "actual_2016_2017",
                 "budget_2017_2018", "outturn_2017_2018",
                 "budget_2018_2019", "projection_2019_2020",
                 "projection_2020_2021")
## The item stays text; the six amount columns become numeric
tab4[ , 4] <- as.character(tab4[ , 4])
for (k in 5:10) {
  tab4[ , k] <- as.numeric(as.character(tab4[ , k]))
}
################################################################################
## Stack the four parts of the economic table and save them as package data
econ_parts <- list(tab1, tab2, tab3, tab4)
mohHealthEcon2018 <- data.frame(do.call(rbind, econ_parts))
usethis::use_data(mohHealthEcon2018, overwrite = TRUE)
################################################################################
## County table: rows 30-44 of the fourth element of `health2`
tab5 <- health2[[4]][30:44, ]
## Replace hyphens in the label column with spaces before punctuation is
## stripped, so hyphen-joined words remain separate tokens
tab5[ , 1] <- str_replace_all(string = tab5[ , 1], pattern = "-", replacement = " ")
## Remove all punctuation from columns 1-6
for (k in 1:6) {
  tab5[ , k] <- str_replace_all(string = tab5[ , k], pattern = "[[:punct:]]", replacement = "")
}
## Tokenise column 1 and split the two space-separated values of column 6
x <- str_split_fixed(string = tab5[ , 1], pattern = " ", n = 5)
y <- str_split_fixed(string = tab5[ , 6], pattern = " ", n = 2)
tab5 <- cbind(tab5[ , 1:5], y)
## For each row: alphabetic tokens form the label, tokens of 1-10 digits fill
## the two numeric columns of `df`
labs <- character(nrow(x))
df <- matrix(data = NA, nrow = nrow(x), ncol = 2)
for (i in seq_len(nrow(x))) {
  tokens <- x[i, ]
  words <- tokens[str_detect(string = tokens, pattern = "^[a-zA-Z]+$")]
  ## paste(collapse) yields "" for a row without words; the former
  ## 1:length(words) loop produced the bogus label "NA " in that case
  phrase <- paste(words, collapse = " ")
  labs[i] <- phrase
  numbers <- as.numeric(tokens[str_detect(string = tokens, pattern = "^\\d{1,10}$")])
  df[i, ] <- numbers
}
tab5 <- data.frame(df[ , 1], labs, df[ , 2], tab5[ , 3:7])
tab5$labs <- str_to_title(tab5$labs)
names(tab5) <- c("countyCode", "county",
                 "actual_2016_2017",
                 "budget_2017_2018", "outturn_2017_2018",
                 "budget_2018_2019", "projection_2019_2020",
                 "projection_2020_2021")
## Columns 4-8 come from the character matrix and still need converting;
## columns 1 and 3 are already numeric
for (k in 4:8) {
  tab5[ , k] <- as.numeric(as.character(tab5[ , k]))
}
################################################################################
mohHealthCounty2018 <- tab5
usethis::use_data(mohHealthCounty2018, overwrite = TRUE)
################################################################################
## First part of table 6: rows 14-48 of the fifth element of `health2`
tab6 <- health2[[5]][14:48, ]
## Replace hyphens in the label column with spaces before punctuation is
## stripped, so hyphen-joined words remain separate tokens
tab6[ , 1] <- str_replace_all(string = tab6[ , 1], pattern = " - ", replacement = " ")
tab6[ , 1] <- str_replace_all(string = tab6[ , 1], pattern = "-", replacement = " ")
## Only four columns are used for this part; remove their punctuation
for (k in 1:4) {
  tab6[ , k] <- str_replace_all(string = tab6[ , k], pattern = "[[:punct:]]", replacement = "")
}
## Rows 8, 10, 12, 17, 24, 29 and 32 are appended to the label of the row
## above them (presumably labels that wrapped onto a second line) and dropped
tab6[c(7, 9, 11, 16, 23, 28, 31), 1] <- paste(tab6[c(7, 9, 11, 16, 23, 28, 31), 1],
                                              tab6[c(8, 10, 12, 17, 24, 29, 32), 1],
                                              sep = " ")
tab6 <- tab6[c(1:7, 9, 11, 13:16, 18:23, 25:28, 30:31, 33:35), ]
## Collapse every row into one string, then split it into single tokens
tab6 <- apply(tab6[ , 1:4], 1, paste, collapse = " ")
x <- str_split_fixed(string = tab6, pattern = " ", n = 13)
## For each row: purely alphabetic tokens form the label, tokens of 1-10
## digits are the code followed by the six amounts
labs <- character(nrow(x))
df <- matrix(data = NA, nrow = nrow(x), ncol = 7)
for (i in seq_len(nrow(x))) {
  tokens <- x[i, ]
  words <- tokens[str_detect(string = tokens, pattern = "^[a-zA-Z]+$")]
  ## paste(collapse) yields "" for a row without words; the former
  ## 1:length(words) loop produced the bogus label "NA " in that case
  phrase <- paste(words, collapse = " ")
  labs[i] <- phrase
  numbers <- as.numeric(tokens[str_detect(string = tokens, pattern = "^\\d{1,10}$")])
  df[i, ] <- numbers
}
tab6a <- cbind(df[ , 1], labs, df[ , 2:7])
## Second part of table 6: rows 5-50 of the sixth element of `health2`
tab6 <- health2[[6]][5:50, ]
## Replace hyphens in the label column with spaces before punctuation is
## stripped, so hyphen-joined words remain separate tokens
tab6[ , 1] <- str_replace_all(string = tab6[ , 1], pattern = " - ", replacement = " ")
tab6[ , 1] <- str_replace_all(string = tab6[ , 1], pattern = "-", replacement = " ")
## Remove all punctuation from columns 1-6
for (k in 1:6) {
  tab6[ , k] <- str_replace_all(string = tab6[ , k], pattern = "[[:punct:]]", replacement = "")
}
## Each row in `continued` is appended to the label of the row above it
## (presumably labels that wrapped onto a second line) and then dropped
merged <- c(4, 11, 14, 19, 22, 24, 27, 33, 35, 37, 40, 44)
continued <- merged + 1
tab6[merged, 1] <- paste(tab6[merged, 1], tab6[continued, 1], sep = " ")
tab6 <- tab6[c(1:4, 6:11, 13:14, 16:19, 21:22, 24, 26:27, 29:33, 35, 37, 39:40, 42:44, 46), ]
## Collapse every row into one string, then split it into single tokens
tab6 <- apply(tab6[ , 1:6], 1, paste, collapse = " ")
x <- str_split_fixed(string = tab6, pattern = " ", n = 13)
## For each row: purely alphabetic tokens form the label, tokens of 1-10
## digits are the code followed by the six amounts
labs <- character(nrow(x))
df <- matrix(data = NA, nrow = nrow(x), ncol = 7)
for (i in seq_len(nrow(x))) {
  tokens <- x[i, ]
  words <- tokens[str_detect(string = tokens, pattern = "^[a-zA-Z]+$")]
  ## paste(collapse) yields "" for a row without words; the former
  ## 1:length(words) loop produced the bogus label "NA " in that case
  phrase <- paste(words, collapse = " ")
  labs[i] <- phrase
  numbers <- as.numeric(tokens[str_detect(string = tokens, pattern = "^\\d{1,10}$")])
  df[i, ] <- numbers
}
tab6b <- cbind(df[ , 1], labs, df[ , 2:7])
# Third part of the table: rows 5-48 of the seventh extracted table.
tab6 <- health2[[7]][5:48, ]
# Turn hyphens in the label column into spaces (spaced form first).
for (dash in c(" - ", "-")) {
  tab6[, 1] <- str_replace_all(string = tab6[, 1], pattern = dash, replacement = " ")
}
# Remove punctuation characters from all six columns.
for (k in 1:6) {
  tab6[, k] <- str_replace_all(string = tab6[, k], pattern = "[[:punct:]]", replacement = "")
}
# Append each listed row's successor to it in column 1 (presumably labels that
# were split over two extracted rows -- confirm against the PDF), then drop
# the successor rows.
merge_rows <- c(1, 3, 6, 12, 16, 19, 28, 30, 36)
tab6[merge_rows, 1] <- paste(tab6[merge_rows, 1], tab6[merge_rows + 1, 1], sep = " ")
tab6 <- tab6[setdiff(1:44, merge_rows + 1), ]
# Flatten each row into one string and re-split it into single tokens.
tab6 <- apply(tab6[, 1:6], 1, paste, collapse = " ")
x <- str_split_fixed(string = tab6, pattern = " ", n = 14)
n_rows <- nrow(x)
labs <- character(n_rows)
df <- matrix(data = NA, nrow = n_rows, ncol = 7)
for (i in seq_len(n_rows)) {
  tokens <- x[i, ]
  # Alphabetic tokens make up the label; paste(collapse) gives "" rather than
  # an "NA" label when a row has no alphabetic token.
  labs[i] <- paste(tokens[str_detect(string = tokens, pattern = "^[a-zA-Z]+$")],
                   collapse = " ")
  # Tokens of 1-10 digits are the numeric fields that fill the 7 value columns.
  df[i, ] <- as.numeric(tokens[str_detect(string = tokens, pattern = "^\\d{1,10}$")])
}
# Column order: first number, label, remaining six numbers.
tab6c <- cbind(df[, 1], labs, df[, 2:7])
# Stack the three parts and keep only the item rows; the skipped rows
# (1, 2, 5, 17, 31) are not items (presumably heading rows -- confirm).
tab6 <- rbind(tab6a, tab6b, tab6c)
tab6 <- tab6[c(3:4, 6:16, 18:30, 32:97), ]
# Department and category columns; the run lengths below add up to the 92
# rows kept above.
group_sizes <- c(2, 11, 13, 66)
departmentCode <- rep(100, nrow(tab6))
# Fixed the misspelt department name ("Servives").
department <- rep("Curative Services", nrow(tab6))
categoryCode <- rep(c(21, 22, 25, 26), times = group_sizes)
category <- rep(c("Compensation Of Employees",
                  "Use Of Goods And Services",
                  "Subsidy",
                  "Grants"),
                times = group_sizes)
tab6 <- data.frame(departmentCode, department, categoryCode, category, tab6)
tab6$department <- str_to_title(string = tab6$department)
tab6$category <- str_to_title(string = tab6$category)
names(tab6) <- c("departmentCode", "department",
                 "categoryCode", "category",
                 "itemCode", "item",
                 "actual_2016_2017",
                 "budget_2017_2018", "outturn_2017_2018",
                 "budget_2018_2019", "projection_2019_2020",
                 "projection_2020_2021")
# The table columns came from a character matrix: item stays text, the item
# code and the six amount columns become numeric.
tab6[, 6] <- as.character(tab6[, 6])
for (k in c(5, 7:12)) {
  tab6[, k] <- as.numeric(as.character(tab6[, k]))
}
################################################################################
mohHealthCurative2018 <- tab6
usethis::use_data(mohHealthCurative2018, overwrite = TRUE)
################################################################################
# NOTE(review): in the original this section broke off in the middle of the
# loop and a complete second copy of the script starts right after it, so the
# file could not be parsed. The loop is completed here so the file parses;
# the copy below recomputes everything -- consider deleting one of the copies.
tab7 <- health2[[8]][12:28, ]
# Turn hyphens in the label column into spaces (spaced form first).
for (dash in c(" - ", "-")) {
  tab7[, 1] <- str_replace_all(string = tab7[, 1], pattern = dash, replacement = " ")
}
# Remove punctuation characters from all six columns.
for (k in 1:6) {
  tab7[, k] <- str_replace_all(string = tab7[, k], pattern = "[[:punct:]]", replacement = "")
}
# Append each listed row's successor to it in column 1 (presumably labels that
# were split over two extracted rows -- confirm against the PDF), then drop
# the successor rows.
merge_rows <- c(5, 10, 16)
tab7[merge_rows, 1] <- paste(tab7[merge_rows, 1], tab7[merge_rows + 1, 1], sep = " ")
tab7 <- tab7[setdiff(1:17, merge_rows + 1), ]
# Flatten each row into one string and re-split it into single tokens.
tab7 <- apply(tab7[, 1:6], 1, paste, collapse = " ")
x <- str_split_fixed(string = tab7, pattern = " ", n = 13)
n_rows <- nrow(x)
labs <- character(n_rows)
df <- matrix(data = NA, nrow = n_rows, ncol = 7)
for (i in seq_len(n_rows)) {
  tokens <- x[i, ]
  # Alphabetic tokens make up the label; paste(collapse) gives "" rather than
  # an "NA" label when a row has no alphabetic token.
  labs[i] <- paste(tokens[str_detect(string = tokens, pattern = "^[a-zA-Z]+$")],
                   collapse = " ")
  # Tokens of 1-10 digits are the numeric fields that fill the 7 value columns.
  df[i, ] <- tokens[str_detect(string = tokens, pattern = "^\\d{1,10}$")]
}
################################################################################
#
# Load required libraries
#
################################################################################
library(pdftools)
library(tm)
library(tabulizer)
library(stringr)
library(tidyverse)
library(tidytext)
options(scipen = 999)
################################################################################
#
# Function to process tables
#
################################################################################
#' Split tokenised table rows into text labels and numeric values
#'
#' For every row, tokens made up only of letters are joined with single spaces
#' into a label, and tokens made up of 6 to 8 digits are converted to numbers
#' and stored in the corresponding row of a matrix.
#'
#' @param tab A list with one character vector of tokens per table row. A
#'   character matrix with one table row per matrix row is also accepted.
#' @param nrow Number of rows of the value matrix; defaults to the number of
#'   table rows.
#' @param ncol Number of columns of the value matrix; defaults to the largest
#'   number of numeric tokens found in any row.
#' @return A list with `labs` (character vector of row labels) and `df`
#'   (numeric matrix of the extracted values).
get_table <- function(tab, nrow = NULL, ncol = NULL) {
  # Accept a token matrix as well as a list of token vectors.
  if (is.matrix(tab)) {
    tab <- lapply(seq_len(dim(tab)[1L]), function(i) tab[i, ])
  }
  # paste(collapse) yields "" for a row without words instead of an "NA" label.
  labs <- vapply(
    tab,
    function(tokens) paste(tokens[grepl("^[a-zA-Z]+$", tokens)], collapse = " "),
    character(1),
    USE.NAMES = FALSE
  )
  numbers <- lapply(
    tab,
    function(tokens) as.numeric(tokens[grepl("^[0-9]{6,8}$", tokens)])
  )
  if (is.null(nrow)) {
    nrow <- length(tab)
  }
  if (is.null(ncol)) {
    ncol <- max(lengths(numbers), 0L)
  }
  df <- matrix(data = NA_real_, nrow = nrow, ncol = ncol)
  for (i in seq_along(numbers)) {
    if (length(numbers[[i]]) != ncol) {
      stop("Row ", i, " has ", length(numbers[[i]]),
           " numeric tokens but ", ncol, " were expected.", call. = FALSE)
    }
    df[i, ] <- numbers[[i]]
  }
  # The original computed labs and df but never returned them.
  list(labs = labs, df = df)
}
################################################################################
#
# Create list for information and tables in page 253 (2018)
#
################################################################################
## Extract tables from page 253
health1 <- extract_tables(
  file = "data-raw/budget/2018 Ministry of Finance and Development Planning.pdf",
  pages = 253,
  method = "decide"
)
#goal <- health1[[1]][3, ]
#strategicObjective <- str_c(health1[[1]][5, ], health1[[1]][6, ], health1[[1]][7, ], sep = " ")
## Names of the six amount columns shared by both summary tables
amount_names <- c("actual_2016_2017",
                  "budget_2017_2018", "outturn_2017_2018",
                  "budget_2018_2019", "projection_2019_2020",
                  "projection_2020_2021")
## Extract first table in page 253 (rows 27-31 of the extracted matrix)
tab1 <- health1[[1]][27:31, ]
tab1 <- paste(tab1[, 1], tab1[, 2], sep = " ")
tab1 <- str_replace_all(string = tab1, pattern = "[[:punct:]]", replacement = "")
tab1 <- str_split_fixed(string = tab1, pattern = " ", n = 12)
## Split every row into its text label and its seven numeric fields
economic_classification <- character(nrow(tab1))
df <- matrix(data = NA, nrow = 5, ncol = 7)
for (i in seq_len(nrow(tab1))) {
  tokens <- tab1[i, ]
  # Alphabetic tokens make up the label; paste(collapse) gives "" rather than
  # an "NA" label when a row has no alphabetic token.
  economic_classification[i] <- paste(
    tokens[str_detect(string = tokens, pattern = "^[a-zA-Z]+$")],
    collapse = " "
  )
  # Tokens of 1-10 digits are the code followed by the six amounts.
  df[i, ] <- as.numeric(tokens[str_detect(string = tokens, pattern = "^\\d{1,10}$")])
}
df <- data.frame(df[, 1], economic_classification, df[, 2:7])
names(df) <- c("code", "economic_classification", amount_names)
df$economic_classification <- str_to_sentence(df$economic_classification)
summaryEconHealth2018 <- df
## Extract second table in page 253 (rows 37-46 of the extracted matrix)
tab2 <- health1[[1]][37:46, ]
tab2 <- paste(tab2[, 1], tab2[, 2], sep = " ")
tab2 <- str_replace_all(string = tab2, pattern = "[[:punct:]]", replacement = "")
tab2 <- str_split_fixed(string = tab2, pattern = " ", n = 12)
## Split every row into its text label and its seven numeric fields
spending_entity <- character(nrow(tab2))
df <- matrix(data = NA, nrow = 10, ncol = 7)
for (i in seq_len(nrow(tab2))) {
  tokens <- tab2[i, ]
  spending_entity[i] <- paste(
    tokens[str_detect(string = tokens, pattern = "^[a-zA-Z]+$")],
    collapse = " "
  )
  df[i, ] <- as.numeric(tokens[str_detect(string = tokens, pattern = "^\\d{1,10}$")])
}
df <- data.frame(df[, 1], spending_entity, df[, 2:7])
names(df) <- c("code", "spending_entity", amount_names)
df$spending_entity <- str_to_title(df$spending_entity)
summarySpendingHealth2018 <- df
## Bundle both summaries into one named list and save it as package data
summaryHealth2018 <- list(summaryEconHealth2018 = summaryEconHealth2018,
                          summarySpendingHealth2018 = summarySpendingHealth2018)
usethis::use_data(summaryHealth2018, overwrite = TRUE)
## Tidy-up
rm(tokens, i, amount_names, economic_classification, spending_entity,
   tab1, tab2, df, summaryEconHealth2018, summarySpendingHealth2018, health1)
################################################################################
#
#
#
################################################################################
## Extract tables from pages 254-266
health2 <- extract_tables(
  file = "data-raw/budget/2018 Ministry of Finance and Development Planning.pdf",
  pages = 254:266,
  method = "decide"
)
tab1 <- health2[[1]][14:48, ]
# Turn dashes in the label column into spaces: spaced en dash, bare en dash,
# then hyphen (the order matters for the resulting spacing).
for (dash in c(" – ", "–", "-")) {
  tab1[, 1] <- str_replace_all(string = tab1[, 1], pattern = dash, replacement = " ")
}
# Remove punctuation characters from all six columns.
for (k in 1:6) {
  tab1[, k] <- str_replace_all(string = tab1[, k], pattern = "[[:punct:]]", replacement = "")
}
# Append each listed row's successor to it in column 1 (presumably labels that
# were split over two extracted rows -- confirm against the PDF), then drop
# the successor rows.
merge_rows <- c(7, 11)
tab1[merge_rows, 1] <- paste(tab1[merge_rows, 1], tab1[merge_rows + 1, 1], sep = " ")
tab1 <- tab1[setdiff(1:35, merge_rows + 1), ]
# Column 6 is split at its first space into two columns.
y <- str_split_fixed(string = tab1[, 6], pattern = " ", n = 2)
tab1 <- cbind(tab1[, 1:5], y)
# Column 1 is tokenised to separate the label from the numbers it contains.
x <- str_split_fixed(string = tab1[, 1], pattern = " ", n = 8)
n_rows <- nrow(x)
labs <- character(n_rows)
df <- matrix(data = NA, nrow = n_rows, ncol = 2)
for (i in seq_len(n_rows)) {
  tokens <- x[i, ]
  # Alphabetic tokens make up the label; paste(collapse) gives "" rather than
  # an "NA" label when a row has no alphabetic token.
  labs[i] <- paste(tokens[str_detect(string = tokens, pattern = "^[a-zA-Z]+$")],
                   collapse = " ")
  # Tokens of 1-10 digits are the two numbers held in column 1.
  df[i, ] <- as.numeric(tokens[str_detect(string = tokens, pattern = "^\\d{1,10}$")])
}
tab1 <- data.frame(df[, 1], labs, df[, 2], tab1[, 3:7])
# Keep the item rows; rows 1, 5 and 27 supply the category labels used below.
tab1 <- tab1[c(2:4, 6:26, 28:33), ]
group_sizes <- c(3, 21, 6)
categoryCode <- rep(c(21, 22, 25), times = group_sizes)
category <- rep(labs[c(1, 5, 27)], times = group_sizes)
tab1 <- data.frame(categoryCode, category, tab1)
tab1$category <- str_to_title(tab1$category)
names(tab1) <- c("categoryCode", "category",
                 "itemCode", "item",
                 "actual_2016_2017",
                 "budget_2017_2018", "outturn_2017_2018",
                 "budget_2018_2019", "projection_2019_2020",
                 "projection_2020_2021")
# Item stays text; the six amount columns become numeric.
tab1[, 4] <- as.character(tab1[, 4])
for (k in 5:10) {
  tab1[, k] <- as.numeric(as.character(tab1[, k]))
}
################################################################################
tab2 <- health2[[2]][6:48, ]
# Turn dashes in the label column into spaces: spaced en dash, bare en dash,
# then hyphen (the order matters for the resulting spacing).
for (dash in c(" – ", "–", "-")) {
  tab2[, 1] <- str_replace_all(string = tab2[, 1], pattern = dash, replacement = " ")
}
# Remove punctuation characters from all six columns.
for (k in 1:6) {
  tab2[, k] <- str_replace_all(string = tab2[, k], pattern = "[[:punct:]]", replacement = "")
}
# Append each listed row's successor to it in column 1 (presumably labels that
# were split over two extracted rows -- confirm against the PDF), then drop
# the successor rows.
merge_rows <- c(9, 15, 31, 36)
tab2[merge_rows, 1] <- paste(tab2[merge_rows, 1], tab2[merge_rows + 1, 1], sep = " ")
tab2 <- tab2[setdiff(1:43, merge_rows + 1), ]
# Column 6 is split at its first space into two columns.
y <- str_split_fixed(string = tab2[, 6], pattern = " ", n = 2)
tab2 <- cbind(tab2[, 1:5], y)
# Column 1 is tokenised to separate the label from the numbers it contains.
x <- str_split_fixed(string = tab2[, 1], pattern = " ", n = 8)
n_rows <- nrow(x)
labs <- character(n_rows)
df <- matrix(data = NA, nrow = n_rows, ncol = 2)
for (i in seq_len(n_rows)) {
  tokens <- x[i, ]
  # Alphabetic tokens make up the label; paste(collapse) gives "" rather than
  # an "NA" label when a row has no alphabetic token.
  labs[i] <- paste(tokens[str_detect(string = tokens, pattern = "^[a-zA-Z]+$")],
                   collapse = " ")
  # Tokens of 1-10 digits are the two numbers held in column 1.
  df[i, ] <- as.numeric(tokens[str_detect(string = tokens, pattern = "^\\d{1,10}$")])
}
tab2 <- data.frame(df[, 1], labs, df[, 2], tab2[, 3:7])
# Keep the item rows; row 8 supplies the label of the second category.
tab2 <- tab2[c(1:7, 9:39), ]
group_sizes <- c(7, 31)
categoryCode <- rep(c(25, 26), times = group_sizes)
category <- rep(c("Subsidy", labs[8]), times = group_sizes)
tab2 <- data.frame(categoryCode, category, tab2)
tab2$category <- str_to_title(tab2$category)
names(tab2) <- c("categoryCode", "category",
                 "itemCode", "item",
                 "actual_2016_2017",
                 "budget_2017_2018", "outturn_2017_2018",
                 "budget_2018_2019", "projection_2019_2020",
                 "projection_2020_2021")
# Item stays text; the six amount columns become numeric.
tab2[, 4] <- as.character(tab2[, 4])
for (k in 5:10) {
  tab2[, k] <- as.numeric(as.character(tab2[, k]))
}
################################################################################
tab3 <- health2[[3]][6:46, ]
# Turn dashes in the label column into spaces: spaced en dash, bare en dash,
# then hyphen (the order matters for the resulting spacing).
for (dash in c(" – ", "–", "-")) {
  tab3[, 1] <- str_replace_all(string = tab3[, 1], pattern = dash, replacement = " ")
}
# Remove punctuation characters from all six columns.
for (k in 1:6) {
  tab3[, k] <- str_replace_all(string = tab3[, k], pattern = "[[:punct:]]", replacement = "")
}
# Append each listed row's successor to it in column 1 (presumably labels that
# were split over two extracted rows -- confirm against the PDF), then drop
# the successor rows.
merge_rows <- c(1, 14, 23)
tab3[merge_rows, 1] <- paste(tab3[merge_rows, 1], tab3[merge_rows + 1, 1], sep = " ")
# The last row gets the word "County" appended to its label by hand.
tab3[41, 1] <- paste(tab3[41, 1], "County", sep = " ")
tab3 <- tab3[setdiff(1:41, merge_rows + 1), ]
# Column 6 is split at its first space into two columns.
y <- str_split_fixed(string = tab3[, 6], pattern = " ", n = 2)
tab3 <- cbind(tab3[, 1:5], y)
# Column 1 is tokenised to separate the label from the numbers it contains.
x <- str_split_fixed(string = tab3[, 1], pattern = " ", n = 8)
n_rows <- nrow(x)
labs <- character(n_rows)
df <- matrix(data = NA, nrow = n_rows, ncol = 2)
for (i in seq_len(n_rows)) {
  tokens <- x[i, ]
  # Alphabetic tokens make up the label; paste(collapse) gives "" rather than
  # an "NA" label when a row has no alphabetic token.
  labs[i] <- paste(tokens[str_detect(string = tokens, pattern = "^[a-zA-Z]+$")],
                   collapse = " ")
  # Tokens of 1-10 digits are the two numbers held in column 1.
  df[i, ] <- as.numeric(tokens[str_detect(string = tokens, pattern = "^\\d{1,10}$")])
}
tab3 <- data.frame(df[, 1], labs, df[, 2], tab3[, 3:7])
# Every row of this table belongs to category 26 ("Grants").
categoryCode <- rep(26, nrow(tab3))
category <- rep("Grants", nrow(tab3))
tab3 <- data.frame(categoryCode, category, tab3)
tab3$category <- str_to_title(tab3$category)
names(tab3) <- c("categoryCode", "category",
                 "itemCode", "item",
                 "actual_2016_2017",
                 "budget_2017_2018", "outturn_2017_2018",
                 "budget_2018_2019", "projection_2019_2020",
                 "projection_2020_2021")
# Item stays text; the six amount columns become numeric.
tab3[, 4] <- as.character(tab3[, 4])
for (k in 5:10) {
  tab3[, k] <- as.numeric(as.character(tab3[, k]))
}
################################################################################
tab4 <- health2[[4]][6:24, ]
# Row 2 is appended to row 1 in column 1 (presumably a label split over two
# extracted rows -- confirm against the PDF) and then dropped.
tab4[1, 1] <- paste(tab4[1, 1], tab4[2, 1], sep = " ")
tab4 <- tab4[c(1, 3:19), ]
# Turn dashes in the label column into spaces: spaced en dash, bare en dash,
# then hyphen (the order matters for the resulting spacing).
for (dash in c(" – ", "–", "-")) {
  tab4[, 1] <- str_replace_all(string = tab4[, 1], pattern = dash, replacement = " ")
}
# Remove punctuation characters from all six columns.
for (k in 1:6) {
  tab4[, k] <- str_replace_all(string = tab4[, k], pattern = "[[:punct:]]", replacement = "")
}
# Column 6 is split at its first space into two columns.
y <- str_split_fixed(string = tab4[, 6], pattern = " ", n = 2)
tab4 <- cbind(tab4[, 1:5], y)
# Column 1 is tokenised to separate the label from the numbers it contains.
x <- str_split_fixed(string = tab4[, 1], pattern = " ", n = 8)
n_rows <- nrow(x)
labs <- character(n_rows)
df <- matrix(data = NA, nrow = n_rows, ncol = 2)
for (i in seq_len(n_rows)) {
  tokens <- x[i, ]
  # Alphabetic tokens make up the label; paste(collapse) gives "" rather than
  # an "NA" label when a row has no alphabetic token.
  labs[i] <- paste(tokens[str_detect(string = tokens, pattern = "^[a-zA-Z]+$")],
                   collapse = " ")
  # Tokens of 1-10 digits are the two numbers held in column 1.
  df[i, ] <- as.numeric(tokens[str_detect(string = tokens, pattern = "^\\d{1,10}$")])
}
tab4 <- data.frame(df[, 1], labs, df[, 2], tab4[, 3:7])
# Keep the item rows; row 17 supplies the label of category 31.
tab4 <- tab4[c(1:16, 18), ]
categoryCode <- c(rep(26, 16), 31)
category <- c(rep("Grants", 16), labs[17])
tab4 <- data.frame(categoryCode, category, tab4)
tab4$category <- str_to_title(tab4$category)
names(tab4) <- c("categoryCode", "category",
                 "itemCode", "item",
                 "actual_2016_2017",
                 "budget_2017_2018", "outturn_2017_2018",
                 "budget_2018_2019", "projection_2019_2020",
                 "projection_2020_2021")
# Item stays text; the six amount columns become numeric.
tab4[, 4] <- as.character(tab4[, 4])
for (k in 5:10) {
  tab4[, k] <- as.numeric(as.character(tab4[, k]))
}
################################################################################
# Stack the four partial tables and save the result as package data.
mohHealthEcon2018 <- data.frame(rbind(tab1, tab2, tab3, tab4))
usethis::use_data(mohHealthEcon2018, overwrite = TRUE)
################################################################################
# County table: rows 30-44 of the fourth extracted table.
tab5 <- health2[[4]][30:44, ]
tab5[, 1] <- str_replace_all(string = tab5[, 1], pattern = "-", replacement = " ")
# Remove punctuation characters from all six columns.
for (k in 1:6) {
  tab5[, k] <- str_replace_all(string = tab5[, k], pattern = "[[:punct:]]", replacement = "")
}
# Column 1 is tokenised to separate the label from the numbers it contains;
# column 6 is split at its first space into two columns.
x <- str_split_fixed(string = tab5[, 1], pattern = " ", n = 5)
y <- str_split_fixed(string = tab5[, 6], pattern = " ", n = 2)
tab5 <- cbind(tab5[, 1:5], y)
n_rows <- nrow(x)
labs <- character(n_rows)
df <- matrix(data = NA, nrow = n_rows, ncol = 2)
for (i in seq_len(n_rows)) {
  tokens <- x[i, ]
  # Alphabetic tokens make up the label; paste(collapse) gives "" rather than
  # an "NA" label when a row has no alphabetic token.
  labs[i] <- paste(tokens[str_detect(string = tokens, pattern = "^[a-zA-Z]+$")],
                   collapse = " ")
  # Tokens of 1-10 digits are the two numbers held in column 1.
  df[i, ] <- as.numeric(tokens[str_detect(string = tokens, pattern = "^\\d{1,10}$")])
}
tab5 <- data.frame(df[, 1], labs, df[, 2], tab5[, 3:7])
tab5$labs <- str_to_title(tab5$labs)
names(tab5) <- c("countyCode", "county",
                 "actual_2016_2017",
                 "budget_2017_2018", "outturn_2017_2018",
                 "budget_2018_2019", "projection_2019_2020",
                 "projection_2020_2021")
# Columns 4-8 came from the character matrix and are converted to numeric.
for (k in 4:8) {
  tab5[, k] <- as.numeric(as.character(tab5[, k]))
}
################################################################################
mohHealthCounty2018 <- tab5
usethis::use_data(mohHealthCounty2018, overwrite = TRUE)
################################################################################
# First part of the table: rows 14-48 of the fifth extracted table.
tab6 <- health2[[5]][14:48, ]
# Turn hyphens in the label column into spaces (spaced form first).
for (dash in c(" - ", "-")) {
  tab6[, 1] <- str_replace_all(string = tab6[, 1], pattern = dash, replacement = " ")
}
# Remove punctuation characters from the four columns of this table.
for (k in 1:4) {
  tab6[, k] <- str_replace_all(string = tab6[, k], pattern = "[[:punct:]]", replacement = "")
}
# Append each listed row's successor to it in column 1 (presumably labels that
# were split over two extracted rows -- confirm against the PDF), then drop
# the successor rows.
merge_rows <- c(7, 9, 11, 16, 23, 28, 31)
tab6[merge_rows, 1] <- paste(tab6[merge_rows, 1], tab6[merge_rows + 1, 1], sep = " ")
tab6 <- tab6[setdiff(1:35, merge_rows + 1), ]
# Flatten each row into one string and re-split it into single tokens.
tab6 <- apply(tab6[, 1:4], 1, paste, collapse = " ")
x <- str_split_fixed(string = tab6, pattern = " ", n = 13)
n_rows <- nrow(x)
labs <- character(n_rows)
df <- matrix(data = NA, nrow = n_rows, ncol = 7)
for (i in seq_len(n_rows)) {
  tokens <- x[i, ]
  # Alphabetic tokens make up the label; paste(collapse) gives "" rather than
  # an "NA" label when a row has no alphabetic token.
  labs[i] <- paste(tokens[str_detect(string = tokens, pattern = "^[a-zA-Z]+$")],
                   collapse = " ")
  # Tokens of 1-10 digits are the numeric fields that fill the 7 value columns.
  df[i, ] <- as.numeric(tokens[str_detect(string = tokens, pattern = "^\\d{1,10}$")])
}
# Column order: first number, label, remaining six numbers.
tab6a <- cbind(df[, 1], labs, df[, 2:7])
# Second part of the table: rows 5-50 of the sixth extracted table.
tab6 <- health2[[6]][5:50, ]
# Turn hyphens in the label column into spaces (spaced form first).
for (dash in c(" - ", "-")) {
  tab6[, 1] <- str_replace_all(string = tab6[, 1], pattern = dash, replacement = " ")
}
# Remove punctuation characters from all six columns.
for (k in 1:6) {
  tab6[, k] <- str_replace_all(string = tab6[, k], pattern = "[[:punct:]]", replacement = "")
}
# Append each listed row's successor to it in column 1 (presumably labels that
# were split over two extracted rows -- confirm against the PDF), then drop
# the successor rows.
merge_rows <- c(4, 11, 14, 19, 22, 24, 27, 33, 35, 37, 40, 44)
tab6[merge_rows, 1] <- paste(tab6[merge_rows, 1], tab6[merge_rows + 1, 1], sep = " ")
tab6 <- tab6[setdiff(1:46, merge_rows + 1), ]
# Flatten each row into one string and re-split it into single tokens.
tab6 <- apply(tab6[, 1:6], 1, paste, collapse = " ")
x <- str_split_fixed(string = tab6, pattern = " ", n = 13)
n_rows <- nrow(x)
labs <- character(n_rows)
df <- matrix(data = NA, nrow = n_rows, ncol = 7)
for (i in seq_len(n_rows)) {
  tokens <- x[i, ]
  # Alphabetic tokens make up the label; paste(collapse) gives "" rather than
  # an "NA" label when a row has no alphabetic token.
  labs[i] <- paste(tokens[str_detect(string = tokens, pattern = "^[a-zA-Z]+$")],
                   collapse = " ")
  # Tokens of 1-10 digits are the numeric fields that fill the 7 value columns.
  df[i, ] <- as.numeric(tokens[str_detect(string = tokens, pattern = "^\\d{1,10}$")])
}
# Column order: first number, label, remaining six numbers.
tab6b <- cbind(df[, 1], labs, df[, 2:7])
# Third part of the table: rows 5-48 of the seventh extracted table.
tab6 <- health2[[7]][5:48, ]
# Turn hyphens in the label column into spaces (spaced form first).
for (dash in c(" - ", "-")) {
  tab6[, 1] <- str_replace_all(string = tab6[, 1], pattern = dash, replacement = " ")
}
# Remove punctuation characters from all six columns.
for (k in 1:6) {
  tab6[, k] <- str_replace_all(string = tab6[, k], pattern = "[[:punct:]]", replacement = "")
}
# Append each listed row's successor to it in column 1 (presumably labels that
# were split over two extracted rows -- confirm against the PDF), then drop
# the successor rows.
merge_rows <- c(1, 3, 6, 12, 16, 19, 28, 30, 36)
tab6[merge_rows, 1] <- paste(tab6[merge_rows, 1], tab6[merge_rows + 1, 1], sep = " ")
tab6 <- tab6[setdiff(1:44, merge_rows + 1), ]
# Flatten each row into one string and re-split it into single tokens.
tab6 <- apply(tab6[, 1:6], 1, paste, collapse = " ")
x <- str_split_fixed(string = tab6, pattern = " ", n = 14)
n_rows <- nrow(x)
labs <- character(n_rows)
df <- matrix(data = NA, nrow = n_rows, ncol = 7)
for (i in seq_len(n_rows)) {
  tokens <- x[i, ]
  # Alphabetic tokens make up the label; paste(collapse) gives "" rather than
  # an "NA" label when a row has no alphabetic token.
  labs[i] <- paste(tokens[str_detect(string = tokens, pattern = "^[a-zA-Z]+$")],
                   collapse = " ")
  # Tokens of 1-10 digits are the numeric fields that fill the 7 value columns.
  df[i, ] <- as.numeric(tokens[str_detect(string = tokens, pattern = "^\\d{1,10}$")])
}
# Column order: first number, label, remaining six numbers.
tab6c <- cbind(df[, 1], labs, df[, 2:7])
# Stack the three parts and keep only the item rows; the skipped rows
# (1, 2, 5, 17, 31) are not items (presumably heading rows -- confirm).
tab6 <- rbind(tab6a, tab6b, tab6c)
tab6 <- tab6[c(3:4, 6:16, 18:30, 32:97), ]
# Department and category columns; the run lengths below add up to the 92
# rows kept above.
group_sizes <- c(2, 11, 13, 66)
departmentCode <- rep(100, nrow(tab6))
# Fixed the misspelt department name ("Servives").
department <- rep("Curative Services", nrow(tab6))
categoryCode <- rep(c(21, 22, 25, 26), times = group_sizes)
category <- rep(c("Compensation Of Employees",
                  "Use Of Goods And Services",
                  "Subsidy",
                  "Grants"),
                times = group_sizes)
tab6 <- data.frame(departmentCode, department, categoryCode, category, tab6)
tab6$department <- str_to_title(string = tab6$department)
tab6$category <- str_to_title(string = tab6$category)
names(tab6) <- c("departmentCode", "department",
                 "categoryCode", "category",
                 "itemCode", "item",
                 "actual_2016_2017",
                 "budget_2017_2018", "outturn_2017_2018",
                 "budget_2018_2019", "projection_2019_2020",
                 "projection_2020_2021")
# The table columns came from a character matrix: item stays text, the item
# code and the six amount columns become numeric.
tab6[, 6] <- as.character(tab6[, 6])
for (k in c(5, 7:12)) {
  tab6[, k] <- as.numeric(as.character(tab6[, k]))
}
################################################################################
mohHealthCurative2018 <- tab6
usethis::use_data(mohHealthCurative2018, overwrite = TRUE)
################################################################################
# Rows 12-28 of the eighth extracted table.
tab7 <- health2[[8]][12:28, ]
# Turn hyphens in the label column into spaces (spaced form first).
for (dash in c(" - ", "-")) {
  tab7[, 1] <- str_replace_all(string = tab7[, 1], pattern = dash, replacement = " ")
}
# Remove punctuation characters from all six columns.
for (k in 1:6) {
  tab7[, k] <- str_replace_all(string = tab7[, k], pattern = "[[:punct:]]", replacement = "")
}
# Append each listed row's successor to it in column 1 (presumably labels that
# were split over two extracted rows -- confirm against the PDF), then drop
# the successor rows.
merge_rows <- c(5, 10, 16)
tab7[merge_rows, 1] <- paste(tab7[merge_rows, 1], tab7[merge_rows + 1, 1], sep = " ")
tab7 <- tab7[setdiff(1:17, merge_rows + 1), ]
# Flatten each row into one string and re-split it into single tokens.
tab7 <- apply(tab7[, 1:6], 1, paste, collapse = " ")
x <- str_split_fixed(string = tab7, pattern = " ", n = 13)
n_rows <- nrow(x)
labs <- character(n_rows)
df <- matrix(data = NA, nrow = n_rows, ncol = 7)
for (i in seq_len(n_rows)) {
  tokens <- x[i, ]
  # Alphabetic tokens make up the label; paste(collapse) gives "" rather than
  # an "NA" label when a row has no alphabetic token.
  labs[i] <- paste(tokens[str_detect(string = tokens, pattern = "^[a-zA-Z]+$")],
                   collapse = " ")
  # Digit tokens are kept as strings here: cbind() below produces a character
  # matrix anyway and the columns are converted to numeric further down.
  df[i, ] <- tokens[str_detect(string = tokens, pattern = "^\\d{1,10}$")]
}
tab7 <- cbind(df[, 1], labs, df[, 2:7])
# Keep the item rows; rows 1, 2, 4 and 13 supply the department and category
# labels used below.
tab7 <- tab7[c(3, 5:12, 14), ]
departmentCode <- rep(200, nrow(tab7))
department <- rep(labs[1], nrow(tab7))
categoryCode <- c(21, rep(22, 8), 26)
category <- c(labs[2], rep(labs[4], 8), labs[13])
tab7 <- data.frame(departmentCode, department, categoryCode, category, tab7)
tab7$department <- str_to_title(string = tab7$department)
tab7$category <- str_to_title(string = tab7$category)
names(tab7) <- c("departmentCode", "department",
                 "categoryCode", "category",
                 "itemCode", "item",
                 "actual_2016_2017",
                 "budget_2017_2018", "outturn_2017_2018",
                 "budget_2018_2019", "projection_2019_2020",
                 "projection_2020_2021")
# Item stays text; the item code and the six amount columns become numeric.
tab7[, 6] <- as.character(tab7[, 6])
for (k in c(5, 7:12)) {
  tab7[, k] <- as.numeric(as.character(tab7[, k]))
}
################################################################################
mohHealthPreventive2018 <- tab7
usethis::use_data(mohHealthPreventive2018, overwrite = TRUE)
################################################################################
## Planning (department 400): rows 10-30 of the ninth table in health2
tab8 <- health2[[9]][10:30, ]
## Rows 2, 8, 10, 12 and 17 are appended to the label of the row above, then dropped
for(r in c(1, 7, 9, 11, 16)) {
  tab8[r, 1] <- paste(tab8[r, 1], tab8[r + 1, 1], sep = " ")
}
tab8 <- tab8[c(1, 3:7, 9, 11, 13:16, 18:21), ]
## Column 1: hyphens become spaces before punctuation is stripped, so
## hyphen-joined words stay separate tokens
tab8[ , 1] <- str_replace_all(string = tab8[ , 1], pattern = "-", replacement = " ")
## Strip all remaining punctuation from the six extracted columns
for(k in 1:6) {
  tab8[ , k] <- str_replace_all(string = tab8[ , k], pattern = "[[:punct:]]", replacement = "")
}
## Flatten every row to one string, then split it into at most 14 tokens
tab8 <- paste(tab8[ , 1], tab8[ , 2], tab8[ , 3], tab8[ , 4], tab8[ , 5], tab8[ , 6], sep = " ")
x <- str_split_fixed(string = tab8, pattern = " ", n = 14)
## Alphabetic tokens make up the row label; tokens of 1-10 digits fill the seven
## numeric fields
labs <- character(nrow(x))
df <- matrix(data = NA, nrow = nrow(x), ncol = 7)
for(i in seq_len(nrow(x))) {
  words <- x[i, ][str_detect(string = x[i, ], pattern = "^[a-zA-Z]+$")]
  ## collapse = " " gives "" for a row without words; pasting word by word over
  ## 1:length(words) turned such a row into the label "NA "
  phrase <- paste(words, collapse = " ")
  labs[i] <- phrase
  numbers <- x[i, ][str_detect(string = x[i, ],
                               pattern = "^(\\d{1}|\\d{2}|\\d{3}|\\d{4}|\\d{5}|\\d{6}|\\d{7}|\\d{8}|\\d{9}|\\d{10})$")]
  numbers <- as.numeric(numbers)
  df[i, ] <- numbers
}
tab8 <- cbind(df[ , 1], labs, df[ , 2:7])
## Keep the item rows; the labels of rows 1, 2, 4 and 15 are reused below as
## department and category names
tab8 <- tab8[c(3, 5:14, 16), ]
departmentCode <- rep(400, nrow(tab8))
department <- rep(labs[1], nrow(tab8))
categoryCode <- c(21, rep(22, 10), 26)
category <- c(labs[2], rep(labs[4], 10), labs[15])
tab8 <- data.frame(departmentCode, department, categoryCode, category, tab8)
tab8$department <- str_to_title(string = tab8$department)
tab8$category <- str_to_title(string = tab8$category)
names(tab8) <- c("departmentCode", "department",
                 "categoryCode", "category",
                 "itemCode", "item",
                 "actual_2016_2017",
                 "budget_2017_2018", "outturn_2017_2018",
                 "budget_2018_2019", "projection_2019_2020",
                 "projection_2020_2021")
## Item code and the six amount columns become numeric; the item label stays text
for(k in c(5, 7:12)) {
  tab8[ , k] <- as.numeric(as.character(tab8[ , k]))
}
tab8[ , 6] <- as.character(tab8[ , 6])
################################################################################
mohHealthPlanning2018 <- tab8
usethis::use_data(mohHealthPlanning2018, overwrite = TRUE)
################################################################################
## Vital (department 500): rows 5-20 of the tenth table in health2
tab9 <- health2[[10]][5:20, ]
## Rows 8, 10 and 14 are appended to the label of the row above, then dropped
for(r in c(7, 9, 13)) {
  tab9[r, 1] <- paste(tab9[r, 1], tab9[r + 1, 1], sep = " ")
}
tab9 <- tab9[c(1:7, 9, 11:13, 15:16), ]
## Column 1: hyphens and en dashes become spaces before punctuation is stripped,
## so dash-joined words stay separate tokens
tab9[ , 1] <- str_replace_all(string = tab9[ , 1], pattern = " - ", replacement = " ")
tab9[ , 1] <- str_replace_all(string = tab9[ , 1], pattern = "-", replacement = " ")
tab9[ , 1] <- str_replace_all(string = tab9[ , 1], pattern = "–", replacement = " ")
## Strip all remaining punctuation from the six extracted columns
for(k in 1:6) {
  tab9[ , k] <- str_replace_all(string = tab9[ , k], pattern = "[[:punct:]]", replacement = "")
}
## Flatten every row to one string, then split it into at most 13 tokens
tab9 <- paste(tab9[ , 1], tab9[ , 2], tab9[ , 3], tab9[ , 4], tab9[ , 5], tab9[ , 6], sep = " ")
x <- str_split_fixed(string = tab9, pattern = " ", n = 13)
## Alphabetic tokens make up the row label; tokens of 1-10 digits fill the seven
## numeric fields
labs <- character(nrow(x))
df <- matrix(data = NA, nrow = nrow(x), ncol = 7)
for(i in seq_len(nrow(x))) {
  words <- x[i, ][str_detect(string = x[i, ], pattern = "^[a-zA-Z]+$")]
  ## collapse = " " gives "" for a row without words; pasting word by word over
  ## 1:length(words) turned such a row into the label "NA "
  phrase <- paste(words, collapse = " ")
  labs[i] <- phrase
  numbers <- x[i, ][str_detect(string = x[i, ],
                               pattern = "^(\\d{1}|\\d{2}|\\d{3}|\\d{4}|\\d{5}|\\d{6}|\\d{7}|\\d{8}|\\d{9}|\\d{10})$")]
  numbers <- as.numeric(numbers)
  df[i, ] <- numbers
}
tab9 <- cbind(df[ , 1], labs, df[ , 2:7])
## Keep the item rows; the labels of rows 1, 2 and 5 are reused below as
## department and category names
tab9 <- tab9[c(3:4, 6:13), ]
departmentCode <- rep(500, nrow(tab9))
department <- rep(labs[1], nrow(tab9))
categoryCode <- c(rep(21, 2), rep(22, 8))
category <- c(rep(labs[2], 2), rep(labs[5], 8))
tab9 <- data.frame(departmentCode, department, categoryCode, category, tab9)
tab9$department <- str_to_title(string = tab9$department)
tab9$category <- str_to_title(string = tab9$category)
names(tab9) <- c("departmentCode", "department",
                 "categoryCode", "category",
                 "itemCode", "item",
                 "actual_2016_2017",
                 "budget_2017_2018", "outturn_2017_2018",
                 "budget_2018_2019", "projection_2019_2020",
                 "projection_2020_2021")
## Item code and the six amount columns become numeric; the item label stays text
for(k in c(5, 7:12)) {
  tab9[ , k] <- as.numeric(as.character(tab9[ , k]))
}
tab9[ , 6] <- as.character(tab9[ , 6])
################################################################################
mohHealthVital2018 <- tab9
usethis::use_data(mohHealthVital2018, overwrite = TRUE)
################################################################################
## Next table: rows 32-48 of table 10 stacked on rows 5-17 of table 11 in health2
tab10 <- rbind(health2[[10]][32:48, ], health2[[11]][5:17, ])
tab10[1, 1] <- paste(tab10[1, 1], tab10[2, 1], sep = " ")
## NOTE(review): this statement was truncated to `pas` and ran straight into the
## banner below, so it failed on an undefined object. It is completed here on the
## pattern of the row merge above (row 10 assumed to continue row 9) -- confirm
## against the PDF. No further tab10 processing follows in this file.
tab10[9, 1] <- paste(tab10[9, 1], tab10[10, 1], sep = " ")
################################################################################
#
# Load required libraries
#
################################################################################
library(pdftools)
library(tm)
library(tabulizer)
library(stringr)
library(tidyverse)
library(tidytext)
options(scipen = 999)
################################################################################
#
# Function to process tables
#
################################################################################
## Split pre-tokenised table rows into text labels and numeric fields.
##
## tab:  list of character vectors, one per table row, each holding that row's
##       tokens
## nrow: number of rows of the numeric matrix; defaults to length(tab)
## ncol: number of numeric fields per row (required)
##
## Returns a list with two elements:
##   labs - character vector with one label per row: the row's purely alphabetic
##          tokens joined by single spaces ("" when the row has none)
##   df   - nrow x ncol numeric matrix holding each row's tokens of 6 to 8 digits
## R raises an error if a row does not supply a number of numeric tokens that
## fits `ncol`.
##
## Previously the function built `labs` and `df` and then discarded them (the
## value of the `for` loop, NULL, was returned), the default `nrow = NULL` made
## matrix() fail, and a row without words produced the label "NA ".
get_table <- function(tab, nrow = NULL, ncol = NULL) {
  if(is.null(nrow)) {
    nrow <- length(tab)
  }
  if(is.null(ncol)) {
    stop("`ncol` must be supplied", call. = FALSE)
  }
  labs <- character(length(tab))
  df <- matrix(data = NA, nrow = nrow, ncol = ncol)
  for(i in seq_along(tab)) {
    tokens <- tab[[i]]
    ## grepl() returns FALSE for NA tokens, so they are dropped instead of
    ## leaking NA entries into the label or the numbers
    words <- tokens[grepl(pattern = "^[a-zA-Z]+$", x = tokens)]
    labs[i] <- paste(words, collapse = " ")
    numbers <- tokens[grepl(pattern = "^(\\d{6}|\\d{7}|\\d{8})$", x = tokens, perl = TRUE)]
    df[i, ] <- as.numeric(numbers)
  }
  list(labs = labs, df = df)
}
################################################################################
#
# Create list for information and tables in page 253 (2018)
#
################################################################################
## Extract tables from page 253
health1 <- extract_tables(file = "data-raw/budget/2018 Ministry of Finance and Development Planning.pdf",
                          pages = 253,
                          method = "decide")
#goal <- health1[[1]][3, ]
#strategicObjective <- str_c(health1[[1]][5, ], health1[[1]][6, ], health1[[1]][7, ], sep = " ")
## First summary table (rows 27-31), labelled by economic classification:
## join the two extracted columns, strip punctuation, split into tokens
tab1 <- health1[[1]][27:31, ]
tab1 <- paste(tab1[ , 1], tab1[ , 2], sep = " ")
## `replacement` is spelled out in full; the abbreviated `replace =` only worked
## through partial argument matching
tab1 <- str_replace_all(string = tab1, pattern = "[[:punct:]]", replacement = "")
tab1 <- str_split_fixed(string = tab1, pattern = " ", n = 12)
## Alphabetic tokens make up the row label; tokens of 1-10 digits fill the seven
## numeric fields (code plus six amounts)
economic_classification <- character(nrow(tab1))
df <- matrix(data = NA, nrow = nrow(tab1), ncol = 7)
for(i in seq_len(nrow(tab1))) {
  words <- tab1[i, ][str_detect(string = tab1[i, ], pattern = "^[a-zA-Z]+$")]
  ## collapse = " " gives "" for a row without words; pasting word by word over
  ## 1:length(words) turned such a row into the label "NA "
  phrase <- paste(words, collapse = " ")
  economic_classification[i] <- phrase
  numbers <- tab1[i, ][str_detect(string = tab1[i, ],
                                  pattern = "^(\\d{1}|\\d{2}|\\d{3}|\\d{4}|\\d{5}|\\d{6}|\\d{7}|\\d{8}|\\d{9}|\\d{10})$")]
  df[i, ] <- as.numeric(numbers)
}
df <- data.frame(df[ , 1], economic_classification, df[ , 2:7])
names(df) <- c("code", "economic_classification",
               "actual_2016_2017",
               "budget_2017_2018", "outturn_2017_2018",
               "budget_2018_2019", "projection_2019_2020",
               "projection_2020_2021")
df$economic_classification <- str_to_sentence(df$economic_classification)
summaryEconHealth2018 <- df
## Second summary table (rows 37-46), labelled by spending entity: same steps
tab2 <- health1[[1]][37:46, ]
tab2 <- paste(tab2[ , 1], tab2[ , 2], sep = " ")
tab2 <- str_replace_all(string = tab2, pattern = "[[:punct:]]", replacement = "")
tab2 <- str_split_fixed(string = tab2, pattern = " ", n = 12)
spending_entity <- character(nrow(tab2))
df <- matrix(data = NA, nrow = nrow(tab2), ncol = 7)
for(i in seq_len(nrow(tab2))) {
  words <- tab2[i, ][str_detect(string = tab2[i, ], pattern = "^[a-zA-Z]+$")]
  phrase <- paste(words, collapse = " ")
  spending_entity[i] <- phrase
  numbers <- tab2[i, ][str_detect(string = tab2[i, ],
                                  pattern = "^(\\d{1}|\\d{2}|\\d{3}|\\d{4}|\\d{5}|\\d{6}|\\d{7}|\\d{8}|\\d{9}|\\d{10})$")]
  df[i, ] <- as.numeric(numbers)
}
df <- data.frame(df[ , 1], spending_entity, df[ , 2:7])
names(df) <- c("code", "spending_entity",
               "actual_2016_2017",
               "budget_2017_2018", "outturn_2017_2018",
               "budget_2018_2019", "projection_2019_2020",
               "projection_2020_2021")
df$spending_entity <- str_to_title(df$spending_entity)
summarySpendingHealth2018 <- df
## Bundle both summaries into one named list and save it with usethis::use_data()
summaryHealth2018 <- list(summaryEconHealth2018 = summaryEconHealth2018,
                          summarySpendingHealth2018 = summarySpendingHealth2018)
usethis::use_data(summaryHealth2018, overwrite = TRUE)
## Tidy-up (now also removes `spending_entity` and the loop index, which the
## earlier list left behind)
rm(words, phrase, numbers, economic_classification, spending_entity, i,
   tab1, tab2, df,
   summaryEconHealth2018, summarySpendingHealth2018, health1)
################################################################################
#
#
#
################################################################################
## Extract tables from pages 254-266
health2 <- extract_tables(file = "data-raw/budget/2018 Ministry of Finance and Development Planning.pdf",
                          pages = 254:266,
                          method = "decide")
## Economic classification, part 1: rows 14-48 of the first table in health2
tab1 <- health2[[1]][14:48, ]
## Column 1: en dashes and hyphens become spaces before punctuation is stripped,
## so dash-joined words stay separate tokens
tab1[ , 1] <- str_replace_all(string = tab1[ , 1], pattern = " – ", replacement = " ")
tab1[ , 1] <- str_replace_all(string = tab1[ , 1], pattern = "–", replacement = " ")
tab1[ , 1] <- str_replace_all(string = tab1[ , 1], pattern = "-", replacement = " ")
## Strip all remaining punctuation from the six extracted columns
for(k in 1:6) {
  tab1[ , k] <- str_replace_all(string = tab1[ , k], pattern = "[[:punct:]]", replacement = "")
}
## Rows 8 and 12 are appended to the label of the row above, then dropped
for(r in c(7, 11)) {
  tab1[r, 1] <- paste(tab1[r, 1], tab1[r + 1, 1], sep = " ")
}
tab1 <- tab1[c(1:7, 9:11, 13:35), ]
## Column 6 holds two space-separated values; split it into columns 6 and 7
y <- str_split_fixed(string = tab1[ , 6], pattern = " ", n = 2)
tab1 <- cbind(tab1[ , 1:5], y)
## Column 1 is tokenised: alphabetic tokens make up the row label, tokens of
## 1-10 digits fill the two numeric fields
x <- str_split_fixed(string = tab1[ , 1], pattern = " ", n = 8)
labs <- character(nrow(x))
df <- matrix(data = NA, nrow = nrow(x), ncol = 2)
for(i in seq_len(nrow(x))) {
  words <- x[i, ][str_detect(string = x[i, ], pattern = "^[a-zA-Z]+$")]
  ## collapse = " " gives "" for a row without words; pasting word by word over
  ## 1:length(words) turned such a row into the label "NA "
  phrase <- paste(words, collapse = " ")
  labs[i] <- phrase
  numbers <- x[i, ][str_detect(string = x[i, ],
                               pattern = "^(\\d{1}|\\d{2}|\\d{3}|\\d{4}|\\d{5}|\\d{6}|\\d{7}|\\d{8}|\\d{9}|\\d{10})$")]
  numbers <- as.numeric(numbers)
  df[i, ] <- numbers
}
tab1 <- data.frame(df[ , 1], labs, df[ , 2], tab1[ , 3:7])
## Keep the item rows; the labels of rows 1, 5 and 27 are reused below as
## category names
tab1 <- tab1[c(2:4, 6:26, 28:33), ]
categoryCode <- c(rep(21, 3), rep(22, 21), rep(25, 6))
category <- c(rep(labs[1], 3), rep(labs[5], 21), rep(labs[27], 6))
tab1 <- data.frame(categoryCode, category, tab1)
tab1$category <- str_to_title(tab1$category)
names(tab1) <- c("categoryCode", "category",
                 "itemCode", "item",
                 "actual_2016_2017",
                 "budget_2017_2018", "outturn_2017_2018",
                 "budget_2018_2019", "projection_2019_2020",
                 "projection_2020_2021")
## The item label stays text; the six amount columns become numeric
tab1[ , 4] <- as.character(tab1[ , 4])
for(k in 5:10) {
  tab1[ , k] <- as.numeric(as.character(tab1[ , k]))
}
################################################################################
## Economic classification, part 2: rows 6-48 of the second table in health2
tab2 <- health2[[2]][6:48, ]
## Column 1: en dashes and hyphens become spaces before punctuation is stripped,
## so dash-joined words stay separate tokens
tab2[ , 1] <- str_replace_all(string = tab2[ , 1], pattern = " – ", replacement = " ")
tab2[ , 1] <- str_replace_all(string = tab2[ , 1], pattern = "–", replacement = " ")
tab2[ , 1] <- str_replace_all(string = tab2[ , 1], pattern = "-", replacement = " ")
## Strip all remaining punctuation from the six extracted columns
for(k in 1:6) {
  tab2[ , k] <- str_replace_all(string = tab2[ , k], pattern = "[[:punct:]]", replacement = "")
}
## Rows 10, 16, 32 and 37 are appended to the label of the row above, then dropped
for(r in c(9, 15, 31, 36)) {
  tab2[r, 1] <- paste(tab2[r, 1], tab2[r + 1, 1], sep = " ")
}
tab2 <- tab2[c(1:9, 11:15, 17:31, 33:36, 38:43), ]
## Column 6 holds two space-separated values; split it into columns 6 and 7
y <- str_split_fixed(string = tab2[ , 6], pattern = " ", n = 2)
tab2 <- cbind(tab2[ , 1:5], y)
## Column 1 is tokenised: alphabetic tokens make up the row label, tokens of
## 1-10 digits fill the two numeric fields
x <- str_split_fixed(string = tab2[ , 1], pattern = " ", n = 8)
labs <- character(nrow(x))
df <- matrix(data = NA, nrow = nrow(x), ncol = 2)
for(i in seq_len(nrow(x))) {
  words <- x[i, ][str_detect(string = x[i, ], pattern = "^[a-zA-Z]+$")]
  ## collapse = " " gives "" for a row without words; pasting word by word over
  ## 1:length(words) turned such a row into the label "NA "
  phrase <- paste(words, collapse = " ")
  labs[i] <- phrase
  numbers <- x[i, ][str_detect(string = x[i, ],
                               pattern = "^(\\d{1}|\\d{2}|\\d{3}|\\d{4}|\\d{5}|\\d{6}|\\d{7}|\\d{8}|\\d{9}|\\d{10})$")]
  numbers <- as.numeric(numbers)
  df[i, ] <- numbers
}
tab2 <- data.frame(df[ , 1], labs, df[ , 2], tab2[ , 3:7])
## Drop row 8, whose label is reused below as the second category name
tab2 <- tab2[c(1:7, 9:39), ]
categoryCode <- c(rep(25, 7), rep(26, 31))
category <- c(rep("Subsidy", 7), rep(labs[8], 31))
tab2 <- data.frame(categoryCode, category, tab2)
tab2$category <- str_to_title(tab2$category)
names(tab2) <- c("categoryCode", "category",
                 "itemCode", "item",
                 "actual_2016_2017",
                 "budget_2017_2018", "outturn_2017_2018",
                 "budget_2018_2019", "projection_2019_2020",
                 "projection_2020_2021")
## The item label stays text; the six amount columns become numeric
tab2[ , 4] <- as.character(tab2[ , 4])
for(k in 5:10) {
  tab2[ , k] <- as.numeric(as.character(tab2[ , k]))
}
################################################################################
## Economic classification, part 3 (all rows coded 26, "Grants"): rows 6-46 of
## the third table in health2
tab3 <- health2[[3]][6:46, ]
## Column 1: en dashes and hyphens become spaces before punctuation is stripped,
## so dash-joined words stay separate tokens
tab3[ , 1] <- str_replace_all(string = tab3[ , 1], pattern = " – ", replacement = " ")
tab3[ , 1] <- str_replace_all(string = tab3[ , 1], pattern = "–", replacement = " ")
tab3[ , 1] <- str_replace_all(string = tab3[ , 1], pattern = "-", replacement = " ")
## Strip all remaining punctuation from the six extracted columns
for(k in 1:6) {
  tab3[ , k] <- str_replace_all(string = tab3[ , k], pattern = "[[:punct:]]", replacement = "")
}
## Rows 2, 15 and 24 are appended to the label of the row above, then dropped
for(r in c(1, 14, 23)) {
  tab3[r, 1] <- paste(tab3[r, 1], tab3[r + 1, 1], sep = " ")
}
## The last row gets the word "County" appended to its label
tab3[41, 1] <- paste(tab3[41, 1], "County", sep = " ")
tab3 <- tab3[c(1, 3:14, 16:23, 25:41), ]
## Column 6 holds two space-separated values; split it into columns 6 and 7
y <- str_split_fixed(string = tab3[ , 6], pattern = " ", n = 2)
tab3 <- cbind(tab3[ , 1:5], y)
## Column 1 is tokenised: alphabetic tokens make up the row label, tokens of
## 1-10 digits fill the two numeric fields
x <- str_split_fixed(string = tab3[ , 1], pattern = " ", n = 8)
labs <- character(nrow(x))
df <- matrix(data = NA, nrow = nrow(x), ncol = 2)
for(i in seq_len(nrow(x))) {
  words <- x[i, ][str_detect(string = x[i, ], pattern = "^[a-zA-Z]+$")]
  ## collapse = " " gives "" for a row without words; pasting word by word over
  ## 1:length(words) turned such a row into the label "NA "
  phrase <- paste(words, collapse = " ")
  labs[i] <- phrase
  numbers <- x[i, ][str_detect(string = x[i, ],
                               pattern = "^(\\d{1}|\\d{2}|\\d{3}|\\d{4}|\\d{5}|\\d{6}|\\d{7}|\\d{8}|\\d{9}|\\d{10})$")]
  numbers <- as.numeric(numbers)
  df[i, ] <- numbers
}
tab3 <- data.frame(df[ , 1], labs, df[ , 2], tab3[ , 3:7])
categoryCode <- rep(26, nrow(tab3))
category <- rep("Grants", nrow(tab3))
tab3 <- data.frame(categoryCode, category, tab3)
tab3$category <- str_to_title(tab3$category)
names(tab3) <- c("categoryCode", "category",
                 "itemCode", "item",
                 "actual_2016_2017",
                 "budget_2017_2018", "outturn_2017_2018",
                 "budget_2018_2019", "projection_2019_2020",
                 "projection_2020_2021")
## The item label stays text; the six amount columns become numeric
tab3[ , 4] <- as.character(tab3[ , 4])
for(k in 5:10) {
  tab3[ , k] <- as.numeric(as.character(tab3[ , k]))
}
################################################################################
## Economic classification, part 4: rows 6-24 of the fourth table in health2
tab4 <- health2[[4]][6:24, ]
## Row 2 is appended to the label of row 1, then dropped
tab4[1, 1] <- paste(tab4[1, 1], tab4[2, 1], sep = " ")
tab4 <- tab4[c(1, 3:19), ]
## Column 1: en dashes and hyphens become spaces before punctuation is stripped,
## so dash-joined words stay separate tokens
tab4[ , 1] <- str_replace_all(string = tab4[ , 1], pattern = " – ", replacement = " ")
tab4[ , 1] <- str_replace_all(string = tab4[ , 1], pattern = "–", replacement = " ")
tab4[ , 1] <- str_replace_all(string = tab4[ , 1], pattern = "-", replacement = " ")
## Strip all remaining punctuation from the six extracted columns
for(k in 1:6) {
  tab4[ , k] <- str_replace_all(string = tab4[ , k], pattern = "[[:punct:]]", replacement = "")
}
## Column 6 holds two space-separated values; split it into columns 6 and 7
y <- str_split_fixed(string = tab4[ , 6], pattern = " ", n = 2)
tab4 <- cbind(tab4[ , 1:5], y)
## Column 1 is tokenised: alphabetic tokens make up the row label, tokens of
## 1-10 digits fill the two numeric fields
x <- str_split_fixed(string = tab4[ , 1], pattern = " ", n = 8)
labs <- character(nrow(x))
df <- matrix(data = NA, nrow = nrow(x), ncol = 2)
for(i in seq_len(nrow(x))) {
  words <- x[i, ][str_detect(string = x[i, ], pattern = "^[a-zA-Z]+$")]
  ## collapse = " " gives "" for a row without words; pasting word by word over
  ## 1:length(words) turned such a row into the label "NA "
  phrase <- paste(words, collapse = " ")
  labs[i] <- phrase
  numbers <- x[i, ][str_detect(string = x[i, ],
                               pattern = "^(\\d{1}|\\d{2}|\\d{3}|\\d{4}|\\d{5}|\\d{6}|\\d{7}|\\d{8}|\\d{9}|\\d{10})$")]
  numbers <- as.numeric(numbers)
  df[i, ] <- numbers
}
tab4 <- data.frame(df[ , 1], labs, df[ , 2], tab4[ , 3:7])
## Drop row 17, whose label is reused below as the category name of the last row
tab4 <- tab4[c(1:16, 18), ]
categoryCode <- c(rep(26, 16), 31)
category <- c(rep("Grants", 16), labs[17])
tab4 <- data.frame(categoryCode, category, tab4)
tab4$category <- str_to_title(tab4$category)
names(tab4) <- c("categoryCode", "category",
                 "itemCode", "item",
                 "actual_2016_2017",
                 "budget_2017_2018", "outturn_2017_2018",
                 "budget_2018_2019", "projection_2019_2020",
                 "projection_2020_2021")
## The item label stays text; the six amount columns become numeric
tab4[ , 4] <- as.character(tab4[ , 4])
for(k in 5:10) {
  tab4[ , k] <- as.numeric(as.character(tab4[ , k]))
}
################################################################################
## Stack the four economic-classification tables (tab1 to tab4) into a single
## data frame and save it with usethis::use_data()
mohHealthEcon2018 <- rbind(tab1, tab2, tab3, tab4) %>%
  data.frame()
usethis::use_data(mohHealthEcon2018, overwrite = TRUE)
################################################################################
## County table: rows 30-44 of the fourth table in health2
tab5 <- health2[[4]][30:44, ]
## Column 1: hyphens become spaces before punctuation is stripped, so
## hyphen-joined words stay separate tokens
tab5[ , 1] <- str_replace_all(string = tab5[ , 1], pattern = "-", replacement = " ")
## Strip all remaining punctuation from the six extracted columns
for(k in 1:6) {
  tab5[ , k] <- str_replace_all(string = tab5[ , k], pattern = "[[:punct:]]", replacement = "")
}
## Column 1 is tokenised (at most 5 tokens); column 6 holds two space-separated
## values and is split into columns 6 and 7
x <- str_split_fixed(string = tab5[ , 1], pattern = " ", n = 5)
y <- str_split_fixed(string = tab5[ , 6], pattern = " ", n = 2)
tab5 <- cbind(tab5[ , 1:5], y)
## Alphabetic tokens make up the county label; tokens of 1-10 digits fill the
## two numeric fields
labs <- character(nrow(x))
df <- matrix(data = NA, nrow = nrow(x), ncol = 2)
for(i in seq_len(nrow(x))) {
  words <- x[i, ][str_detect(string = x[i, ], pattern = "^[a-zA-Z]+$")]
  ## collapse = " " gives "" for a row without words; pasting word by word over
  ## 1:length(words) turned such a row into the label "NA "
  phrase <- paste(words, collapse = " ")
  labs[i] <- phrase
  numbers <- x[i, ][str_detect(string = x[i, ],
                               pattern = "^(\\d{1}|\\d{2}|\\d{3}|\\d{4}|\\d{5}|\\d{6}|\\d{7}|\\d{8}|\\d{9}|\\d{10})$")]
  numbers <- as.numeric(numbers)
  df[i, ] <- numbers
}
tab5 <- data.frame(df[ , 1], labs, df[ , 2], tab5[ , 3:7])
tab5$labs <- str_to_title(tab5$labs)
names(tab5) <- c("countyCode", "county",
                 "actual_2016_2017",
                 "budget_2017_2018", "outturn_2017_2018",
                 "budget_2018_2019", "projection_2019_2020",
                 "projection_2020_2021")
## Columns 4-8 come from the text matrix and are converted to numeric
for(k in 4:8) {
  tab5[ , k] <- as.numeric(as.character(tab5[ , k]))
}
################################################################################
mohHealthCounty2018 <- tab5
usethis::use_data(mohHealthCounty2018, overwrite = TRUE)
################################################################################
## Curative services, part a: rows 14-48 of the fifth table in health2
tab6 <- health2[[5]][14:48, ]
## Column 1: hyphens become spaces before punctuation is stripped, so
## hyphen-joined words stay separate tokens
tab6[ , 1] <- str_replace_all(string = tab6[ , 1], pattern = " - ", replacement = " ")
tab6[ , 1] <- str_replace_all(string = tab6[ , 1], pattern = "-", replacement = " ")
## Strip all remaining punctuation; only columns 1-4 are used for this table
for(k in 1:4) {
  tab6[ , k] <- str_replace_all(string = tab6[ , k], pattern = "[[:punct:]]", replacement = "")
}
## Rows 8, 10, 12, 17, 24, 29 and 32 are appended to the label of the row above,
## then dropped
for(r in c(7, 9, 11, 16, 23, 28, 31)) {
  tab6[r, 1] <- paste(tab6[r, 1], tab6[r + 1, 1], sep = " ")
}
tab6 <- tab6[c(1:7, 9, 11, 13:16, 18:23, 25:28, 30:31, 33:35), ]
## Flatten every row to one string, then split it into at most 13 tokens
tab6 <- paste(tab6[ , 1], tab6[ , 2], tab6[ , 3], tab6[ , 4], sep = " ")
x <- str_split_fixed(string = tab6, pattern = " ", n = 13)
## Alphabetic tokens make up the row label; tokens of 1-10 digits fill the seven
## numeric fields
labs <- character(nrow(x))
df <- matrix(data = NA, nrow = nrow(x), ncol = 7)
for(i in seq_len(nrow(x))) {
  words <- x[i, ][str_detect(string = x[i, ], pattern = "^[a-zA-Z]+$")]
  ## collapse = " " gives "" for a row without words; pasting word by word over
  ## 1:length(words) turned such a row into the label "NA "
  phrase <- paste(words, collapse = " ")
  labs[i] <- phrase
  numbers <- x[i, ][str_detect(string = x[i, ],
                               pattern = "^(\\d{1}|\\d{2}|\\d{3}|\\d{4}|\\d{5}|\\d{6}|\\d{7}|\\d{8}|\\d{9}|\\d{10})$")]
  numbers <- as.numeric(numbers)
  df[i, ] <- numbers
}
tab6a <- cbind(df[ , 1], labs, df[ , 2:7])
## Curative services, part b: rows 5-50 of the sixth table in health2
tab6 <- health2[[6]][5:50, ]
## Column 1: hyphens become spaces before punctuation is stripped, so
## hyphen-joined words stay separate tokens
tab6[ , 1] <- str_replace_all(string = tab6[ , 1], pattern = " - ", replacement = " ")
tab6[ , 1] <- str_replace_all(string = tab6[ , 1], pattern = "-", replacement = " ")
## Strip all remaining punctuation from the six extracted columns
for(k in 1:6) {
  tab6[ , k] <- str_replace_all(string = tab6[ , k], pattern = "[[:punct:]]", replacement = "")
}
## Each listed row has the next row appended to its label; the appended rows
## are dropped by the selection that follows
for(r in c(4, 11, 14, 19, 22, 24, 27, 33, 35, 37, 40, 44)) {
  tab6[r, 1] <- paste(tab6[r, 1], tab6[r + 1, 1], sep = " ")
}
tab6 <- tab6[c(1:4, 6:11, 13:14, 16:19, 21:22, 24, 26:27, 29:33, 35, 37, 39:40, 42:44, 46), ]
## Flatten every row to one string, then split it into at most 13 tokens
tab6 <- paste(tab6[ , 1], tab6[ , 2], tab6[ , 3], tab6[ , 4], tab6[ , 5], tab6[ , 6], sep = " ")
x <- str_split_fixed(string = tab6, pattern = " ", n = 13)
## Alphabetic tokens make up the row label; tokens of 1-10 digits fill the seven
## numeric fields
labs <- character(nrow(x))
df <- matrix(data = NA, nrow = nrow(x), ncol = 7)
for(i in seq_len(nrow(x))) {
  words <- x[i, ][str_detect(string = x[i, ], pattern = "^[a-zA-Z]+$")]
  ## collapse = " " gives "" for a row without words; pasting word by word over
  ## 1:length(words) turned such a row into the label "NA "
  phrase <- paste(words, collapse = " ")
  labs[i] <- phrase
  numbers <- x[i, ][str_detect(string = x[i, ],
                               pattern = "^(\\d{1}|\\d{2}|\\d{3}|\\d{4}|\\d{5}|\\d{6}|\\d{7}|\\d{8}|\\d{9}|\\d{10})$")]
  numbers <- as.numeric(numbers)
  df[i, ] <- numbers
}
tab6b <- cbind(df[ , 1], labs, df[ , 2:7])
## Curative services, part c: rows 5-48 of the seventh table in health2
tab6 <- health2[[7]][5:48, ]
## Column 1: hyphens become spaces before punctuation is stripped, so
## hyphen-joined words stay separate tokens
tab6[ , 1] <- str_replace_all(string = tab6[ , 1], pattern = " - ", replacement = " ")
tab6[ , 1] <- str_replace_all(string = tab6[ , 1], pattern = "-", replacement = " ")
## Strip all remaining punctuation from the six extracted columns
for(k in 1:6) {
  tab6[ , k] <- str_replace_all(string = tab6[ , k], pattern = "[[:punct:]]", replacement = "")
}
## Each listed row has the next row appended to its label; the appended rows
## are dropped by the selection that follows
for(r in c(1, 3, 6, 12, 16, 19, 28, 30, 36)) {
  tab6[r, 1] <- paste(tab6[r, 1], tab6[r + 1, 1], sep = " ")
}
tab6 <- tab6[c(1, 3, 5:6, 8:12, 14:16, 18:19, 21:28, 30, 32:36, 38:44), ]
## Flatten every row to one string, then split it into at most 14 tokens
tab6 <- paste(tab6[ , 1], tab6[ , 2], tab6[ , 3], tab6[ , 4], tab6[ , 5], tab6[ , 6], sep = " ")
x <- str_split_fixed(string = tab6, pattern = " ", n = 14)
## Alphabetic tokens make up the row label; tokens of 1-10 digits fill the seven
## numeric fields
labs <- character(nrow(x))
df <- matrix(data = NA, nrow = nrow(x), ncol = 7)
for(i in seq_len(nrow(x))) {
  words <- x[i, ][str_detect(string = x[i, ], pattern = "^[a-zA-Z]+$")]
  ## collapse = " " gives "" for a row without words; pasting word by word over
  ## 1:length(words) turned such a row into the label "NA "
  phrase <- paste(words, collapse = " ")
  labs[i] <- phrase
  numbers <- x[i, ][str_detect(string = x[i, ],
                               pattern = "^(\\d{1}|\\d{2}|\\d{3}|\\d{4}|\\d{5}|\\d{6}|\\d{7}|\\d{8}|\\d{9}|\\d{10})$")]
  numbers <- as.numeric(numbers)
  df[i, ] <- numbers
}
tab6c <- cbind(df[ , 1], labs, df[ , 2:7])
## Curative services (department 100): stack the three partial tables and drop
## rows 1, 2, 5, 17 and 31, leaving the item rows
tab6 <- rbind(tab6a, tab6b, tab6c)
tab6 <- tab6[c(3:4, 6:16, 18:30, 32:97), ]
departmentCode <- rep(100, nrow(tab6))
## Department name typo fixed ("Servives" -> "Services"); the misspelling was
## written into the saved dataset
department <- rep("Curative Services", nrow(tab6))
categoryCode <- c(rep(21, 2), rep(22, 11), rep(25, 13), rep(26, 66))
category <- c(rep("Compensation Of Employees", 2),
              rep("Use Of Goods And Services", 11),
              rep("Subsidy", 13),
              rep("Grants", 66))
tab6 <- data.frame(departmentCode, department, categoryCode, category, tab6)
tab6$department <- str_to_title(string = tab6$department)
tab6$category <- str_to_title(string = tab6$category)
names(tab6) <- c("departmentCode", "department",
                 "categoryCode", "category",
                 "itemCode", "item",
                 "actual_2016_2017",
                 "budget_2017_2018", "outturn_2017_2018",
                 "budget_2018_2019", "projection_2019_2020",
                 "projection_2020_2021")
## Item code and the six amount columns become numeric; the item label stays text
for(k in c(5, 7:12)) {
  tab6[ , k] <- as.numeric(as.character(tab6[ , k]))
}
tab6[ , 6] <- as.character(tab6[ , 6])
################################################################################
mohHealthCurative2018 <- tab6
usethis::use_data(mohHealthCurative2018, overwrite = TRUE)
################################################################################
## Preventive services (department 200): rows 12-28 of the eighth table in health2
tab7 <- health2[[8]][12:28, ]
## Column 1: hyphens become spaces before punctuation is stripped, so
## hyphen-joined words stay separate tokens
tab7[ , 1] <- str_replace_all(string = tab7[ , 1], pattern = " - ", replacement = " ")
tab7[ , 1] <- str_replace_all(string = tab7[ , 1], pattern = "-", replacement = " ")
## Strip all remaining punctuation from the six extracted columns
for(k in 1:6) {
  tab7[ , k] <- str_replace_all(string = tab7[ , k], pattern = "[[:punct:]]", replacement = "")
}
## Rows 6, 11 and 17 are appended to the label of the row above, then dropped
for(r in c(5, 10, 16)) {
  tab7[r, 1] <- paste(tab7[r, 1], tab7[r + 1, 1], sep = " ")
}
tab7 <- tab7[c(1:5, 7:10, 12:16), ]
## Flatten every row to one string, then split it into at most 13 tokens
tab7 <- paste(tab7[ , 1], tab7[ , 2], tab7[ , 3], tab7[ , 4], tab7[ , 5], tab7[ , 6], sep = " ")
x <- str_split_fixed(string = tab7, pattern = " ", n = 13)
## Alphabetic tokens make up the row label; tokens of 1-10 digits fill the seven
## numeric fields (kept as text here, converted once the table is assembled)
labs <- character(nrow(x))
df <- matrix(data = NA, nrow = nrow(x), ncol = 7)
for(i in seq_len(nrow(x))) {
  words <- x[i, ][str_detect(string = x[i, ], pattern = "^[a-zA-Z]+$")]
  ## collapse = " " gives "" for a row without words; pasting word by word over
  ## 1:length(words) turned such a row into the label "NA "
  phrase <- paste(words, collapse = " ")
  labs[i] <- phrase
  numbers <- x[i, ][str_detect(string = x[i, ],
                               pattern = "^(\\d{1}|\\d{2}|\\d{3}|\\d{4}|\\d{5}|\\d{6}|\\d{7}|\\d{8}|\\d{9}|\\d{10})$")]
  df[i, ] <- numbers
}
tab7 <- cbind(df[ , 1], labs, df[ , 2:7])
## Keep the item rows; the labels of rows 1, 2, 4 and 13 are reused below as
## department and category names
tab7 <- tab7[c(3, 5:12, 14), ]
departmentCode <- rep(200, nrow(tab7))
department <- rep(labs[1], nrow(tab7))
categoryCode <- c(21, rep(22, 8), 26)
category <- c(labs[2], rep(labs[4], 8), labs[13])
tab7 <- data.frame(departmentCode, department, categoryCode, category, tab7)
tab7$department <- str_to_title(string = tab7$department)
tab7$category <- str_to_title(string = tab7$category)
names(tab7) <- c("departmentCode", "department",
                 "categoryCode", "category",
                 "itemCode", "item",
                 "actual_2016_2017",
                 "budget_2017_2018", "outturn_2017_2018",
                 "budget_2018_2019", "projection_2019_2020",
                 "projection_2020_2021")
## Item code and the six amount columns become numeric; the item label stays text
for(k in c(5, 7:12)) {
  tab7[ , k] <- as.numeric(as.character(tab7[ , k]))
}
tab7[ , 6] <- as.character(tab7[ , 6])
################################################################################
mohHealthPreventive2018 <- tab7
usethis::use_data(mohHealthPreventive2018, overwrite = TRUE)
################################################################################
## Rebuild the expenditure table held in rows 10-30 of health2[[9]]
## (department code 400) and save it as `mohHealthPlanning2018`.
tab8 <- health2[[9]][10:30, ]
## Rows 2, 8, 10, 12 and 17 only contribute first-column text to the row above
## them; append that text and then drop those rows
for(j in c(1, 7, 9, 11, 16)) {
  tab8[j, 1] <- paste(tab8[j, 1], tab8[j + 1, 1], sep = " ")
}
tab8 <- tab8[c(1, 3:7, 9, 11, 13:16, 18:21), ]
## Hyphens in the description column become spaces so the words on either side
## remain separate tokens; remaining punctuation is removed from columns 1-6
tab8[ , 1] <- str_replace_all(string = tab8[ , 1], pattern = "-", replacement = " ")
for(j in 1:6) {
  tab8[ , j] <- str_replace_all(string = tab8[ , j], pattern = "[[:punct:]]", replacement = "")
}
## Collapse each row to one string and re-split it into space-separated tokens
tab8 <- paste(tab8[ , 1], tab8[ , 2], tab8[ , 3], tab8[ , 4], tab8[ , 5], tab8[ , 6], sep = " ")
x <- str_split_fixed(string = tab8, pattern = " ", n = 14)
## One label and seven numeric fields per row
labs <- character(nrow(x))
df <- matrix(data = NA, nrow = nrow(x), ncol = 7)
for(i in seq_len(nrow(x))) {
  ## Purely alphabetic tokens make up the label. Joining with `collapse` gives
  ## an empty label for a row without words (the earlier 1:length(words) loop
  ## produced "NA " in that case)
  words <- x[i, ][str_detect(string = x[i, ], pattern = "^[a-zA-Z]+$")]
  labs[i] <- paste(words, collapse = " ")
  ## Purely numeric tokens (1 to 10 digits) fill the seven number slots
  numbers <- x[i, ][str_detect(string = x[i, ],
    pattern = "^(\\d{1}|\\d{2}|\\d{3}|\\d{4}|\\d{5}|\\d{6}|\\d{7}|\\d{8}|\\d{9}|\\d{10})$")]
  numbers <- as.numeric(numbers)
  df[i, ] <- numbers
}
## First number, then the label, then the six remaining numbers
tab8 <- cbind(df[ , 1], labs, df[ , 2:7])
## Keep the line-item rows; the department and category names are taken from
## the labels of rows that are not kept (labs[1], labs[2], labs[4], labs[15])
tab8 <- tab8[c(3, 5:14, 16), ]
departmentCode <- rep(400, nrow(tab8))
department <- rep(labs[1], nrow(tab8))
categoryCode <- c(21, rep(22, 10), 26)
category <- c(labs[2], rep(labs[4], 10), labs[15])
tab8 <- data.frame(departmentCode, department, categoryCode, category, tab8)
tab8$department <- str_to_title(string = tab8$department)
tab8$category <- str_to_title(string = tab8$category)
names(tab8) <- c("departmentCode", "department",
                 "categoryCode", "category",
                 "itemCode", "item",
                 "actual_2016_2017",
                 "budget_2017_2018", "outturn_2017_2018",
                 "budget_2018_2019", "projection_2019_2020",
                 "projection_2020_2021")
## Item code and the six amount columns become numeric; item stays character
for(j in c(5, 7:12)) {
  tab8[ , j] <- as.numeric(as.character(tab8[ , j]))
}
tab8[ , 6] <- as.character(tab8[ , 6])
################################################################################
mohHealthPlanning2018 <- tab8
usethis::use_data(mohHealthPlanning2018, overwrite = TRUE)
################################################################################
## Rebuild the expenditure table held in rows 5-20 of health2[[10]]
## (department code 500) and save it as `mohHealthVital2018`.
tab9 <- health2[[10]][5:20, ]
## Rows 8, 10 and 14 only contribute first-column text to the row above them;
## append that text and then drop those rows
for(j in c(7, 9, 13)) {
  tab9[j, 1] <- paste(tab9[j, 1], tab9[j + 1, 1], sep = " ")
}
tab9 <- tab9[c(1:7, 9, 11:13, 15:16), ]
## Hyphens and en dashes in the description column become spaces so the words
## on either side remain separate tokens; remaining punctuation is removed
## from columns 1-6
tab9[ , 1] <- str_replace_all(string = tab9[ , 1], pattern = " - ", replacement = " ")
tab9[ , 1] <- str_replace_all(string = tab9[ , 1], pattern = "-", replacement = " ")
tab9[ , 1] <- str_replace_all(string = tab9[ , 1], pattern = "–", replacement = " ")
for(j in 1:6) {
  tab9[ , j] <- str_replace_all(string = tab9[ , j], pattern = "[[:punct:]]", replacement = "")
}
## Collapse each row to one string and re-split it into space-separated tokens
tab9 <- paste(tab9[ , 1], tab9[ , 2], tab9[ , 3], tab9[ , 4], tab9[ , 5], tab9[ , 6], sep = " ")
x <- str_split_fixed(string = tab9, pattern = " ", n = 13)
## One label and seven numeric fields per row
labs <- character(nrow(x))
df <- matrix(data = NA, nrow = nrow(x), ncol = 7)
for(i in seq_len(nrow(x))) {
  ## Purely alphabetic tokens make up the label. Joining with `collapse` gives
  ## an empty label for a row without words (the earlier 1:length(words) loop
  ## produced "NA " in that case)
  words <- x[i, ][str_detect(string = x[i, ], pattern = "^[a-zA-Z]+$")]
  labs[i] <- paste(words, collapse = " ")
  ## Purely numeric tokens (1 to 10 digits) fill the seven number slots
  numbers <- x[i, ][str_detect(string = x[i, ],
    pattern = "^(\\d{1}|\\d{2}|\\d{3}|\\d{4}|\\d{5}|\\d{6}|\\d{7}|\\d{8}|\\d{9}|\\d{10})$")]
  numbers <- as.numeric(numbers)
  df[i, ] <- numbers
}
## First number, then the label, then the six remaining numbers
tab9 <- cbind(df[ , 1], labs, df[ , 2:7])
## Keep the line-item rows; the department and category names are taken from
## the labels of rows that are not kept (labs[1], labs[2], labs[5])
tab9 <- tab9[c(3:4, 6:13), ]
departmentCode <- rep(500, nrow(tab9))
department <- rep(labs[1], nrow(tab9))
categoryCode <- c(rep(21, 2), rep(22, 8))
category <- c(rep(labs[2], 2), rep(labs[5], 8))
tab9 <- data.frame(departmentCode, department, categoryCode, category, tab9)
tab9$department <- str_to_title(string = tab9$department)
tab9$category <- str_to_title(string = tab9$category)
names(tab9) <- c("departmentCode", "department",
                 "categoryCode", "category",
                 "itemCode", "item",
                 "actual_2016_2017",
                 "budget_2017_2018", "outturn_2017_2018",
                 "budget_2018_2019", "projection_2019_2020",
                 "projection_2020_2021")
## Item code and the six amount columns become numeric; item stays character
for(j in c(5, 7:12)) {
  tab9[ , j] <- as.numeric(as.character(tab9[ , j]))
}
tab9[ , 6] <- as.character(tab9[ , 6])
################################################################################
mohHealthVital2018 <- tab9
usethis::use_data(mohHealthVital2018, overwrite = TRUE)
################################################################################
## Rebuild the expenditure table that spans rows 32-48 of health2[[10]] and
## rows 5-17 of health2[[11]] (department code 600) and save it as
## `mohHealthAdmin2018`.
tab10 <- rbind(health2[[10]][32:48, ], health2[[11]][5:17, ])
## Rows 2, 10, 12, 15 and 22 only contribute first-column text to the row above
## them; append that text and then drop those rows
for(j in c(1, 9, 11, 14, 21)) {
  tab10[j, 1] <- paste(tab10[j, 1], tab10[j + 1, 1], sep = " ")
}
tab10 <- tab10[c(1, 3:9, 11, 13:14, 16:21, 23:30), ]
## Hyphens and en dashes in the description column become spaces so the words
## on either side remain separate tokens; remaining punctuation is removed
## from columns 1-6
tab10[ , 1] <- str_replace_all(string = tab10[ , 1], pattern = " - ", replacement = " ")
tab10[ , 1] <- str_replace_all(string = tab10[ , 1], pattern = "-", replacement = " ")
tab10[ , 1] <- str_replace_all(string = tab10[ , 1], pattern = "–", replacement = " ")
for(j in 1:6) {
  tab10[ , j] <- str_replace_all(string = tab10[ , j], pattern = "[[:punct:]]", replacement = "")
}
## Collapse each row to one string and re-split it into space-separated tokens
tab10 <- paste(tab10[ , 1], tab10[ , 2], tab10[ , 3], tab10[ , 4], tab10[ , 5], tab10[ , 6], sep = " ")
x <- str_split_fixed(string = tab10, pattern = " ", n = 14)
## One label and seven numeric fields per row
labs <- character(nrow(x))
df <- matrix(data = NA, nrow = nrow(x), ncol = 7)
for(i in seq_len(nrow(x))) {
  ## Purely alphabetic tokens make up the label. Joining with `collapse` gives
  ## an empty label for a row without words (the earlier 1:length(words) loop
  ## produced "NA " in that case)
  words <- x[i, ][str_detect(string = x[i, ], pattern = "^[a-zA-Z]+$")]
  labs[i] <- paste(words, collapse = " ")
  ## Purely numeric tokens (1 to 10 digits) fill the seven number slots
  numbers <- x[i, ][str_detect(string = x[i, ],
    pattern = "^(\\d{1}|\\d{2}|\\d{3}|\\d{4}|\\d{5}|\\d{6}|\\d{7}|\\d{8}|\\d{9}|\\d{10})$")]
  numbers <- as.numeric(numbers)
  df[i, ] <- numbers
}
## First number, then the label, then the six remaining numbers
tab10 <- cbind(df[ , 1], labs, df[ , 2:7])
## Keep the line-item rows; the department and category names are taken from
## the labels of rows that are not kept (labs[1], labs[2], labs[6], labs[24])
tab10 <- tab10[c(3:5, 7:23, 25), ]
departmentCode <- rep(600, nrow(tab10))
department <- rep(labs[1], nrow(tab10))
categoryCode <- c(rep(21, 3), rep(22, 17), 31)
category <- c(rep(labs[2], 3), rep(labs[6], 17), labs[24])
tab10 <- data.frame(departmentCode, department, categoryCode, category, tab10)
tab10$department <- str_to_title(string = tab10$department)
tab10$category <- str_to_title(string = tab10$category)
names(tab10) <- c("departmentCode", "department",
                  "categoryCode", "category",
                  "itemCode", "item",
                  "actual_2016_2017",
                  "budget_2017_2018", "outturn_2017_2018",
                  "budget_2018_2019", "projection_2019_2020",
                  "projection_2020_2021")
## Item code and the six amount columns become numeric; item stays character
for(j in c(5, 7:12)) {
  tab10[ , j] <- as.numeric(as.character(tab10[ , j]))
}
tab10[ , 6] <- as.character(tab10[ , 6])
################################################################################
mohHealthAdmin2018 <- tab10
usethis::use_data(mohHealthAdmin2018, overwrite = TRUE)
################################################################################
## Rebuild the expenditure tables for department codes 702, 704, 709 and 712,
## stacked from health2[[11]], health2[[12]] and health2[[13]], and save the
## result as `mohHealthCountyAdd2018`.
tab11 <- rbind(health2[[11]][27:30, ], health2[[12]][c(5:10, 20:27, 37:45), ], health2[[13]][5:8, ])
## Rows 8, 10, 16, 25 and 29 only contribute first-column text to the row above
## them; append that text and then drop those rows
for(j in c(7, 9, 15, 24, 28)) {
  tab11[j, 1] <- paste(tab11[j, 1], tab11[j + 1, 1], sep = " ")
}
tab11 <- tab11[c(1:7, 9, 11:15, 17:24, 26:28, 30:31), ]
## Hyphens and en dashes in the description column become spaces so the words
## on either side remain separate tokens; remaining punctuation is removed
## from columns 1-6
tab11[ , 1] <- str_replace_all(string = tab11[ , 1], pattern = " - ", replacement = " ")
tab11[ , 1] <- str_replace_all(string = tab11[ , 1], pattern = "-", replacement = " ")
tab11[ , 1] <- str_replace_all(string = tab11[ , 1], pattern = "–", replacement = " ")
for(j in 1:6) {
  tab11[ , j] <- str_replace_all(string = tab11[ , j], pattern = "[[:punct:]]", replacement = "")
}
## Collapse each row to one string and re-split it into space-separated tokens
tab11 <- paste(tab11[ , 1], tab11[ , 2], tab11[ , 3], tab11[ , 4], tab11[ , 5], tab11[ , 6], sep = " ")
x <- str_split_fixed(string = tab11, pattern = " ", n = 14)
## One label and seven numeric fields per row
labs <- character(nrow(x))
df <- matrix(data = NA, nrow = nrow(x), ncol = 7)
for(i in seq_len(nrow(x))) {
  ## Purely alphabetic tokens make up the label. Joining with `collapse` gives
  ## an empty label for a row without words (the earlier 1:length(words) loop
  ## produced "NA " in that case)
  words <- x[i, ][str_detect(string = x[i, ], pattern = "^[a-zA-Z]+$")]
  labs[i] <- paste(words, collapse = " ")
  ## Purely numeric tokens (1 to 10 digits) fill the seven number slots
  numbers <- x[i, ][str_detect(string = x[i, ],
    pattern = "^(\\d{1}|\\d{2}|\\d{3}|\\d{4}|\\d{5}|\\d{6}|\\d{7}|\\d{8}|\\d{9}|\\d{10})$")]
  numbers <- as.numeric(numbers)
  df[i, ] <- numbers
}
## First number, then the label, then the six remaining numbers
tab11 <- cbind(df[ , 1], labs, df[ , 2:7])
## Keep the line-item rows of the four departments; department and category
## names are taken from the labels of rows that are not kept
tab11 <- tab11[c(3:4, 7:8, 11:15, 18:26), ]
departmentCode <- c(rep(702, 2), rep(704, 2), rep(709, 5), rep(712, 9))
department <- c(rep(labs[1], 2), rep(labs[5], 2), rep(labs[9], 5), rep(labs[16], 9))
categoryCode <- c(rep(26, 2), rep(26, 2), rep(26, 5), rep(26, 9))
category <- c(rep(labs[2], 2), rep(labs[6], 2), rep(labs[10], 5), rep(labs[17], 9))
tab11 <- data.frame(departmentCode, department, categoryCode, category, tab11)
tab11$department <- str_to_title(string = tab11$department)
tab11$category <- str_to_title(string = tab11$category)
names(tab11) <- c("departmentCode", "department",
                  "categoryCode", "category",
                  "itemCode", "item",
                  "actual_2016_2017",
                  "budget_2017_2018", "outturn_2017_2018",
                  "budget_2018_2019", "projection_2019_2020",
                  "projection_2020_2021")
## Item code and the six amount columns become numeric; item stays character
for(j in c(5, 7:12)) {
  tab11[ , j] <- as.numeric(as.character(tab11[ , j]))
}
tab11[ , 6] <- as.character(tab11[ , 6])
################################################################################
mohHealthCountyAdd2018 <- tab11
usethis::use_data(mohHealthCountyAdd2018, overwrite = TRUE)
################################################################################
## Rebuild the single line item held in rows 18-20 of health2[[13]]
## (department code 5500, "General Claims").
tab12 <- health2[[13]][18:20, ]
## Hyphens and en dashes in the description column become spaces so the words
## on either side remain separate tokens; remaining punctuation is removed
## from columns 1-6
tab12[ , 1] <- str_replace_all(string = tab12[ , 1], pattern = " - ", replacement = " ")
tab12[ , 1] <- str_replace_all(string = tab12[ , 1], pattern = "-", replacement = " ")
tab12[ , 1] <- str_replace_all(string = tab12[ , 1], pattern = "–", replacement = " ")
for(j in 1:6) {
  tab12[ , j] <- str_replace_all(string = tab12[ , j], pattern = "[[:punct:]]", replacement = "")
}
## Collapse each row to one string and re-split it into space-separated tokens
tab12 <- paste(tab12[ , 1], tab12[ , 2], tab12[ , 3], tab12[ , 4], tab12[ , 5], tab12[ , 6], sep = " ")
x <- str_split_fixed(string = tab12, pattern = " ", n = 12)
## One label and seven numeric fields per row
labs <- character(nrow(x))
df <- matrix(data = NA, nrow = nrow(x), ncol = 7)
for(i in seq_len(nrow(x))) {
  ## Purely alphabetic tokens make up the label. Joining with `collapse` gives
  ## an empty label for a row without words (the earlier 1:length(words) loop
  ## produced "NA " in that case)
  words <- x[i, ][str_detect(string = x[i, ], pattern = "^[a-zA-Z]+$")]
  labs[i] <- paste(words, collapse = " ")
  ## Purely numeric tokens (1 to 10 digits) fill the seven number slots
  numbers <- x[i, ][str_detect(string = x[i, ],
    pattern = "^(\\d{1}|\\d{2}|\\d{3}|\\d{4}|\\d{5}|\\d{6}|\\d{7}|\\d{8}|\\d{9}|\\d{10})$")]
  numbers <- as.numeric(numbers)
  df[i, ] <- numbers
}
## First number, then the label, then the six remaining numbers
tab12 <- cbind(df[ , 1], labs, df[ , 2:7])
## Only the third row is kept as the line item
tab12 <- tab12[3, ]
## Department and category are written out literally (already in title case),
## so no str_to_title() step is needed for this table
departmentCode <- 5500
department <- "General Claims"
categoryCode <- 26
category <- "Grants"
## c() yields a character vector; rbind() turns it into a one-row matrix
tab12 <- data.frame(rbind(c(departmentCode, department, categoryCode, category, tab12)))
names(tab12) <- c("departmentCode", "department",
                  "categoryCode", "category",
                  "itemCode", "item",
                  "actual_2016_2017",
                  "budget_2017_2018", "outturn_2017_2018",
                  "budget_2018_2019", "projection_2019_2020",
                  "projection_2020_2021")
## Item code and the six amount columns become numeric; item stays character.
## NOTE(review): the statement for column 11 was cut off and fused with an
## unrelated `paste(tab10[9, 1], ...)` fragment; columns 11 and 12 are restored
## here following the pattern of the other tables. No export of tab12 is
## visible in this part of the file - confirm whether one is intended.
for(j in c(5, 7:12)) {
  tab12[ , j] <- as.numeric(as.character(tab12[ , j]))
}
tab12[ , 6] <- as.character(tab12[ , 6])
## NOTE(review): the statements from here to the `mohHealthAdmin2018` export
## repeat, line for line, the tail of the tab10 section that appears earlier in
## this file. The repeat has no `tab10 <- rbind(...)` start, so it would operate
## on whatever `tab10` holds at this point (earlier in the file it is already a
## data frame). This looks like an accidental paste - confirm and remove.
tab10[11, 1] <- paste(tab10[11, 1], tab10[12, 1], sep = " ")
tab10[14, 1] <- paste(tab10[14, 1], tab10[15, 1], sep = " ")
tab10[21, 1] <- paste(tab10[21, 1], tab10[22, 1], sep = " ")
tab10 <- tab10[c(1, 3:9, 11, 13:14, 16:21, 23:30), ]
## Replace hyphens/en dashes with spaces in column 1, then strip punctuation
## from columns 1-6
tab10[ , 1] <- str_replace_all(string = tab10[ , 1], pattern = " - ", replacement = " ")
tab10[ , 1] <- str_replace_all(string = tab10[ , 1], pattern = "-", replacement = " ")
tab10[ , 1] <- str_replace_all(string = tab10[ , 1], pattern = "–", replacement = " ")
tab10[ , 1] <- str_replace_all(string = tab10[ , 1], pattern = "[[:punct:]]", replacement = "")
tab10[ , 2] <- str_replace_all(string = tab10[ , 2], pattern = "[[:punct:]]", replacement = "")
tab10[ , 3] <- str_replace_all(string = tab10[ , 3], pattern = "[[:punct:]]", replacement = "")
tab10[ , 4] <- str_replace_all(string = tab10[ , 4], pattern = "[[:punct:]]", replacement = "")
tab10[ , 5] <- str_replace_all(string = tab10[ , 5], pattern = "[[:punct:]]", replacement = "")
tab10[ , 6] <- str_replace_all(string = tab10[ , 6], pattern = "[[:punct:]]", replacement = "")
## Collapse each row to one string and re-split it into space-separated tokens
tab10 <- paste(tab10[ , 1], tab10[ , 2], tab10[ , 3], tab10[ , 4], tab10[ , 5], tab10[ , 6], sep = " ")
x <- str_split_fixed(string = tab10, pattern = " ", n = 14)
labs <- NULL
df <- matrix(data = NA, nrow = nrow(x), ncol = 7)
for(i in 1:(nrow(x))) {
## Alphabetic tokens are joined into the row label
words <- x[i, ][str_detect(string = x[i, ], pattern = "^[a-zA-Z]+$")]
phrase <- ""
for(j in 1:length(words)){
phrase <- paste(phrase, words[j], sep = " ")
}
## Drop the leading space left by the first paste()
phrase <- str_remove(string = phrase, pattern = " ")
labs <- c(labs, phrase)
## Numeric tokens (1 to 10 digits) fill the seven number slots of the row
numbers <- x[i, ][str_detect(string = x[i, ],
pattern = "^(\\d{1}|\\d{2}|\\d{3}|\\d{4}|\\d{5}|\\d{6}|\\d{7}|\\d{8}|\\d{9}|\\d{10})$")]
numbers <- as.numeric(numbers)
df[i, ] <- numbers
}
## First number, then the label, then the six remaining numbers
tab10 <- cbind(df[ , 1], labs, df[ , 2:7])
tab10 <- tab10[c(3:5, 7:23, 25), ]
departmentCode <- rep(600, nrow(tab10))
department <- rep(labs[1], nrow(tab10))
categoryCode <- c(rep(21, 3), rep(22, 17), 31)
category <- c(rep(labs[2], 3), rep(labs[6], 17), labs[24])
tab10 <- data.frame(departmentCode, department, categoryCode, category, tab10)
tab10$department <- str_to_title(string = tab10$department)
tab10$category <- str_to_title(string = tab10$category)
names(tab10) <- c("departmentCode", "department",
"categoryCode", "category",
"itemCode", "item",
"actual_2016_2017",
"budget_2017_2018", "outturn_2017_2018",
"budget_2018_2019", "projection_2019_2020",
"projection_2020_2021")
## Item code and amount columns to numeric; item to character
tab10[ , 5] <- as.numeric(as.character(tab10[ , 5]))
tab10[ , 6] <- as.character(tab10[ , 6])
tab10[ , 7] <- as.numeric(as.character(tab10[ , 7]))
tab10[ , 8] <- as.numeric(as.character(tab10[ , 8]))
tab10[ , 9] <- as.numeric(as.character(tab10[ , 9]))
tab10[ , 10] <- as.numeric(as.character(tab10[ , 10]))
tab10[ , 11] <- as.numeric(as.character(tab10[ , 11]))
tab10[ , 12] <- as.numeric(as.character(tab10[ , 12]))
################################################################################
mohHealthAdmin2018 <- tab10
usethis::use_data(mohHealthAdmin2018, overwrite = TRUE)
################################################################################
## Rebuild the expenditure tables for department codes 702, 704, 709 and 712,
## stacked from health2[[11]], health2[[12]] and health2[[13]], and save the
## result as `mohHealthCountyAdd2018`.
## NOTE(review): an identical tab11 section appears earlier in this file;
## confirm whether this repeat is intended.
tab11 <- rbind(health2[[11]][27:30, ], health2[[12]][c(5:10, 20:27, 37:45), ], health2[[13]][5:8, ])
## Rows 8, 10, 16, 25 and 29 only contribute first-column text to the row above
## them; append that text and then drop those rows
for(j in c(7, 9, 15, 24, 28)) {
  tab11[j, 1] <- paste(tab11[j, 1], tab11[j + 1, 1], sep = " ")
}
tab11 <- tab11[c(1:7, 9, 11:15, 17:24, 26:28, 30:31), ]
## Hyphens and en dashes in the description column become spaces so the words
## on either side remain separate tokens; remaining punctuation is removed
## from columns 1-6
tab11[ , 1] <- str_replace_all(string = tab11[ , 1], pattern = " - ", replacement = " ")
tab11[ , 1] <- str_replace_all(string = tab11[ , 1], pattern = "-", replacement = " ")
tab11[ , 1] <- str_replace_all(string = tab11[ , 1], pattern = "–", replacement = " ")
for(j in 1:6) {
  tab11[ , j] <- str_replace_all(string = tab11[ , j], pattern = "[[:punct:]]", replacement = "")
}
## Collapse each row to one string and re-split it into space-separated tokens
tab11 <- paste(tab11[ , 1], tab11[ , 2], tab11[ , 3], tab11[ , 4], tab11[ , 5], tab11[ , 6], sep = " ")
x <- str_split_fixed(string = tab11, pattern = " ", n = 14)
## One label and seven numeric fields per row
labs <- character(nrow(x))
df <- matrix(data = NA, nrow = nrow(x), ncol = 7)
for(i in seq_len(nrow(x))) {
  ## Purely alphabetic tokens make up the label. Joining with `collapse` gives
  ## an empty label for a row without words (the earlier 1:length(words) loop
  ## produced "NA " in that case)
  words <- x[i, ][str_detect(string = x[i, ], pattern = "^[a-zA-Z]+$")]
  labs[i] <- paste(words, collapse = " ")
  ## Purely numeric tokens (1 to 10 digits) fill the seven number slots
  numbers <- x[i, ][str_detect(string = x[i, ],
    pattern = "^(\\d{1}|\\d{2}|\\d{3}|\\d{4}|\\d{5}|\\d{6}|\\d{7}|\\d{8}|\\d{9}|\\d{10})$")]
  numbers <- as.numeric(numbers)
  df[i, ] <- numbers
}
## First number, then the label, then the six remaining numbers
tab11 <- cbind(df[ , 1], labs, df[ , 2:7])
## Keep the line-item rows of the four departments; department and category
## names are taken from the labels of rows that are not kept
tab11 <- tab11[c(3:4, 7:8, 11:15, 18:26), ]
departmentCode <- c(rep(702, 2), rep(704, 2), rep(709, 5), rep(712, 9))
department <- c(rep(labs[1], 2), rep(labs[5], 2), rep(labs[9], 5), rep(labs[16], 9))
categoryCode <- c(rep(26, 2), rep(26, 2), rep(26, 5), rep(26, 9))
category <- c(rep(labs[2], 2), rep(labs[6], 2), rep(labs[10], 5), rep(labs[17], 9))
tab11 <- data.frame(departmentCode, department, categoryCode, category, tab11)
tab11$department <- str_to_title(string = tab11$department)
tab11$category <- str_to_title(string = tab11$category)
names(tab11) <- c("departmentCode", "department",
                  "categoryCode", "category",
                  "itemCode", "item",
                  "actual_2016_2017",
                  "budget_2017_2018", "outturn_2017_2018",
                  "budget_2018_2019", "projection_2019_2020",
                  "projection_2020_2021")
## Item code and the six amount columns become numeric; item stays character
for(j in c(5, 7:12)) {
  tab11[ , j] <- as.numeric(as.character(tab11[ , j]))
}
tab11[ , 6] <- as.character(tab11[ , 6])
################################################################################
mohHealthCountyAdd2018 <- tab11
usethis::use_data(mohHealthCountyAdd2018, overwrite = TRUE)
################################################################################
## Rebuild the single line item held in rows 18-20 of health2[[13]]
## (department code 5500, "General Claims").
## NOTE(review): a tab12 section with the same steps appears earlier in this
## file; confirm whether this repeat is intended.
tab12 <- health2[[13]][18:20, ]
## Hyphens and en dashes in the description column become spaces so the words
## on either side remain separate tokens; remaining punctuation is removed
## from columns 1-6
tab12[ , 1] <- str_replace_all(string = tab12[ , 1], pattern = " - ", replacement = " ")
tab12[ , 1] <- str_replace_all(string = tab12[ , 1], pattern = "-", replacement = " ")
tab12[ , 1] <- str_replace_all(string = tab12[ , 1], pattern = "–", replacement = " ")
for(j in 1:6) {
  tab12[ , j] <- str_replace_all(string = tab12[ , j], pattern = "[[:punct:]]", replacement = "")
}
## Collapse each row to one string and re-split it into space-separated tokens
tab12 <- paste(tab12[ , 1], tab12[ , 2], tab12[ , 3], tab12[ , 4], tab12[ , 5], tab12[ , 6], sep = " ")
x <- str_split_fixed(string = tab12, pattern = " ", n = 12)
## One label and seven numeric fields per row
labs <- character(nrow(x))
df <- matrix(data = NA, nrow = nrow(x), ncol = 7)
for(i in seq_len(nrow(x))) {
  ## Purely alphabetic tokens make up the label. Joining with `collapse` gives
  ## an empty label for a row without words (the earlier 1:length(words) loop
  ## produced "NA " in that case)
  words <- x[i, ][str_detect(string = x[i, ], pattern = "^[a-zA-Z]+$")]
  labs[i] <- paste(words, collapse = " ")
  ## Purely numeric tokens (1 to 10 digits) fill the seven number slots
  numbers <- x[i, ][str_detect(string = x[i, ],
    pattern = "^(\\d{1}|\\d{2}|\\d{3}|\\d{4}|\\d{5}|\\d{6}|\\d{7}|\\d{8}|\\d{9}|\\d{10})$")]
  numbers <- as.numeric(numbers)
  df[i, ] <- numbers
}
## First number, then the label, then the six remaining numbers
tab12 <- cbind(df[ , 1], labs, df[ , 2:7])
## Only the third row is kept as the line item
tab12 <- tab12[3, ]
## Department and category are written out literally (already in title case),
## so no str_to_title() step is needed for this table
departmentCode <- 5500
department <- "General Claims"
categoryCode <- 26
category <- "Grants"
## c() yields a character vector; rbind() turns it into a one-row matrix
tab12 <- data.frame(rbind(c(departmentCode, department, categoryCode, category, tab12)))
names(tab12) <- c("departmentCode", "department",
                  "categoryCode", "category",
                  "itemCode", "item",
                  "actual_2016_2017",
                  "budget_2017_2018", "outturn_2017_2018",
                  "budget_2018_2019", "projection_2019_2020",
                  "projection_2020_2021")
## Item code and the six amount columns become numeric; item stays character.
## NOTE(review): the statement for column 11 was cut off after `tab12[ , 11]`
## and column 12 was missing; both are restored here following the pattern of
## the other tables. No export of tab12 is visible in this part of the file -
## confirm whether one is intended.
for(j in c(5, 7:12)) {
  tab12[ , j] <- as.numeric(as.character(tab12[ , j]))
}
tab12[ , 6] <- as.character(tab12[ , 6])
## NOTE(review): the statements from here to the `mohHealthPreventive2018`
## export repeat the tail of the tab7 section that appears earlier in this
## file, starting in the middle of its `for` loop. The closing brace below has
## no matching `for(...) {` in this repeat, so the file cannot be parsed as it
## stands. This looks like an accidental paste - confirm and remove.
phrase <- str_remove(string = phrase, pattern = " ")
labs <- c(labs, phrase)
## Numeric tokens (1 to 10 digits) fill the seven number slots of the row
numbers <- x[i, ][str_detect(string = x[i, ],
pattern = "^(\\d{1}|\\d{2}|\\d{3}|\\d{4}|\\d{5}|\\d{6}|\\d{7}|\\d{8}|\\d{9}|\\d{10})$")]
#numbers <- as.numeric(numbers)
df[i, ] <- numbers
}
## First number, then the label, then the six remaining numbers
tab7 <- cbind(df[ , 1], labs, df[ , 2:7])
tab7 <- tab7[c(3, 5:12, 14), ]
departmentCode <- rep(200, nrow(tab7))
department <- rep(labs[1], nrow(tab7))
categoryCode <- c(21, rep(22, 8), 26)
category <- c(labs[2], rep(labs[4], 8), labs[13])
tab7 <- data.frame(departmentCode, department, categoryCode, category, tab7)
tab7$department <- str_to_title(string = tab7$department)
tab7$category <- str_to_title(string = tab7$category)
names(tab7) <- c("departmentCode", "department",
"categoryCode", "category",
"itemCode", "item",
"actual_2016_2017",
"budget_2017_2018", "outturn_2017_2018",
"budget_2018_2019", "projection_2019_2020",
"projection_2020_2021")
## Item code and amount columns to numeric; item to character
tab7[ , 5] <- as.numeric(as.character(tab7[ , 5]))
tab7[ , 6] <- as.character(tab7[ , 6])
tab7[ , 7] <- as.numeric(as.character(tab7[ , 7]))
tab7[ , 8] <- as.numeric(as.character(tab7[ , 8]))
tab7[ , 9] <- as.numeric(as.character(tab7[ , 9]))
tab7[ , 10] <- as.numeric(as.character(tab7[ , 10]))
tab7[ , 11] <- as.numeric(as.character(tab7[ , 11]))
tab7[ , 12] <- as.numeric(as.character(tab7[ , 12]))
################################################################################
mohHealthPreventive2018 <- tab7
usethis::use_data(mohHealthPreventive2018, overwrite = TRUE)
################################################################################
## Build the department-code-400 table (saved as mohHealthPlanning2018) from
## rows 10-30 of the ninth table in health2.
tab8 <- health2[[9]][10:30, ]
## Append the first-column text of the following row to rows 1, 7, 9, 11 and 16
## (presumably labels wrapped over two lines -- confirm against the PDF), then
## drop the rows that were absorbed.
for (r in c(1, 7, 9, 11, 16)) {
  tab8[r, 1] <- paste(tab8[r, 1], tab8[r + 1, 1], sep = " ")
}
tab8 <- tab8[c(1, 3:7, 9, 11, 13:16, 18:21), ]
## Hyphens in column 1 become spaces; all remaining punctuation is removed
## from columns 1-6.
tab8[ , 1] <- str_replace_all(string = tab8[ , 1], pattern = "-", replacement = " ")
for (k in 1:6) {
  tab8[ , k] <- str_replace_all(string = tab8[ , k], pattern = "[[:punct:]]", replacement = "")
}
## One space-separated string per row, split into at most 14 tokens
tab8 <- paste(tab8[ , 1], tab8[ , 2], tab8[ , 3], tab8[ , 4], tab8[ , 5], tab8[ , 6], sep = " ")
x <- str_split_fixed(string = tab8, pattern = " ", n = 14)
## labs: one label per row of x; df: 7 numeric columns per row of x
labs <- character(nrow(x))
df <- matrix(data = NA, nrow = nrow(x), ncol = 7)
for (i in seq_len(nrow(x))) {
  ## Alphabetic tokens form the label; collapse gives "" (not "NA ") when a
  ## row has no alphabetic token
  words <- x[i, ][str_detect(string = x[i, ], pattern = "^[a-zA-Z]+$")]
  labs[i] <- paste(words, collapse = " ")
  ## Tokens consisting of 1 to 10 digits are the numeric fields of the row
  numbers <- x[i, ][str_detect(string = x[i, ],
    pattern = "^(\\d{1}|\\d{2}|\\d{3}|\\d{4}|\\d{5}|\\d{6}|\\d{7}|\\d{8}|\\d{9}|\\d{10})$")]
  numbers <- as.numeric(numbers)
  df[i, ] <- numbers
}
## Label goes between the first numeric column and the remaining six; keep
## rows 3, 5-14 and 16
tab8 <- cbind(df[ , 1], labs, df[ , 2:7])
tab8 <- tab8[c(3, 5:14, 16), ]
## Department code 400 and the first label apply to every kept row; category
## codes/labels are 21 (labs[2]), 22 (labs[4], ten rows) and 26 (labs[15])
departmentCode <- rep(400, nrow(tab8))
department <- rep(labs[1], nrow(tab8))
categoryCode <- c(21, rep(22, 10), 26)
category <- c(labs[2], rep(labs[4], 10), labs[15])
tab8 <- data.frame(departmentCode, department, categoryCode, category, tab8)
tab8$department <- str_to_title(string = tab8$department)
tab8$category <- str_to_title(string = tab8$category)
names(tab8) <- c("departmentCode", "department",
                 "categoryCode", "category",
                 "itemCode", "item",
                 "actual_2016_2017",
                 "budget_2017_2018", "outturn_2017_2018",
                 "budget_2018_2019", "projection_2019_2020",
                 "projection_2020_2021")
## Item code and amount columns become numeric; the item label stays character
for (k in c(5, 7:12)) {
  tab8[ , k] <- as.numeric(as.character(tab8[ , k]))
}
tab8[ , 6] <- as.character(tab8[ , 6])
################################################################################
mohHealthPlanning2018 <- tab8
usethis::use_data(mohHealthPlanning2018, overwrite = TRUE)
################################################################################
## Build the department-code-500 table (saved as mohHealthVital2018) from
## rows 5-20 of the tenth table in health2.
tab9 <- health2[[10]][5:20, ]
## Append the first-column text of the following row to rows 7, 9 and 13
## (presumably labels wrapped over two lines -- confirm against the PDF), then
## drop the rows that were absorbed.
for (r in c(7, 9, 13)) {
  tab9[r, 1] <- paste(tab9[r, 1], tab9[r + 1, 1], sep = " ")
}
tab9 <- tab9[c(1:7, 9, 11:13, 15:16), ]
## Spaced hyphens, bare hyphens and en dashes in column 1 become spaces (in
## that order); all remaining punctuation is removed from columns 1-6.
for (dash in c(" - ", "-", "–")) {
  tab9[ , 1] <- str_replace_all(string = tab9[ , 1], pattern = dash, replacement = " ")
}
for (k in 1:6) {
  tab9[ , k] <- str_replace_all(string = tab9[ , k], pattern = "[[:punct:]]", replacement = "")
}
## One space-separated string per row, split into at most 13 tokens
tab9 <- paste(tab9[ , 1], tab9[ , 2], tab9[ , 3], tab9[ , 4], tab9[ , 5], tab9[ , 6], sep = " ")
x <- str_split_fixed(string = tab9, pattern = " ", n = 13)
## labs: one label per row of x; df: 7 numeric columns per row of x
labs <- character(nrow(x))
df <- matrix(data = NA, nrow = nrow(x), ncol = 7)
for (i in seq_len(nrow(x))) {
  ## Alphabetic tokens form the label; collapse gives "" (not "NA ") when a
  ## row has no alphabetic token
  words <- x[i, ][str_detect(string = x[i, ], pattern = "^[a-zA-Z]+$")]
  labs[i] <- paste(words, collapse = " ")
  ## Tokens consisting of 1 to 10 digits are the numeric fields of the row
  numbers <- x[i, ][str_detect(string = x[i, ],
    pattern = "^(\\d{1}|\\d{2}|\\d{3}|\\d{4}|\\d{5}|\\d{6}|\\d{7}|\\d{8}|\\d{9}|\\d{10})$")]
  numbers <- as.numeric(numbers)
  df[i, ] <- numbers
}
## Label goes between the first numeric column and the remaining six; keep
## rows 3-4 and 6-13
tab9 <- cbind(df[ , 1], labs, df[ , 2:7])
tab9 <- tab9[c(3:4, 6:13), ]
## Department code 500 and the first label apply to every kept row; category
## codes/labels are 21 (labs[2], two rows) and 22 (labs[5], eight rows)
departmentCode <- rep(500, nrow(tab9))
department <- rep(labs[1], nrow(tab9))
categoryCode <- c(rep(21, 2), rep(22, 8))
category <- c(rep(labs[2], 2), rep(labs[5], 8))
tab9 <- data.frame(departmentCode, department, categoryCode, category, tab9)
tab9$department <- str_to_title(string = tab9$department)
tab9$category <- str_to_title(string = tab9$category)
names(tab9) <- c("departmentCode", "department",
                 "categoryCode", "category",
                 "itemCode", "item",
                 "actual_2016_2017",
                 "budget_2017_2018", "outturn_2017_2018",
                 "budget_2018_2019", "projection_2019_2020",
                 "projection_2020_2021")
## Item code and amount columns become numeric; the item label stays character
for (k in c(5, 7:12)) {
  tab9[ , k] <- as.numeric(as.character(tab9[ , k]))
}
tab9[ , 6] <- as.character(tab9[ , 6])
################################################################################
mohHealthVital2018 <- tab9
usethis::use_data(mohHealthVital2018, overwrite = TRUE)
################################################################################
## Build the department-code-600 table (saved as mohHealthAdmin2018) from
## rows 32-48 of the tenth and rows 5-17 of the eleventh table in health2.
tab10 <- rbind(health2[[10]][32:48, ], health2[[11]][5:17, ])
## Append the first-column text of the following row to rows 1, 9, 11, 14 and
## 21 (presumably labels wrapped over two lines -- confirm against the PDF),
## then drop the rows that were absorbed.
for (r in c(1, 9, 11, 14, 21)) {
  tab10[r, 1] <- paste(tab10[r, 1], tab10[r + 1, 1], sep = " ")
}
tab10 <- tab10[c(1, 3:9, 11, 13:14, 16:21, 23:30), ]
## Spaced hyphens, bare hyphens and en dashes in column 1 become spaces (in
## that order); all remaining punctuation is removed from columns 1-6.
for (dash in c(" - ", "-", "–")) {
  tab10[ , 1] <- str_replace_all(string = tab10[ , 1], pattern = dash, replacement = " ")
}
for (k in 1:6) {
  tab10[ , k] <- str_replace_all(string = tab10[ , k], pattern = "[[:punct:]]", replacement = "")
}
## One space-separated string per row, split into at most 14 tokens
tab10 <- paste(tab10[ , 1], tab10[ , 2], tab10[ , 3], tab10[ , 4], tab10[ , 5], tab10[ , 6], sep = " ")
x <- str_split_fixed(string = tab10, pattern = " ", n = 14)
## labs: one label per row of x; df: 7 numeric columns per row of x
labs <- character(nrow(x))
df <- matrix(data = NA, nrow = nrow(x), ncol = 7)
for (i in seq_len(nrow(x))) {
  ## Alphabetic tokens form the label; collapse gives "" (not "NA ") when a
  ## row has no alphabetic token
  words <- x[i, ][str_detect(string = x[i, ], pattern = "^[a-zA-Z]+$")]
  labs[i] <- paste(words, collapse = " ")
  ## Tokens consisting of 1 to 10 digits are the numeric fields of the row
  numbers <- x[i, ][str_detect(string = x[i, ],
    pattern = "^(\\d{1}|\\d{2}|\\d{3}|\\d{4}|\\d{5}|\\d{6}|\\d{7}|\\d{8}|\\d{9}|\\d{10})$")]
  numbers <- as.numeric(numbers)
  df[i, ] <- numbers
}
## Label goes between the first numeric column and the remaining six; keep
## rows 3-5, 7-23 and 25
tab10 <- cbind(df[ , 1], labs, df[ , 2:7])
tab10 <- tab10[c(3:5, 7:23, 25), ]
## Department code 600 and the first label apply to every kept row; category
## codes/labels are 21 (labs[2], three rows), 22 (labs[6], seventeen rows)
## and 31 (labs[24])
departmentCode <- rep(600, nrow(tab10))
department <- rep(labs[1], nrow(tab10))
categoryCode <- c(rep(21, 3), rep(22, 17), 31)
category <- c(rep(labs[2], 3), rep(labs[6], 17), labs[24])
tab10 <- data.frame(departmentCode, department, categoryCode, category, tab10)
tab10$department <- str_to_title(string = tab10$department)
tab10$category <- str_to_title(string = tab10$category)
names(tab10) <- c("departmentCode", "department",
                  "categoryCode", "category",
                  "itemCode", "item",
                  "actual_2016_2017",
                  "budget_2017_2018", "outturn_2017_2018",
                  "budget_2018_2019", "projection_2019_2020",
                  "projection_2020_2021")
## Item code and amount columns become numeric; the item label stays character
for (k in c(5, 7:12)) {
  tab10[ , k] <- as.numeric(as.character(tab10[ , k]))
}
tab10[ , 6] <- as.character(tab10[ , 6])
################################################################################
mohHealthAdmin2018 <- tab10
usethis::use_data(mohHealthAdmin2018, overwrite = TRUE)
################################################################################
## Build the table for department codes 702, 704, 709 and 712 (saved as
## mohHealthCountyAdd2018) from rows of the 11th, 12th and 13th tables in
## health2.
tab11 <- rbind(health2[[11]][27:30, ], health2[[12]][c(5:10, 20:27, 37:45), ], health2[[13]][5:8, ])
## Append the first-column text of the following row to rows 7, 9, 15, 24 and
## 28 (presumably labels wrapped over two lines -- confirm against the PDF),
## then drop the rows that were absorbed.
for (r in c(7, 9, 15, 24, 28)) {
  tab11[r, 1] <- paste(tab11[r, 1], tab11[r + 1, 1], sep = " ")
}
tab11 <- tab11[c(1:7, 9, 11:15, 17:24, 26:28, 30:31), ]
## Spaced hyphens, bare hyphens and en dashes in column 1 become spaces (in
## that order); all remaining punctuation is removed from columns 1-6.
for (dash in c(" - ", "-", "–")) {
  tab11[ , 1] <- str_replace_all(string = tab11[ , 1], pattern = dash, replacement = " ")
}
for (k in 1:6) {
  tab11[ , k] <- str_replace_all(string = tab11[ , k], pattern = "[[:punct:]]", replacement = "")
}
## One space-separated string per row, split into at most 14 tokens
tab11 <- paste(tab11[ , 1], tab11[ , 2], tab11[ , 3], tab11[ , 4], tab11[ , 5], tab11[ , 6], sep = " ")
x <- str_split_fixed(string = tab11, pattern = " ", n = 14)
## labs: one label per row of x; df: 7 numeric columns per row of x
labs <- character(nrow(x))
df <- matrix(data = NA, nrow = nrow(x), ncol = 7)
for (i in seq_len(nrow(x))) {
  ## Alphabetic tokens form the label; collapse gives "" (not "NA ") when a
  ## row has no alphabetic token
  words <- x[i, ][str_detect(string = x[i, ], pattern = "^[a-zA-Z]+$")]
  labs[i] <- paste(words, collapse = " ")
  ## Tokens consisting of 1 to 10 digits are the numeric fields of the row
  numbers <- x[i, ][str_detect(string = x[i, ],
    pattern = "^(\\d{1}|\\d{2}|\\d{3}|\\d{4}|\\d{5}|\\d{6}|\\d{7}|\\d{8}|\\d{9}|\\d{10})$")]
  numbers <- as.numeric(numbers)
  df[i, ] <- numbers
}
## Label goes between the first numeric column and the remaining six; keep
## rows 3-4, 7-8, 11-15 and 18-26
tab11 <- cbind(df[ , 1], labs, df[ , 2:7])
tab11 <- tab11[c(3:4, 7:8, 11:15, 18:26), ]
## Four groups of 2, 2, 5 and 9 kept rows: each group has its own department
## code and department/category labels; every row carries category code 26
departmentCode <- c(rep(702, 2), rep(704, 2), rep(709, 5), rep(712, 9))
department <- c(rep(labs[1], 2), rep(labs[5], 2), rep(labs[9], 5), rep(labs[16], 9))
categoryCode <- c(rep(26, 2), rep(26, 2), rep(26, 5), rep(26, 9))
category <- c(rep(labs[2], 2), rep(labs[6], 2), rep(labs[10], 5), rep(labs[17], 9))
tab11 <- data.frame(departmentCode, department, categoryCode, category, tab11)
tab11$department <- str_to_title(string = tab11$department)
tab11$category <- str_to_title(string = tab11$category)
names(tab11) <- c("departmentCode", "department",
                  "categoryCode", "category",
                  "itemCode", "item",
                  "actual_2016_2017",
                  "budget_2017_2018", "outturn_2017_2018",
                  "budget_2018_2019", "projection_2019_2020",
                  "projection_2020_2021")
## Item code and amount columns become numeric; the item label stays character
for (k in c(5, 7:12)) {
  tab11[ , k] <- as.numeric(as.character(tab11[ , k]))
}
tab11[ , 6] <- as.character(tab11[ , 6])
################################################################################
mohHealthCountyAdd2018 <- tab11
usethis::use_data(mohHealthCountyAdd2018, overwrite = TRUE)
################################################################################
## Build a one-row table (department code 5500) from rows 18-20 of the 13th
## table in health2.
tab12 <- health2[[13]][18:20, ]
## Spaced hyphens, bare hyphens and en dashes in column 1 become spaces; all
## remaining punctuation is then removed from columns 1-6
tab12[ , 1] <- str_replace_all(string = tab12[ , 1], pattern = " - ", replacement = " ")
tab12[ , 1] <- str_replace_all(string = tab12[ , 1], pattern = "-", replacement = " ")
tab12[ , 1] <- str_replace_all(string = tab12[ , 1], pattern = "–", replacement = " ")
tab12[ , 1] <- str_replace_all(string = tab12[ , 1], pattern = "[[:punct:]]", replacement = "")
tab12[ , 2] <- str_replace_all(string = tab12[ , 2], pattern = "[[:punct:]]", replacement = "")
tab12[ , 3] <- str_replace_all(string = tab12[ , 3], pattern = "[[:punct:]]", replacement = "")
tab12[ , 4] <- str_replace_all(string = tab12[ , 4], pattern = "[[:punct:]]", replacement = "")
tab12[ , 5] <- str_replace_all(string = tab12[ , 5], pattern = "[[:punct:]]", replacement = "")
tab12[ , 6] <- str_replace_all(string = tab12[ , 6], pattern = "[[:punct:]]", replacement = "")
## Join the six columns of every row into one space-separated string, then
## split each string into at most 12 tokens
tab12 <- paste(tab12[ , 1], tab12[ , 2], tab12[ , 3], tab12[ , 4], tab12[ , 5], tab12[ , 6], sep = " ")
x <- str_split_fixed(string = tab12, pattern = " ", n = 12)
## labs collects one label per row of x; df holds 7 numeric columns per row
labs <- NULL
df <- matrix(data = NA, nrow = nrow(x), ncol = 7)
for(i in 1:(nrow(x))) {
## Purely alphabetic tokens of row i are joined with spaces to form the label
words <- x[i, ][str_detect(string = x[i, ], pattern = "^[a-zA-Z]+$")]
phrase <- ""
for(j in 1:length(words)){
phrase <- paste(phrase, words[j], sep = " ")
}
## Remove the leading space left by the first paste() onto the empty string
phrase <- str_remove(string = phrase, pattern = " ")
labs <- c(labs, phrase)
## Tokens consisting of 1 to 10 digits are the numeric fields of row i
numbers <- x[i, ][str_detect(string = x[i, ],
pattern = "^(\\d{1}|\\d{2}|\\d{3}|\\d{4}|\\d{5}|\\d{6}|\\d{7}|\\d{8}|\\d{9}|\\d{10})$")]
numbers <- as.numeric(numbers)
df[i, ] <- numbers
}
## Place the label between the first numeric column and the remaining six
tab12 <- cbind(df[ , 1], labs, df[ , 2:7])
## Keep only the third row
tab12 <- tab12[3, ]
departmentCode <- 5500
department <- "General Claims"
categoryCode <- 26
category <- "Grants"
## One-row data.frame: the four fixed fields followed by the kept row
tab12 <- data.frame(rbind(c(departmentCode, department, categoryCode, category, tab12)))
#tab12$department <- str_to_title(string = tab12$department)
#tab12$category <- str_to_title(string = tab12$category)
names(tab12) <- c("departmentCode", "department",
"categoryCode", "category",
"itemCode", "item",
"actual_2016_2017",
"budget_2017_2018", "outturn_2017_2018",
"budget_2018_2019", "projection_2019_2020",
"projection_2020_2021")
## Convert the item code and amount columns to numeric; item stays character
tab12[ , 5] <- as.numeric(as.character(tab12[ , 5]))
tab12[ , 6] <- as.character(tab12[ , 6])
tab12[ , 7] <- as.numeric(as.character(tab12[ , 7]))
tab12[ , 8] <- as.numeric(as.character(tab12[ , 8]))
tab12[ , 9] <- as.numeric(as.character(tab12[ , 9]))
tab12[ , 10] <- as.numeric(as.character(tab12[ , 10]))
## NOTE(review): the line below only evaluates column 11; columns 11 and 12 are
## not converted here -- this looks like a truncated line, confirm.
tab12[ , 11]
## NOTE(review): the loop header and the start of the loop body that the next
## lines belong to are not present above; the closing brace below is unmatched
## within this span. This looks like the tail of the tab11 section -- confirm
## against the original script.
numbers <- x[i, ][str_detect(string = x[i, ],
pattern = "^(\\d{1}|\\d{2}|\\d{3}|\\d{4}|\\d{5}|\\d{6}|\\d{7}|\\d{8}|\\d{9}|\\d{10})$")]
numbers <- as.numeric(numbers)
df[i, ] <- numbers
}
## Place the label between the first numeric column and the remaining six,
## then keep rows 3-4, 7-8, 11-15 and 18-26
tab11 <- cbind(df[ , 1], labs, df[ , 2:7])
tab11 <- tab11[c(3:4, 7:8, 11:15, 18:26), ]
## Four groups of 2, 2, 5 and 9 kept rows: each group has its own department
## code and department/category labels; every row carries category code 26
departmentCode <- c(rep(702, 2), rep(704, 2), rep(709, 5), rep(712, 9))
department <- c(rep(labs[1], 2), rep(labs[5], 2), rep(labs[9], 5), rep(labs[16], 9))
categoryCode <- c(rep(26, 2), rep(26, 2), rep(26, 5), rep(26, 9))
category <- c(rep(labs[2], 2), rep(labs[6], 2), rep(labs[10], 5), rep(labs[17], 9))
tab11 <- data.frame(departmentCode, department, categoryCode, category, tab11)
tab11$department <- str_to_title(string = tab11$department)
tab11$category <- str_to_title(string = tab11$category)
names(tab11) <- c("departmentCode", "department",
"categoryCode", "category",
"itemCode", "item",
"actual_2016_2017",
"budget_2017_2018", "outturn_2017_2018",
"budget_2018_2019", "projection_2019_2020",
"projection_2020_2021")
## Convert the item code and amount columns to numeric; item stays character
tab11[ , 5] <- as.numeric(as.character(tab11[ , 5]))
tab11[ , 6] <- as.character(tab11[ , 6])
tab11[ , 7] <- as.numeric(as.character(tab11[ , 7]))
tab11[ , 8] <- as.numeric(as.character(tab11[ , 8]))
tab11[ , 9] <- as.numeric(as.character(tab11[ , 9]))
tab11[ , 10] <- as.numeric(as.character(tab11[ , 10]))
tab11[ , 11] <- as.numeric(as.character(tab11[ , 11]))
tab11[ , 12] <- as.numeric(as.character(tab11[ , 12]))
################################################################################
## Store the table under its dataset name via usethis::use_data()
mohHealthCountyAdd2018 <- tab11
usethis::use_data(mohHealthCountyAdd2018, overwrite = TRUE)
################################################################################
## Build the one-row department-code-5500 table (saved as mohHealthClaims2018)
## from rows 18-20 of the 13th table in health2.
tab12 <- health2[[13]][18:20, ]
## Spaced hyphens, bare hyphens and en dashes in column 1 become spaces (in
## that order); all remaining punctuation is removed from columns 1-6.
for (dash in c(" - ", "-", "–")) {
  tab12[ , 1] <- str_replace_all(string = tab12[ , 1], pattern = dash, replacement = " ")
}
for (k in 1:6) {
  tab12[ , k] <- str_replace_all(string = tab12[ , k], pattern = "[[:punct:]]", replacement = "")
}
## One space-separated string per row, split into at most 12 tokens
tab12 <- paste(tab12[ , 1], tab12[ , 2], tab12[ , 3], tab12[ , 4], tab12[ , 5], tab12[ , 6], sep = " ")
x <- str_split_fixed(string = tab12, pattern = " ", n = 12)
## labs: one label per row of x; df: 7 numeric columns per row of x
labs <- character(nrow(x))
df <- matrix(data = NA, nrow = nrow(x), ncol = 7)
for (i in seq_len(nrow(x))) {
  ## Alphabetic tokens form the label; collapse gives "" (not "NA ") when a
  ## row has no alphabetic token
  words <- x[i, ][str_detect(string = x[i, ], pattern = "^[a-zA-Z]+$")]
  labs[i] <- paste(words, collapse = " ")
  ## Tokens consisting of 1 to 10 digits are the numeric fields of the row
  numbers <- x[i, ][str_detect(string = x[i, ],
    pattern = "^(\\d{1}|\\d{2}|\\d{3}|\\d{4}|\\d{5}|\\d{6}|\\d{7}|\\d{8}|\\d{9}|\\d{10})$")]
  numbers <- as.numeric(numbers)
  df[i, ] <- numbers
}
## Label goes between the first numeric column and the remaining six; only
## the third row is kept
tab12 <- cbind(df[ , 1], labs, df[ , 2:7])
tab12 <- tab12[3, ]
## Department and category fields are fixed values here, not taken from labs
departmentCode <- 5500
department <- "General Claims"
categoryCode <- 26
category <- "Grants"
## rbind() of the single combined vector gives a one-row matrix for data.frame()
tab12 <- data.frame(rbind(c(departmentCode, department, categoryCode, category, tab12)))
#tab12$department <- str_to_title(string = tab12$department)
#tab12$category <- str_to_title(string = tab12$category)
names(tab12) <- c("departmentCode", "department",
                  "categoryCode", "category",
                  "itemCode", "item",
                  "actual_2016_2017",
                  "budget_2017_2018", "outturn_2017_2018",
                  "budget_2018_2019", "projection_2019_2020",
                  "projection_2020_2021")
## Item code and amount columns become numeric; the item label stays character
for (k in c(5, 7:12)) {
  tab12[ , k] <- as.numeric(as.character(tab12[ , k]))
}
tab12[ , 6] <- as.character(tab12[ , 6])
################################################################################
mohHealthClaims2018 <- tab12
usethis::use_data(mohHealthClaims2018, overwrite = TRUE)
################################################################################
## Stack the per-department tables row-wise into one data.frame and save it.
## NOTE(review): tab11 is not part of this stack -- confirm that leaving it out
## is intended.
mohHealthDepartment2018 <- data.frame(
  do.call(rbind, list(tab6, tab7, tab8, tab9, tab10, tab12))
)
usethis::use_data(mohHealthDepartment2018, overwrite = TRUE)
################################################################################
# Add the following code to your website.
# For more information on customizing the embed code, read Embedding Snippets.