# data-raw/processData2018.R

################################################################################
#
# Load required libraries
#
################################################################################

library(pdftools)
library(tm)
library(tabulizer)
library(stringr)
library(tidyverse)
library(tidytext)

options(scipen = 999)


################################################################################
#
# Function to process tables
#
################################################################################

## Re-create a table from a list of tokenised rows.
##
## Arguments:
##   tab   A list with one character vector of tokens per table row.
##   nrow  Number of rows of the result. Defaults to length(tab); must not be
##         smaller than length(tab).
##   ncol  Number of numeric columns of the result. Defaults to the largest
##         number of numeric tokens found in any row.
##
## Value:
##   A data.frame whose first column, `labs`, holds the row label (all purely
##   alphabetic tokens of the row joined by single spaces, "" when there are
##   none) followed by `ncol` numeric columns holding the tokens made up of
##   6, 7 or 8 digits. Rows with fewer numeric tokens than `ncol` are padded
##   with NA; rows with more raise an error.
get_table <- function(tab, nrow = NULL, ncol = NULL) {
  n_rows <- length(tab)

  ## matrix() cannot be sized with NULL, so fall back to one row per element
  if (is.null(nrow)) {
    nrow <- n_rows
  }

  if (nrow < n_rows) {
    stop("`nrow` (", nrow, ") is smaller than the number of rows in `tab` (",
         n_rows, ")", call. = FALSE)
  }

  labs <- rep(NA_character_, nrow)
  values <- vector(mode = "list", length = n_rows)

  for (i in seq_along(tab)) {
    tokens <- as.character(tab[[i]])

    ## Collapsing the alphabetic tokens gives "" for a row without any,
    ## whereas looping over 1:length(words) produced the label "NA "
    labs[i] <- paste(tokens[grepl("^[a-zA-Z]+$", tokens)], collapse = " ")

    values[[i]] <- as.numeric(
      tokens[grepl("^(\\d{6}|\\d{7}|\\d{8})$", tokens, perl = TRUE)]
    )
  }

  if (is.null(ncol)) {
    ncol <- max(0L, lengths(values))
  }

  df <- matrix(data = NA_real_, nrow = nrow, ncol = ncol)

  for (i in seq_along(values)) {
    n_values <- length(values[[i]])

    if (n_values > ncol) {
      stop("Row ", i, " has ", n_values, " numeric values but `ncol` is ",
           ncol, call. = FALSE)
    }

    ## Fill from the left so that short rows keep NA in the trailing columns
    df[i, seq_len(n_values)] <- values[[i]]
  }

  ## The original body ended with the for loop and therefore returned NULL
  data.frame(labs = labs, df, stringsAsFactors = FALSE)
}



################################################################################
#
# Create list for information and tables in page 253 (2018)
#
################################################################################

## Extract tables from page 253
health1 <- extract_tables(
  file = "data-raw/budget/2018 Ministry of Finance and Development Planning.pdf",
  pages = 253,
  method = "decide"
)

#goal <- health1[[1]][3, ]
#strategicObjective <- str_c(health1[[1]][5, ], health1[[1]][6, ], health1[[1]][7, ], sep = " ")

## First table in page 253 (rows 27 to 31 of the extracted matrix): join the
## two extracted columns, strip punctuation and split every row into up to 12
## space-delimited tokens
tab1 <- health1[[1]][27:31, ]
tab1 <- paste(tab1[ , 1], tab1[ , 2], sep = " ")
tab1 <- str_replace_all(string = tab1, pattern = "[[:punct:]]", replacement = "")
tab1 <- str_split_fixed(string = tab1, pattern = " ", n = 12)

## Re-create the first table in page 253 as a data.frame: one label and seven
## numbers (code followed by six amounts) per row
economic_classification <- character(nrow(tab1))
df <- matrix(data = NA, nrow = 5, ncol = 7)

for (i in seq_len(nrow(tab1))) {
  tokens <- tab1[i, ]

  ## Alphabetic tokens form the label. Collapsing them gives "" for a row
  ## without any, whereas looping over 1:length(words) produced "NA ".
  economic_classification[i] <- paste(
    tokens[str_detect(string = tokens, pattern = "^[a-zA-Z]+$")],
    collapse = " "
  )

  ## Tokens made up of 1 to 10 digits are the numeric fields of the row
  df[i, ] <- as.numeric(
    tokens[str_detect(string = tokens,
                      pattern = "^(\\d{1}|\\d{2}|\\d{3}|\\d{4}|\\d{5}|\\d{6}|\\d{7}|\\d{8}|\\d{9}|\\d{10})$")]
  )
}

df <- data.frame(df[ , 1], economic_classification, df[ , 2:7])
names(df) <- c("code", "economic_classification",
               "actual_2016_2017",
               "budget_2017_2018", "outturn_2017_2018",
               "budget_2018_2019", "projection_2019_2020",
               "projection_2020_2021")

df$economic_classification <- str_to_sentence(df$economic_classification)

summaryEconHealth2018 <- df

## Second table in page 253 (rows 37 to 46 of the extracted matrix), cleaned
## and tokenised in the same way as the first
tab2 <- health1[[1]][37:46, ]
tab2 <- paste(tab2[ , 1], tab2[ , 2], sep = " ")
tab2 <- str_replace_all(string = tab2, pattern = "[[:punct:]]", replacement = "")
tab2 <- str_split_fixed(string = tab2, pattern = " ", n = 12)

## Re-create the second table in page 253 as a data.frame
spending_entity <- character(nrow(tab2))
df <- matrix(data = NA, nrow = 10, ncol = 7)

for (i in seq_len(nrow(tab2))) {
  tokens <- tab2[i, ]

  spending_entity[i] <- paste(
    tokens[str_detect(string = tokens, pattern = "^[a-zA-Z]+$")],
    collapse = " "
  )

  df[i, ] <- as.numeric(
    tokens[str_detect(string = tokens,
                      pattern = "^(\\d{1}|\\d{2}|\\d{3}|\\d{4}|\\d{5}|\\d{6}|\\d{7}|\\d{8}|\\d{9}|\\d{10})$")]
  )
}

df <- data.frame(df[ , 1], spending_entity, df[ , 2:7])
names(df) <- c("code", "spending_entity",
               "actual_2016_2017",
               "budget_2017_2018", "outturn_2017_2018",
               "budget_2018_2019", "projection_2019_2020",
               "projection_2020_2021")

df$spending_entity <- str_to_title(df$spending_entity)

summarySpendingHealth2018 <- df

## Bundle both summaries into a named list and save it as package data
summaryHealth2018 <- list(
  summaryEconHealth2018 = summaryEconHealth2018,
  summarySpendingHealth2018 = summarySpendingHealth2018
)

usethis::use_data(summaryHealth2018, overwrite = TRUE)

## Tidy-up
rm(tokens, economic_classification, tab1, tab2, df,
   summaryEconHealth2018, summarySpendingHealth2018, health1)

################################################################################
#
# Process tables extracted from pages 254 to 266 (2018)
#
################################################################################

## Extract tables from pages 254 to 266
health2 <- extract_tables(
  file = "data-raw/budget/2018 Ministry of Finance and Development Planning.pdf",
  pages = 254:266,
  method = "decide"
)

## Rows 14 to 48 of the first extracted table
tab1 <- health2[[1]][14:48, ]

## Column 1: replace dashes with a space (spaced en dash, en dash, hyphen;
## the named vector is applied in that order), then strip punctuation from
## columns 1 to 6
tab1[ , 1] <- str_replace_all(string = tab1[ , 1],
                              pattern = c(" – " = " ", "–" = " ", "-" = " "))
tab1[ , 1:6] <- apply(X = tab1[ , 1:6], MARGIN = 2, FUN = str_replace_all,
                      pattern = "[[:punct:]]", replacement = "")

## Rows whose first column continues on the next row (presumably labels
## wrapped over two lines in the PDF): append the continuation, then drop it
wrapped <- c(7, 11)
tab1[wrapped, 1] <- paste(tab1[wrapped, 1], tab1[wrapped + 1, 1], sep = " ")
tab1 <- tab1[-(wrapped + 1), ]

## Column 6 is split at its first space into two columns
y <- str_split_fixed(string = tab1[ , 6], pattern = " ", n = 2)
tab1 <- cbind(tab1[ , 1:5], y)

## Column 1 is split into up to 8 tokens holding the label and two numbers
x <- str_split_fixed(string = tab1[ , 1], pattern = " ", n = 8)

labs <- character(nrow(x))
df <- matrix(data = NA, nrow = nrow(x), ncol = 2)

for (i in seq_len(nrow(x))) {
  tokens <- x[i, ]

  ## Alphabetic tokens form the label. Collapsing them gives "" for a row
  ## without any, whereas looping over 1:length(words) produced "NA ".
  labs[i] <- paste(
    tokens[str_detect(string = tokens, pattern = "^[a-zA-Z]+$")],
    collapse = " "
  )

  ## Tokens of 1 to 10 digits are the numbers of the row; a row with a single
  ## number has it recycled across both columns by the matrix assignment
  df[i, ] <- as.numeric(
    tokens[str_detect(string = tokens,
                      pattern = "^(\\d{1}|\\d{2}|\\d{3}|\\d{4}|\\d{5}|\\d{6}|\\d{7}|\\d{8}|\\d{9}|\\d{10})$")]
  )
}

## Rows 1, 5 and 27 are left out of the data; their labels are used as the
## category of the rows that follow them
tab1 <- data.frame(df[ , 1], labs, df[ , 2], tab1[ , 3:7])
tab1 <- tab1[c(2:4, 6:26, 28:33), ]

categoryCode <- rep(c(21, 22, 25), times = c(3, 21, 6))
category <- rep(labs[c(1, 5, 27)], times = c(3, 21, 6))

tab1 <- data.frame(categoryCode, category, tab1)

tab1$category <- str_to_title(tab1$category)

names(tab1) <- c("categoryCode", "category",
                 "itemCode", "item",
                 "actual_2016_2017",
                 "budget_2017_2018", "outturn_2017_2018",
                 "budget_2018_2019", "projection_2019_2020",
                 "projection_2020_2021")

## Item labels as character, amounts as numeric
tab1[[4]] <- as.character(tab1[[4]])
tab1[5:10] <- lapply(tab1[5:10], function(column) as.numeric(as.character(column)))


################################################################################

## Rows 6 to 48 of the second table extracted into health2
tab2 <- health2[[2]][6:48, ]

## Column 1: replace dashes with a space (spaced en dash, en dash, hyphen;
## the named vector is applied in that order), then strip punctuation from
## columns 1 to 6
tab2[ , 1] <- str_replace_all(string = tab2[ , 1],
                              pattern = c(" – " = " ", "–" = " ", "-" = " "))
tab2[ , 1:6] <- apply(X = tab2[ , 1:6], MARGIN = 2, FUN = str_replace_all,
                      pattern = "[[:punct:]]", replacement = "")

## Rows whose first column continues on the next row (presumably labels
## wrapped over two lines in the PDF): append the continuation, then drop it
wrapped <- c(9, 15, 31, 36)
tab2[wrapped, 1] <- paste(tab2[wrapped, 1], tab2[wrapped + 1, 1], sep = " ")
tab2 <- tab2[-(wrapped + 1), ]

## Column 6 is split at its first space into two columns
y <- str_split_fixed(string = tab2[ , 6], pattern = " ", n = 2)
tab2 <- cbind(tab2[ , 1:5], y)

## Column 1 is split into up to 8 tokens holding the label and two numbers
x <- str_split_fixed(string = tab2[ , 1], pattern = " ", n = 8)

labs <- character(nrow(x))
df <- matrix(data = NA, nrow = nrow(x), ncol = 2)

for (i in seq_len(nrow(x))) {
  tokens <- x[i, ]

  ## Alphabetic tokens form the label. Collapsing them gives "" for a row
  ## without any, whereas looping over 1:length(words) produced "NA ".
  labs[i] <- paste(
    tokens[str_detect(string = tokens, pattern = "^[a-zA-Z]+$")],
    collapse = " "
  )

  ## Tokens of 1 to 10 digits are the numbers of the row; a row with a single
  ## number has it recycled across both columns by the matrix assignment
  df[i, ] <- as.numeric(
    tokens[str_detect(string = tokens,
                      pattern = "^(\\d{1}|\\d{2}|\\d{3}|\\d{4}|\\d{5}|\\d{6}|\\d{7}|\\d{8}|\\d{9}|\\d{10})$")]
  )
}

## Row 8 is left out of the data; its label is the category of rows 9 to 39
tab2 <- data.frame(df[ , 1], labs, df[ , 2], tab2[ , 3:7])
tab2 <- tab2[c(1:7, 9:39), ]

categoryCode <- rep(c(25, 26), times = c(7, 31))
category <- rep(c("Subsidy", labs[8]), times = c(7, 31))

tab2 <- data.frame(categoryCode, category, tab2)

tab2$category <- str_to_title(tab2$category)

names(tab2) <- c("categoryCode", "category",
                 "itemCode", "item",
                 "actual_2016_2017",
                 "budget_2017_2018", "outturn_2017_2018",
                 "budget_2018_2019", "projection_2019_2020",
                 "projection_2020_2021")

## Item labels as character, amounts as numeric
tab2[[4]] <- as.character(tab2[[4]])
tab2[5:10] <- lapply(tab2[5:10], function(column) as.numeric(as.character(column)))

################################################################################

## Rows 6 to 46 of the third table extracted into health2
tab3 <- health2[[3]][6:46, ]

## Column 1: replace dashes with a space (spaced en dash, en dash, hyphen;
## the named vector is applied in that order), then strip punctuation from
## columns 1 to 6
tab3[ , 1] <- str_replace_all(string = tab3[ , 1],
                              pattern = c(" – " = " ", "–" = " ", "-" = " "))
tab3[ , 1:6] <- apply(X = tab3[ , 1:6], MARGIN = 2, FUN = str_replace_all,
                      pattern = "[[:punct:]]", replacement = "")

## Rows whose first column continues on the next row (presumably labels
## wrapped over two lines in the PDF): append the continuation to the row
wrapped <- c(1, 14, 23)
tab3[wrapped, 1] <- paste(tab3[wrapped, 1], tab3[wrapped + 1, 1], sep = " ")

## The last row gets the word "County" appended
## NOTE(review): presumably its continuation falls outside rows 6 to 46 - confirm
tab3[41, 1] <- paste(tab3[41, 1], "County", sep = " ")

## Drop the continuation rows now that their text has been merged
tab3 <- tab3[-(wrapped + 1), ]

## Column 6 is split at its first space into two columns
y <- str_split_fixed(string = tab3[ , 6], pattern = " ", n = 2)
tab3 <- cbind(tab3[ , 1:5], y)

## Column 1 is split into up to 8 tokens holding the label and two numbers
x <- str_split_fixed(string = tab3[ , 1], pattern = " ", n = 8)

labs <- character(nrow(x))
df <- matrix(data = NA, nrow = nrow(x), ncol = 2)

for (i in seq_len(nrow(x))) {
  tokens <- x[i, ]

  ## Alphabetic tokens form the label. Collapsing them gives "" for a row
  ## without any, whereas looping over 1:length(words) produced "NA ".
  labs[i] <- paste(
    tokens[str_detect(string = tokens, pattern = "^[a-zA-Z]+$")],
    collapse = " "
  )

  ## Tokens of 1 to 10 digits are the numbers of the row; a row with a single
  ## number has it recycled across both columns by the matrix assignment
  df[i, ] <- as.numeric(
    tokens[str_detect(string = tokens,
                      pattern = "^(\\d{1}|\\d{2}|\\d{3}|\\d{4}|\\d{5}|\\d{6}|\\d{7}|\\d{8}|\\d{9}|\\d{10})$")]
  )
}

tab3 <- data.frame(df[ , 1], labs, df[ , 2], tab3[ , 3:7])

## Every row of this table is assigned to category 26, "Grants"
categoryCode <- rep(26, times = nrow(tab3))
category <- rep("Grants", times = nrow(tab3))

tab3 <- data.frame(categoryCode, category, tab3)

tab3$category <- str_to_title(tab3$category)

names(tab3) <- c("categoryCode", "category",
                 "itemCode", "item",
                 "actual_2016_2017",
                 "budget_2017_2018", "outturn_2017_2018",
                 "budget_2018_2019", "projection_2019_2020",
                 "projection_2020_2021")

## Item labels as character, amounts as numeric
tab3[[4]] <- as.character(tab3[[4]])
tab3[5:10] <- lapply(tab3[5:10], function(column) as.numeric(as.character(column)))

################################################################################

## Rows 6 to 24 of the fourth table extracted into health2
tab4 <- health2[[4]][6:24, ]

## The first row continues on the second (presumably a label wrapped over two
## lines in the PDF): append the continuation, then drop it
wrapped <- 1
tab4[wrapped, 1] <- paste(tab4[wrapped, 1], tab4[wrapped + 1, 1], sep = " ")
tab4 <- tab4[-(wrapped + 1), ]

## Column 1: replace dashes with a space (spaced en dash, en dash, hyphen;
## the named vector is applied in that order), then strip punctuation from
## columns 1 to 6
tab4[ , 1] <- str_replace_all(string = tab4[ , 1],
                              pattern = c(" – " = " ", "–" = " ", "-" = " "))
tab4[ , 1:6] <- apply(X = tab4[ , 1:6], MARGIN = 2, FUN = str_replace_all,
                      pattern = "[[:punct:]]", replacement = "")

## Column 6 is split at its first space into two columns
y <- str_split_fixed(string = tab4[ , 6], pattern = " ", n = 2)
tab4 <- cbind(tab4[ , 1:5], y)

## Column 1 is split into up to 8 tokens holding the label and two numbers
x <- str_split_fixed(string = tab4[ , 1], pattern = " ", n = 8)

labs <- character(nrow(x))
df <- matrix(data = NA, nrow = nrow(x), ncol = 2)

for (i in seq_len(nrow(x))) {
  tokens <- x[i, ]

  ## Alphabetic tokens form the label. Collapsing them gives "" for a row
  ## without any, whereas looping over 1:length(words) produced "NA ".
  labs[i] <- paste(
    tokens[str_detect(string = tokens, pattern = "^[a-zA-Z]+$")],
    collapse = " "
  )

  ## Tokens of 1 to 10 digits are the numbers of the row; a row with a single
  ## number has it recycled across both columns by the matrix assignment
  df[i, ] <- as.numeric(
    tokens[str_detect(string = tokens,
                      pattern = "^(\\d{1}|\\d{2}|\\d{3}|\\d{4}|\\d{5}|\\d{6}|\\d{7}|\\d{8}|\\d{9}|\\d{10})$")]
  )
}

## Row 17 is left out of the data; its label is the category of row 18
tab4 <- data.frame(df[ , 1], labs, df[ , 2], tab4[ , 3:7])
tab4 <- tab4[c(1:16, 18), ]

categoryCode <- rep(c(26, 31), times = c(16, 1))
category <- rep(c("Grants", labs[17]), times = c(16, 1))

tab4 <- data.frame(categoryCode, category, tab4)

tab4$category <- str_to_title(tab4$category)

names(tab4) <- c("categoryCode", "category",
                 "itemCode", "item",
                 "actual_2016_2017",
                 "budget_2017_2018", "outturn_2017_2018",
                 "budget_2018_2019", "projection_2019_2020",
                 "projection_2020_2021")

## Item labels as character, amounts as numeric
tab4[[4]] <- as.character(tab4[[4]])
tab4[5:10] <- lapply(tab4[5:10], function(column) as.numeric(as.character(column)))

################################################################################

## Stack the four partial tables (same column names) into one data.frame and
## save it as package data
mohHealthEcon2018 <- data.frame(do.call(rbind, list(tab1, tab2, tab3, tab4)))
usethis::use_data(mohHealthEcon2018, overwrite = TRUE)

################################################################################

## Rows 30 to 44 of the fourth table extracted into health2
tab5 <- health2[[4]][30:44, ]

## Column 1: replace hyphens with a space, then strip punctuation from
## columns 1 to 6
tab5[ , 1] <- str_replace_all(string = tab5[ , 1], pattern = "-", replacement = " ")
tab5[ , 1:6] <- apply(X = tab5[ , 1:6], MARGIN = 2, FUN = str_replace_all,
                      pattern = "[[:punct:]]", replacement = "")

## Column 1 is split into up to 5 tokens holding the label and two numbers;
## column 6 is split at its first space into two columns
x <- str_split_fixed(string = tab5[ , 1], pattern = " ", n = 5)
y <- str_split_fixed(string = tab5[ , 6], pattern = " ", n = 2)

tab5 <- cbind(tab5[ , 1:5], y)

labs <- character(nrow(x))
df <- matrix(data = NA, nrow = nrow(x), ncol = 2)

for (i in seq_len(nrow(x))) {
  tokens <- x[i, ]

  ## Alphabetic tokens form the label. Collapsing them gives "" for a row
  ## without any, whereas looping over 1:length(words) produced "NA ".
  labs[i] <- paste(
    tokens[str_detect(string = tokens, pattern = "^[a-zA-Z]+$")],
    collapse = " "
  )

  ## Tokens of 1 to 10 digits are the numbers of the row; a row with a single
  ## number has it recycled across both columns by the matrix assignment
  df[i, ] <- as.numeric(
    tokens[str_detect(string = tokens,
                      pattern = "^(\\d{1}|\\d{2}|\\d{3}|\\d{4}|\\d{5}|\\d{6}|\\d{7}|\\d{8}|\\d{9}|\\d{10})$")]
  )
}

tab5 <- data.frame(df[ , 1], labs, df[ , 2], tab5[ , 3:7])

tab5$labs <- str_to_title(tab5$labs)

names(tab5) <- c("countyCode", "county",
                 "actual_2016_2017",
                 "budget_2017_2018", "outturn_2017_2018",
                 "budget_2018_2019", "projection_2019_2020",
                 "projection_2020_2021")

## Columns 4 to 8 still hold text taken from the extracted table; convert
## them to numeric
tab5[4:8] <- lapply(tab5[4:8], function(column) as.numeric(as.character(column)))

################################################################################

## Save the table as package data
mohHealthCounty2018 <- tab5
usethis::use_data(mohHealthCounty2018, overwrite = TRUE)

################################################################################

## First part of tab6: rows 14 to 48 of the fifth table extracted into health2
tab6 <- health2[[5]][14:48, ]

## Column 1: replace hyphens (spaced first, then bare; the named vector is
## applied in that order) with a space, then strip punctuation from
## columns 1 to 4
tab6[ , 1] <- str_replace_all(string = tab6[ , 1],
                              pattern = c(" - " = " ", "-" = " "))
tab6[ , 1:4] <- apply(X = tab6[ , 1:4], MARGIN = 2, FUN = str_replace_all,
                      pattern = "[[:punct:]]", replacement = "")

## Rows whose first column continues on the next row (presumably labels
## wrapped over two lines in the PDF): append the continuation, then drop it
wrapped <- c(7, 9, 11, 16, 23, 28, 31)
tab6[wrapped, 1] <- paste(tab6[wrapped, 1], tab6[wrapped + 1, 1], sep = " ")
tab6 <- tab6[-(wrapped + 1), ]

## Join the four columns of every row and split the result into up to 13
## space-delimited tokens
tab6 <- paste(tab6[ , 1], tab6[ , 2], tab6[ , 3], tab6[ , 4], sep = " ")

x <- str_split_fixed(string = tab6, pattern = " ", n = 13)

labs <- character(nrow(x))
df <- matrix(data = NA, nrow = nrow(x), ncol = 7)

for (i in seq_len(nrow(x))) {
  tokens <- x[i, ]

  ## Alphabetic tokens form the label. Collapsing them gives "" for a row
  ## without any, whereas looping over 1:length(words) produced "NA ".
  labs[i] <- paste(
    tokens[str_detect(string = tokens, pattern = "^[a-zA-Z]+$")],
    collapse = " "
  )

  ## Tokens of 1 to 10 digits are the numbers of the row; a row with a single
  ## number has it recycled across all columns by the matrix assignment
  df[i, ] <- as.numeric(
    tokens[str_detect(string = tokens,
                      pattern = "^(\\d{1}|\\d{2}|\\d{3}|\\d{4}|\\d{5}|\\d{6}|\\d{7}|\\d{8}|\\d{9}|\\d{10})$")]
  )
}

## Character matrix: first number, label, remaining six numbers
tab6a <- cbind(df[ , 1], labs, df[ , 2:7])

## Second part of tab6: rows 5 to 50 of the sixth table extracted into health2
tab6 <- health2[[6]][5:50, ]

## Column 1: replace hyphens (spaced first, then bare; the named vector is
## applied in that order) with a space, then strip punctuation from
## columns 1 to 6
tab6[ , 1] <- str_replace_all(string = tab6[ , 1],
                              pattern = c(" - " = " ", "-" = " "))
tab6[ , 1:6] <- apply(X = tab6[ , 1:6], MARGIN = 2, FUN = str_replace_all,
                      pattern = "[[:punct:]]", replacement = "")

## Rows whose first column continues on the next row (presumably labels
## wrapped over two lines in the PDF): append the continuation, then drop it
wrapped <- c(4, 11, 14, 19, 22, 24, 27, 33, 35, 37, 40, 44)
tab6[wrapped, 1] <- paste(tab6[wrapped, 1], tab6[wrapped + 1, 1], sep = " ")
tab6 <- tab6[-(wrapped + 1), ]

## Join the six columns of every row and split the result into up to 13
## space-delimited tokens
tab6 <- paste(tab6[ , 1], tab6[ , 2], tab6[ , 3], tab6[ , 4], tab6[ , 5], tab6[ , 6], sep = " ")

x <- str_split_fixed(string = tab6, pattern = " ", n = 13)

labs <- character(nrow(x))
df <- matrix(data = NA, nrow = nrow(x), ncol = 7)

for (i in seq_len(nrow(x))) {
  tokens <- x[i, ]

  ## Alphabetic tokens form the label. Collapsing them gives "" for a row
  ## without any, whereas looping over 1:length(words) produced "NA ".
  labs[i] <- paste(
    tokens[str_detect(string = tokens, pattern = "^[a-zA-Z]+$")],
    collapse = " "
  )

  ## Tokens of 1 to 10 digits are the numbers of the row; a row with a single
  ## number has it recycled across all columns by the matrix assignment
  df[i, ] <- as.numeric(
    tokens[str_detect(string = tokens,
                      pattern = "^(\\d{1}|\\d{2}|\\d{3}|\\d{4}|\\d{5}|\\d{6}|\\d{7}|\\d{8}|\\d{9}|\\d{10})$")]
  )
}

## Character matrix: first number, label, remaining six numbers
tab6b <- cbind(df[ , 1], labs, df[ , 2:7])

## Third part of tab6: rows 5 to 48 of the seventh table extracted into health2
tab6 <- health2[[7]][5:48, ]

## Column 1: replace hyphens (spaced first, then bare; the named vector is
## applied in that order) with a space, then strip punctuation from
## columns 1 to 6
tab6[ , 1] <- str_replace_all(string = tab6[ , 1],
                              pattern = c(" - " = " ", "-" = " "))
tab6[ , 1:6] <- apply(X = tab6[ , 1:6], MARGIN = 2, FUN = str_replace_all,
                      pattern = "[[:punct:]]", replacement = "")

## Rows whose first column continues on the next row (presumably labels
## wrapped over two lines in the PDF): append the continuation, then drop it
wrapped <- c(1, 3, 6, 12, 16, 19, 28, 30, 36)
tab6[wrapped, 1] <- paste(tab6[wrapped, 1], tab6[wrapped + 1, 1], sep = " ")
tab6 <- tab6[-(wrapped + 1), ]

## Join the six columns of every row and split the result into up to 14
## space-delimited tokens
tab6 <- paste(tab6[ , 1], tab6[ , 2], tab6[ , 3], tab6[ , 4], tab6[ , 5], tab6[ , 6], sep = " ")

x <- str_split_fixed(string = tab6, pattern = " ", n = 14)

labs <- character(nrow(x))
df <- matrix(data = NA, nrow = nrow(x), ncol = 7)

for (i in seq_len(nrow(x))) {
  tokens <- x[i, ]

  ## Alphabetic tokens form the label. Collapsing them gives "" for a row
  ## without any, whereas looping over 1:length(words) produced "NA ".
  labs[i] <- paste(
    tokens[str_detect(string = tokens, pattern = "^[a-zA-Z]+$")],
    collapse = " "
  )

  ## Tokens of 1 to 10 digits are the numbers of the row; a row with a single
  ## number has it recycled across all columns by the matrix assignment
  df[i, ] <- as.numeric(
    tokens[str_detect(string = tokens,
                      pattern = "^(\\d{1}|\\d{2}|\\d{3}|\\d{4}|\\d{5}|\\d{6}|\\d{7}|\\d{8}|\\d{9}|\\d{10})$")]
  )
}

## Character matrix: first number, label, remaining six numbers
tab6c <- cbind(df[ , 1], labs, df[ , 2:7])

## Stack the three partial matrices and keep rows 3-4, 6-16, 18-30 and 32-97
## (rows 1, 2, 5, 17 and 31 are left out)
tab6 <- rbind(tab6a, tab6b, tab6c)[c(3:4, 6:16, 18:30, 32:97), ]

## Every row belongs to department 100
departmentCode <- rep(100, times = nrow(tab6))
department <- rep("Curative Services", times = nrow(tab6))

## Category code and label for each consecutive run of rows
categoryCode <- rep(c(21, 22, 25, 26), times = c(2, 11, 13, 66))
category <- rep(c("Compensation Of Employees",
                  "Use Of Goods And Services",
                  "Subsidy",
                  "Grants"),
                times = c(2, 11, 13, 66))

tab6 <- data.frame(departmentCode, department, categoryCode, category, tab6)

tab6$department <- str_to_title(string = tab6$department)
tab6$category <- str_to_title(string = tab6$category)

names(tab6) <- c("departmentCode", "department",
                 "categoryCode", "category",
                 "itemCode", "item",
                 "actual_2016_2017",
                 "budget_2017_2018", "outturn_2017_2018",
                 "budget_2018_2019", "projection_2019_2020",
                 "projection_2020_2021")

## The stacked matrix is character: item code and amounts become numeric,
## the item label stays character
tab6[[5]] <- as.numeric(as.character(tab6[[5]]))
tab6[[6]] <- as.character(tab6[[6]])
tab6[7:12] <- lapply(tab6[7:12], function(column) as.numeric(as.character(column)))

################################################################################

## Save the table as package data
mohHealthCurative2018 <- tab6
usethis::use_data(mohHealthCurative2018, overwrite = TRUE)

################################################################################

## Rows 12 to 28 of the eighth table extracted into health2
tab7 <- health2[[8]][12:28, ]

## Column 1: replace hyphens (spaced first, then bare; the named vector is
## applied in that order) with a space, then strip punctuation from
## columns 1 to 6
tab7[ , 1] <- str_replace_all(string = tab7[ , 1],
                              pattern = c(" - " = " ", "-" = " "))
tab7[ , 1:6] <- apply(X = tab7[ , 1:6], MARGIN = 2, FUN = str_replace_all,
                      pattern = "[[:punct:]]", replacement = "")

## Rows whose first column continues on the next row (presumably labels
## wrapped over two lines in the PDF): append the continuation, then drop it
wrapped <- c(5, 10, 16)
tab7[wrapped, 1] <- paste(tab7[wrapped, 1], tab7[wrapped + 1, 1], sep = " ")
tab7 <- tab7[-(wrapped + 1), ]

## Join the six columns of every row and split the result into up to 13
## space-delimited tokens
tab7 <- paste(tab7[ , 1], tab7[ , 2], tab7[ , 3], tab7[ , 4], tab7[ , 5], tab7[ , 6], sep = " ")

x <- str_split_fixed(string = tab7, pattern = " ", n = 13)

labs <- character(nrow(x))
df <- matrix(data = NA, nrow = nrow(x), ncol = 7)

for (i in seq_len(nrow(x))) {
  tokens <- x[i, ]

  ## Alphabetic tokens form the label. Collapsing them gives "" for a row
  ## without any, whereas looping over 1:length(words) produced "NA ".
  labs[i] <- paste(
    tokens[str_detect(string = tokens, pattern = "^[a-zA-Z]+$")],
    collapse = " "
  )

  ## Tokens of 1 to 10 digits are the numbers of the row; a row with a single
  ## number has it recycled across all columns by the matrix assignment.
  ## as.numeric() is applied here as in the other sections; the columns are
  ## converted to numeric again below, so the saved values are the same.
  df[i, ] <- as.numeric(
    tokens[str_detect(string = tokens,
                      pattern = "^(\\d{1}|\\d{2}|\\d{3}|\\d{4}|\\d{5}|\\d{6}|\\d{7}|\\d{8}|\\d{9}|\\d{10})$")]
  )
}

## Rows 1, 2, 4 and 13 are left out of the data; their labels are used as the
## department (row 1) and category (rows 2, 4 and 13) names
tab7 <- cbind(df[ , 1], labs, df[ , 2:7])
tab7 <- tab7[c(3, 5:12, 14), ]

departmentCode <- rep(200, times = nrow(tab7))
department <- rep(labs[1], times = nrow(tab7))

categoryCode <- rep(c(21, 22, 26), times = c(1, 8, 1))
category <- rep(labs[c(2, 4, 13)], times = c(1, 8, 1))

tab7 <- data.frame(departmentCode, department, categoryCode, category, tab7)

tab7$department <- str_to_title(string = tab7$department)
tab7$category <- str_to_title(string = tab7$category)

names(tab7) <- c("departmentCode", "department",
                 "categoryCode", "category",
                 "itemCode", "item",
                 "actual_2016_2017",
                 "budget_2017_2018", "outturn_2017_2018",
                 "budget_2018_2019", "projection_2019_2020",
                 "projection_2020_2021")

## The cbind() result is character: item code and amounts become numeric,
## the item label stays character
tab7[[5]] <- as.numeric(as.character(tab7[[5]]))
tab7[[6]] <- as.character(tab7[[6]])
tab7[7:12] <- lapply(tab7[7:12], function(column) as.numeric(as.character(column)))

################################################################################

## Save the table as package data
mohHealthPreventive2018 <- tab7
usethis::use_data(mohHealthPreventive2018, overwrite = TRUE)

################################################################################

## Rows 10 to 30 of the ninth table extracted into health2
tab8 <- health2[[9]][10:30, ]

## Rows whose first column continues on the next row (presumably labels
## wrapped over two lines in the PDF): append the continuation, then drop it
wrapped <- c(1, 7, 9, 11, 16)
tab8[wrapped, 1] <- paste(tab8[wrapped, 1], tab8[wrapped + 1, 1], sep = " ")
tab8 <- tab8[-(wrapped + 1), ]

## Column 1: replace hyphens with a space, then strip punctuation from
## columns 1 to 6
tab8[ , 1] <- str_replace_all(string = tab8[ , 1], pattern = "-", replacement = " ")
tab8[ , 1:6] <- apply(X = tab8[ , 1:6], MARGIN = 2, FUN = str_replace_all,
                      pattern = "[[:punct:]]", replacement = "")

## Join the six columns of every row and split the result into up to 14
## space-delimited tokens
tab8 <- paste(tab8[ , 1], tab8[ , 2], tab8[ , 3], tab8[ , 4], tab8[ , 5], tab8[ , 6], sep = " ")

x <- str_split_fixed(string = tab8, pattern = " ", n = 14)

labs <- character(nrow(x))
df <- matrix(data = NA, nrow = nrow(x), ncol = 7)

for (i in seq_len(nrow(x))) {
  tokens <- x[i, ]

  ## Alphabetic tokens form the label. Collapsing them gives "" for a row
  ## without any, whereas looping over 1:length(words) produced "NA ".
  labs[i] <- paste(
    tokens[str_detect(string = tokens, pattern = "^[a-zA-Z]+$")],
    collapse = " "
  )

  ## Tokens of 1 to 10 digits are the numbers of the row; a row with a single
  ## number has it recycled across all columns by the matrix assignment
  df[i, ] <- as.numeric(
    tokens[str_detect(string = tokens,
                      pattern = "^(\\d{1}|\\d{2}|\\d{3}|\\d{4}|\\d{5}|\\d{6}|\\d{7}|\\d{8}|\\d{9}|\\d{10})$")]
  )
}

## Rows 1, 2, 4 and 15 are left out of the data; their labels are used as the
## department (row 1) and category (rows 2, 4 and 15) names
tab8 <- cbind(df[ , 1], labs, df[ , 2:7])
tab8 <- tab8[c(3, 5:14, 16), ]

departmentCode <- rep(400, times = nrow(tab8))
department <- rep(labs[1], times = nrow(tab8))

categoryCode <- rep(c(21, 22, 26), times = c(1, 10, 1))
category <- rep(labs[c(2, 4, 15)], times = c(1, 10, 1))

tab8 <- data.frame(departmentCode, department, categoryCode, category, tab8)

tab8$department <- str_to_title(string = tab8$department)
tab8$category <- str_to_title(string = tab8$category)

names(tab8) <- c("departmentCode", "department",
                 "categoryCode", "category",
                 "itemCode", "item",
                 "actual_2016_2017",
                 "budget_2017_2018", "outturn_2017_2018",
                 "budget_2018_2019", "projection_2019_2020",
                 "projection_2020_2021")

## The cbind() result is character: item code and amounts become numeric,
## the item label stays character
tab8[[5]] <- as.numeric(as.character(tab8[[5]]))
tab8[[6]] <- as.character(tab8[[6]])
tab8[7:12] <- lapply(tab8[7:12], function(column) as.numeric(as.character(column)))

################################################################################

## Save the table as package data
mohHealthPlanning2018 <- tab8
usethis::use_data(mohHealthPlanning2018, overwrite = TRUE)

################################################################################

tab9 <- health2[[10]][5:20, ]

## Append the text of rows 8, 10 and 14 to the label of the row above each
## (presumably labels wrapped over two lines in the PDF -- confirm), then drop
## the appended rows
for (r in c(7, 9, 13)) {
  tab9[r, 1] <- paste(tab9[r, 1], tab9[r + 1, 1], sep = " ")
}

tab9 <- tab9[c(1:7, 9, 11:13, 15:16), ]

## Dashes in the label column become spaces (same order as before: spaced
## hyphen, bare hyphen, en dash), then punctuation is stripped from all columns
for (dash in c(" - ", "-", "–")) {
  tab9[ , 1] <- str_replace_all(string = tab9[ , 1], pattern = dash, replacement = " ")
}

for (k in 1:6) {
  tab9[ , k] <- str_replace_all(string = tab9[ , k], pattern = "[[:punct:]]", replacement = "")
}

## Collapse every row into one string and split it again into 13 token columns
tab9 <- paste(tab9[ , 1], tab9[ , 2], tab9[ , 3], tab9[ , 4], tab9[ , 5], tab9[ , 6], sep = " ")

x <- str_split_fixed(string = tab9, pattern = " ", n = 13)

## Per row: purely alphabetic tokens form the label, tokens of 1-10 digits fill
## the 7 numeric columns (the first one is used as the item code below)
labs <- character(nrow(x))
df <- matrix(data = NA, nrow = nrow(x), ncol = 7)

for (i in seq_len(nrow(x))) {
  words <- x[i, ][str_detect(string = x[i, ], pattern = "^[a-zA-Z]+$")]

  ## collapse = " " yields "" for a row without alphabetic tokens; looping over
  ## 1:length(words) produced the bogus label "NA " in that case
  phrase <- paste(words, collapse = " ")
  labs[i] <- phrase

  numbers <- x[i, ][str_detect(string = x[i, ], pattern = "^\\d{1,10}$")]
  numbers <- as.numeric(numbers)
  df[i, ] <- numbers
}

## Keep the item rows only; the labels of rows 1, 2 and 5 are used as the
## department and category names below
tab9 <- cbind(df[ , 1], labs, df[ , 2:7])
tab9 <- tab9[c(3:4, 6:13), ]

departmentCode <- rep(500, nrow(tab9))
department <- rep(labs[1], nrow(tab9))

categoryCode <- c(rep(21, 2), rep(22, 8))
category <- c(rep(labs[2], 2), rep(labs[5], 8))

tab9 <- data.frame(departmentCode, department, categoryCode, category, tab9)

tab9$department <- str_to_title(string = tab9$department)
tab9$category <- str_to_title(string = tab9$category)

names(tab9) <- c("departmentCode", "department",
                 "categoryCode", "category",
                 "itemCode", "item",
                 "actual_2016_2017",
                 "budget_2017_2018", "outturn_2017_2018",
                 "budget_2018_2019", "projection_2019_2020",
                 "projection_2020_2021")

## cbind() above produced a character matrix, so restore the column types:
## item stays text, the item code and the six amount columns become numeric
tab9[[6]] <- as.character(tab9[[6]])

for (k in c(5, 7:12)) {
  tab9[[k]] <- as.numeric(as.character(tab9[[k]]))
}

################################################################################

mohHealthVital2018 <- tab9
usethis::use_data(mohHealthVital2018, overwrite = TRUE)

################################################################################

## The table spans two extracted matrices; stack the relevant rows of both
tab10 <- rbind(health2[[10]][32:48, ], health2[[11]][5:17, ])

## Append the text of the row below to the labels of rows 1, 9, 11, 14 and 21
## (presumably labels wrapped over two lines in the PDF -- confirm), then drop
## the appended rows
for (r in c(1, 9, 11, 14, 21)) {
  tab10[r, 1] <- paste(tab10[r, 1], tab10[r + 1, 1], sep = " ")
}

tab10 <- tab10[c(1, 3:9, 11, 13:14, 16:21, 23:30), ]

## Dashes in the label column become spaces (same order as before: spaced
## hyphen, bare hyphen, en dash), then punctuation is stripped from all columns
for (dash in c(" - ", "-", "–")) {
  tab10[ , 1] <- str_replace_all(string = tab10[ , 1], pattern = dash, replacement = " ")
}

for (k in 1:6) {
  tab10[ , k] <- str_replace_all(string = tab10[ , k], pattern = "[[:punct:]]", replacement = "")
}

## Collapse every row into one string and split it again into 14 token columns
tab10 <- paste(tab10[ , 1], tab10[ , 2], tab10[ , 3], tab10[ , 4], tab10[ , 5], tab10[ , 6], sep = " ")

x <- str_split_fixed(string = tab10, pattern = " ", n = 14)

## Per row: purely alphabetic tokens form the label, tokens of 1-10 digits fill
## the 7 numeric columns (the first one is used as the item code below)
labs <- character(nrow(x))
df <- matrix(data = NA, nrow = nrow(x), ncol = 7)

for (i in seq_len(nrow(x))) {
  words <- x[i, ][str_detect(string = x[i, ], pattern = "^[a-zA-Z]+$")]

  ## collapse = " " yields "" for a row without alphabetic tokens; looping over
  ## 1:length(words) produced the bogus label "NA " in that case
  phrase <- paste(words, collapse = " ")
  labs[i] <- phrase

  numbers <- x[i, ][str_detect(string = x[i, ], pattern = "^\\d{1,10}$")]
  numbers <- as.numeric(numbers)
  df[i, ] <- numbers
}

## Keep the item rows only; the labels of rows 1, 2, 6 and 24 are used as the
## department and category names below
tab10 <- cbind(df[ , 1], labs, df[ , 2:7])
tab10 <- tab10[c(3:5, 7:23, 25), ]

departmentCode <- rep(600, nrow(tab10))
department <- rep(labs[1], nrow(tab10))

categoryCode <- c(rep(21, 3), rep(22, 17), 31)
category <- c(rep(labs[2], 3), rep(labs[6], 17), labs[24])

tab10 <- data.frame(departmentCode, department, categoryCode, category, tab10)

tab10$department <- str_to_title(string = tab10$department)
tab10$category <- str_to_title(string = tab10$category)

names(tab10) <- c("departmentCode", "department",
                  "categoryCode", "category",
                  "itemCode", "item",
                  "actual_2016_2017",
                  "budget_2017_2018", "outturn_2017_2018",
                  "budget_2018_2019", "projection_2019_2020",
                  "projection_2020_2021")

## cbind() above produced a character matrix, so restore the column types:
## item stays text, the item code and the six amount columns become numeric
tab10[[6]] <- as.character(tab10[[6]])

for (k in c(5, 7:12)) {
  tab10[[k]] <- as.numeric(as.character(tab10[[k]]))
}

################################################################################

mohHealthAdmin2018 <- tab10
usethis::use_data(mohHealthAdmin2018, overwrite = TRUE)

################################################################################

## The table spans three extracted matrices; stack the relevant rows of each
tab11 <- rbind(health2[[11]][27:30, ], health2[[12]][c(5:10, 20:27, 37:45), ], health2[[13]][5:8, ])

## Append the text of the row below to the labels of rows 7, 9, 15, 24 and 28,
## then drop the appended rows
for (r in c(7, 9, 15, 24, 28)) {
  tab11[r, 1] <- paste(tab11[r, 1], tab11[r + 1, 1], sep = " ")
}

tab11 <- tab11[c(1:7, 9, 11:15, 17:24, 26:28, 30:31), ]

## Dashes in the label column become spaces (spaced hyphen, bare hyphen, then
## en dash), after which punctuation is stripped from all six columns
for (dash in c(" - ", "-", "–")) {
  tab11[ , 1] <- str_replace_all(string = tab11[ , 1], pattern = dash, replacement = " ")
}

for (k in 1:6) {
  tab11[ , k] <- str_replace_all(string = tab11[ , k], pattern = "[[:punct:]]", replacement = "")
}

## Collapse every row into one string and split it again into 14 token columns
tab11 <- do.call(paste, c(lapply(1:6, function(k) tab11[ , k]), sep = " "))

x <- str_split_fixed(string = tab11, pattern = " ", n = 14)

## Containers filled by the parsing loop that follows
labs <- NULL
df <- matrix(data = NA, nrow = nrow(x), ncol = 7)

## Parse each tokenised row of tab11: alphabetic tokens are joined into a label
## that is appended to `labs`.
## NOTE(review): this loop does not close here. Directly after the `labs`
## assignment a second copy of the top of this script (library() calls,
## get_table(), the page 253 tables, ...) appears indented inside the loop
## body, and the `numbers` extraction / `df[i, ] <- numbers` step that follows
## the label in every other section is missing at this point. The nested copy
## also reassigns `i`, `x`, `labs` and `df`. This looks like an accidental
## paste -- confirm and remove the duplicate.
for(i in 1:(nrow(x))) {
  words <- x[i, ][str_detect(string = x[i, ], pattern = "^[a-zA-Z]+$")]
  phrase <- ""


  # Builds " w1 w2 ..."; the leading space is removed below
  for(j in 1:length(words)){
    phrase <- paste(phrase, words[j], sep = " ")
  }

  # str_remove() drops only the first space, i.e. the leading one
  phrase <- str_remove(string = phrase, pattern = " ")
  labs <- c(labs, phrase)################################################################################
  #
  # Load required libraries
  #
  ################################################################################

  library(pdftools)
  library(tm)
  library(tabulizer)
  library(stringr)
  library(tidyverse)
  library(tidytext)

  options(scipen = 999)


  ################################################################################
  #
  # Function to process tables
  #
  ################################################################################

  # Split tokenised table rows into text labels and numeric values.
  #
  # Arguments:
  #   tab   list (or vector) whose i-th element holds the character tokens of
  #         row i
  #   nrow  number of rows of the numeric matrix; defaults to length(tab)
  #   ncol  number of columns of the numeric matrix; defaults to the largest
  #         count of numeric tokens found in any row
  #
  # Returns a list with
  #   labs  one label per row: its purely alphabetic tokens joined by spaces
  #         ("" when the row has none)
  #   df    numeric matrix holding the 6-8 digit tokens of each row; rows
  #         without such tokens stay NA
  #
  # The previous version ended on the for loop and therefore returned NULL,
  # discarding both results.
  get_table <- function(tab, nrow = NULL, ncol = NULL) {
    words_by_row <- lapply(tab, function(tokens) {
      tokens[grepl(pattern = "^[a-zA-Z]+$", x = tokens)]
    })
    numbers_by_row <- lapply(tab, function(tokens) {
      as.numeric(tokens[grepl(pattern = "^\\d{6,8}$", x = tokens)])
    })

    # matrix() cannot be built from NULL extents, so derive them from the input
    if (is.null(nrow)) {
      nrow <- length(tab)
    }
    if (is.null(ncol)) {
      ncol <- max(0L, lengths(numbers_by_row))
    }

    labs <- vapply(words_by_row, paste, character(1), collapse = " ",
                   USE.NAMES = FALSE)
    df <- matrix(data = NA_real_, nrow = nrow, ncol = ncol)

    for (i in seq_along(tab)) {
      if (length(numbers_by_row[[i]]) > 0) {
        df[i, ] <- numbers_by_row[[i]]
      }
    }

    list(labs = labs, df = df)
  }



  ################################################################################
  #
  # Create list for information and tables in page 253 (2018)
  #
  ################################################################################

  ## Extract tables from page 253
  health1 <- extract_tables(file = "data-raw/budget/2018 Ministry of Finance and Development Planning.pdf",
                            pages = 253,
                            method = "decide")

  #goal <- health1[[1]][3, ]
  #strategicObjective <- str_c(health1[[1]][5, ], health1[[1]][6, ], health1[[1]][7, ], sep = " ")

  ## First table: rows 27-31 of the extracted matrix, tokenised on spaces
  tab1 <- health1[[1]][27:31, ]
  tab1 <- paste(tab1[ , 1], tab1[ , 2], sep = " ")

  tab1 <- str_replace_all(string = tab1, pattern = "[[:punct:]]", replacement = "")
  tab1 <- str_split_fixed(string = tab1, pattern = " ", n = 12)

  ## Per row: alphabetic tokens form the label, 1-10 digit tokens the 7 numbers
  economic_classification <- character(nrow(tab1))
  df <- matrix(data = NA, nrow = 5, ncol = 7)

  for (i in seq_len(nrow(tab1))) {
    words <- tab1[i, ][str_detect(string = tab1[i, ], pattern = "^[a-zA-Z]+$")]

    ## collapse = " " yields "" for a row without alphabetic tokens; looping
    ## over 1:length(words) produced the bogus label "NA " in that case
    phrase <- paste(words, collapse = " ")
    economic_classification[i] <- phrase

    numbers <- tab1[i, ][str_detect(string = tab1[i, ], pattern = "^\\d{1,10}$")]
    numbers <- as.numeric(numbers)
    df[i, ] <- numbers
  }

  df <- data.frame(df[ , 1], economic_classification, df[ , 2:7])
  names(df) <- c("code", "economic_classification",
                 "actual_2016_2017",
                 "budget_2017_2018", "outturn_2017_2018",
                 "budget_2018_2019", "projection_2019_2020",
                 "projection_2020_2021")

  df$economic_classification <- str_to_sentence(df$economic_classification)

  summaryEconHealth2018 <- df

  ## Second table: rows 37-46 of the same extracted matrix
  tab2 <- health1[[1]][37:46, ]
  tab2 <- paste(tab2[ , 1], tab2[ , 2], sep = " ")
  tab2 <- str_replace_all(string = tab2, pattern = "[[:punct:]]", replacement = "")

  tab2 <- str_split_fixed(string = tab2, pattern = " ", n = 12)

  ## Same parsing as for the first table
  spending_entity <- character(nrow(tab2))
  df <- matrix(data = NA, nrow = 10, ncol = 7)

  for (i in seq_len(nrow(tab2))) {
    words <- tab2[i, ][str_detect(string = tab2[i, ], pattern = "^[a-zA-Z]+$")]
    phrase <- paste(words, collapse = " ")
    spending_entity[i] <- phrase

    numbers <- tab2[i, ][str_detect(string = tab2[i, ], pattern = "^\\d{1,10}$")]
    numbers <- as.numeric(numbers)
    df[i, ] <- numbers
  }

  df <- data.frame(df[ , 1], spending_entity, df[ , 2:7])
  names(df) <- c("code", "spending_entity",
                 "actual_2016_2017",
                 "budget_2017_2018", "outturn_2017_2018",
                 "budget_2018_2019", "projection_2019_2020",
                 "projection_2020_2021")

  df$spending_entity <- str_to_title(df$spending_entity)

  summarySpendingHealth2018 <- df

  ## Bundle both summaries into one named list and store it as package data
  summaryHealth2018 <- list(summaryEconHealth2018, summarySpendingHealth2018)

  names(summaryHealth2018) <- c("summaryEconHealth2018", "summarySpendingHealth2018")

  usethis::use_data(summaryHealth2018, overwrite = TRUE)

  ## Tidy-up
  rm(words, phrase, numbers, economic_classification, tab1, tab2, df,
     summaryEconHealth2018, summarySpendingHealth2018, health1)

  ################################################################################
  #
  #
  #
  ################################################################################

  ## Extract tables from pages 254-266
  health2 <- extract_tables(file = "data-raw/budget/2018 Ministry of Finance and Development Planning.pdf",
                            pages = 254:266,
                            method = "decide")

  tab1 <- health2[[1]][14:48, ]

  ## Dashes in the label column become spaces (spaced en dash, bare en dash,
  ## then hyphen), after which punctuation is stripped from all six columns
  for (dash in c(" – ", "–", "-")) {
    tab1[ , 1] <- str_replace_all(string = tab1[ , 1], pattern = dash, replacement = " ")
  }

  for (k in 1:6) {
    tab1[ , k] <- str_replace_all(string = tab1[ , k], pattern = "[[:punct:]]", replacement = "")
  }

  ## Append the text of rows 8 and 12 to the label of the row above each
  ## (presumably wrapped labels -- confirm), then drop the appended rows
  for (r in c(7, 11)) {
    tab1[r, 1] <- paste(tab1[r, 1], tab1[r + 1, 1], sep = " ")
  }

  tab1 <- tab1[c(1:7, 9:11, 13:35), ]

  ## Column 6 holds two space-separated values; split it into two columns
  y <- str_split_fixed(string = tab1[ , 6], pattern = " ", n = 2)

  tab1 <- cbind(tab1[ , 1:5], y)

  x <- str_split_fixed(string = tab1[ , 1], pattern = " ", n = 8)

  ## Per row of column 1: alphabetic tokens form the label, tokens of 1-10
  ## digits fill the 2 numeric columns
  labs <- character(nrow(x))
  df <- matrix(data = NA, nrow = nrow(x), ncol = 2)

  for (i in seq_len(nrow(x))) {
    words <- x[i, ][str_detect(string = x[i, ], pattern = "^[a-zA-Z]+$")]

    ## collapse = " " yields "" for a row without alphabetic tokens; looping
    ## over 1:length(words) produced the bogus label "NA " in that case
    phrase <- paste(words, collapse = " ")
    labs[i] <- phrase

    numbers <- x[i, ][str_detect(string = x[i, ], pattern = "^\\d{1,10}$")]
    numbers <- as.numeric(numbers)
    df[i, ] <- numbers
  }

  ## Keep the item rows only; the labels of rows 1, 5 and 27 are used as the
  ## category names below
  tab1 <- data.frame(df[ , 1], labs, df[ , 2], tab1[ , 3:7])
  tab1 <- tab1[c(2:4, 6:26, 28:33), ]

  categoryCode <- c(rep(21, 3), rep(22, 21), rep(25, 6))
  category <- c(rep(labs[1], 3), rep(labs[5], 21), rep(labs[27], 6))

  tab1 <- data.frame(categoryCode, category, tab1)

  tab1$category <- str_to_title(tab1$category)

  names(tab1) <- c("categoryCode", "category",
                   "itemCode", "item",
                   "actual_2016_2017",
                   "budget_2017_2018", "outturn_2017_2018",
                   "budget_2018_2019", "projection_2019_2020",
                   "projection_2020_2021")

  ## Restore column types: item is text, the six amount columns are numeric
  tab1[[4]] <- as.character(tab1[[4]])

  for (k in 5:10) {
    tab1[[k]] <- as.numeric(as.character(tab1[[k]]))
  }


  ################################################################################

  tab2 <- health2[[2]][6:48, ]

  ## Dashes in the label column become spaces (spaced en dash, bare en dash,
  ## then hyphen), after which punctuation is stripped from all six columns
  for (dash in c(" – ", "–", "-")) {
    tab2[ , 1] <- str_replace_all(string = tab2[ , 1], pattern = dash, replacement = " ")
  }

  for (k in 1:6) {
    tab2[ , k] <- str_replace_all(string = tab2[ , k], pattern = "[[:punct:]]", replacement = "")
  }

  ## Append the text of the row below to the labels of rows 9, 15, 31 and 36
  ## (presumably wrapped labels -- confirm), then drop the appended rows
  for (r in c(9, 15, 31, 36)) {
    tab2[r, 1] <- paste(tab2[r, 1], tab2[r + 1, 1], sep = " ")
  }

  tab2 <- tab2[c(1:9, 11:15, 17:31, 33:36, 38:43), ]

  ## Column 6 holds two space-separated values; split it into two columns
  y <- str_split_fixed(string = tab2[ , 6], pattern = " ", n = 2)

  tab2 <- cbind(tab2[ , 1:5], y)

  x <- str_split_fixed(string = tab2[ , 1], pattern = " ", n = 8)

  ## Per row of column 1: alphabetic tokens form the label, tokens of 1-10
  ## digits fill the 2 numeric columns
  labs <- character(nrow(x))
  df <- matrix(data = NA, nrow = nrow(x), ncol = 2)

  for (i in seq_len(nrow(x))) {
    words <- x[i, ][str_detect(string = x[i, ], pattern = "^[a-zA-Z]+$")]

    ## collapse = " " yields "" for a row without alphabetic tokens; looping
    ## over 1:length(words) produced the bogus label "NA " in that case
    phrase <- paste(words, collapse = " ")
    labs[i] <- phrase

    numbers <- x[i, ][str_detect(string = x[i, ], pattern = "^\\d{1,10}$")]
    numbers <- as.numeric(numbers)
    df[i, ] <- numbers
  }

  ## Drop row 8, whose label is used as the second category name below
  tab2 <- data.frame(df[ , 1], labs, df[ , 2], tab2[ , 3:7])
  tab2 <- tab2[c(1:7, 9:39), ]

  categoryCode <- c(rep(25, 7), rep(26, 31))
  category <- c(rep("Subsidy", 7), rep(labs[8], 31))

  tab2 <- data.frame(categoryCode, category, tab2)

  tab2$category <- str_to_title(tab2$category)

  names(tab2) <- c("categoryCode", "category",
                   "itemCode", "item",
                   "actual_2016_2017",
                   "budget_2017_2018", "outturn_2017_2018",
                   "budget_2018_2019", "projection_2019_2020",
                   "projection_2020_2021")

  ## Restore column types: item is text, the six amount columns are numeric
  tab2[[4]] <- as.character(tab2[[4]])

  for (k in 5:10) {
    tab2[[k]] <- as.numeric(as.character(tab2[[k]]))
  }

  ################################################################################

  tab3 <- health2[[3]][6:46, ]

  ## Dashes in the label column become spaces (spaced en dash, bare en dash,
  ## then hyphen), after which punctuation is stripped from all six columns
  for (dash in c(" – ", "–", "-")) {
    tab3[ , 1] <- str_replace_all(string = tab3[ , 1], pattern = dash, replacement = " ")
  }

  for (k in 1:6) {
    tab3[ , k] <- str_replace_all(string = tab3[ , k], pattern = "[[:punct:]]", replacement = "")
  }

  ## Append the text of the row below to the labels of rows 1, 14 and 23
  ## (presumably wrapped labels -- confirm); row 41 gets the literal "County"
  ## appended instead. The appended rows are dropped afterwards.
  for (r in c(1, 14, 23)) {
    tab3[r, 1] <- paste(tab3[r, 1], tab3[r + 1, 1], sep = " ")
  }
  tab3[41, 1] <- paste(tab3[41, 1], "County", sep = " ")

  tab3 <- tab3[c(1, 3:14, 16:23, 25:41), ]

  ## Column 6 holds two space-separated values; split it into two columns
  y <- str_split_fixed(string = tab3[ , 6], pattern = " ", n = 2)

  tab3 <- cbind(tab3[ , 1:5], y)

  x <- str_split_fixed(string = tab3[ , 1], pattern = " ", n = 8)

  ## Per row of column 1: alphabetic tokens form the label, tokens of 1-10
  ## digits fill the 2 numeric columns
  labs <- character(nrow(x))
  df <- matrix(data = NA, nrow = nrow(x), ncol = 2)

  for (i in seq_len(nrow(x))) {
    words <- x[i, ][str_detect(string = x[i, ], pattern = "^[a-zA-Z]+$")]

    ## collapse = " " yields "" for a row without alphabetic tokens; looping
    ## over 1:length(words) produced the bogus label "NA " in that case
    phrase <- paste(words, collapse = " ")
    labs[i] <- phrase

    numbers <- x[i, ][str_detect(string = x[i, ], pattern = "^\\d{1,10}$")]
    numbers <- as.numeric(numbers)
    df[i, ] <- numbers
  }

  tab3 <- data.frame(df[ , 1], labs, df[ , 2], tab3[ , 3:7])

  ## Every row of this table is assigned category 26, "Grants"
  categoryCode <- rep(26, nrow(tab3))
  category <- rep("Grants", nrow(tab3))

  tab3 <- data.frame(categoryCode, category, tab3)

  tab3$category <- str_to_title(tab3$category)

  names(tab3) <- c("categoryCode", "category",
                   "itemCode", "item",
                   "actual_2016_2017",
                   "budget_2017_2018", "outturn_2017_2018",
                   "budget_2018_2019", "projection_2019_2020",
                   "projection_2020_2021")

  ## Restore column types: item is text, the six amount columns are numeric
  tab3[[4]] <- as.character(tab3[[4]])

  for (k in 5:10) {
    tab3[[k]] <- as.numeric(as.character(tab3[[k]]))
  }

  ################################################################################

  tab4 <- health2[[4]][6:24, ]

  ## Row 2 is appended to the label of row 1 (presumably a wrapped label --
  ## confirm) and then dropped
  tab4[1, 1] <- paste(tab4[1, 1], tab4[2, 1], sep = " ")

  tab4 <- tab4[c(1, 3:19), ]

  ## Dashes in the label column become spaces (spaced en dash, bare en dash,
  ## then hyphen), after which punctuation is stripped from all six columns
  for (dash in c(" – ", "–", "-")) {
    tab4[ , 1] <- str_replace_all(string = tab4[ , 1], pattern = dash, replacement = " ")
  }

  for (k in 1:6) {
    tab4[ , k] <- str_replace_all(string = tab4[ , k], pattern = "[[:punct:]]", replacement = "")
  }

  ## Column 6 holds two space-separated values; split it into two columns
  y <- str_split_fixed(string = tab4[ , 6], pattern = " ", n = 2)

  tab4 <- cbind(tab4[ , 1:5], y)

  x <- str_split_fixed(string = tab4[ , 1], pattern = " ", n = 8)

  ## Per row of column 1: alphabetic tokens form the label, tokens of 1-10
  ## digits fill the 2 numeric columns
  labs <- character(nrow(x))
  df <- matrix(data = NA, nrow = nrow(x), ncol = 2)

  for (i in seq_len(nrow(x))) {
    words <- x[i, ][str_detect(string = x[i, ], pattern = "^[a-zA-Z]+$")]

    ## collapse = " " yields "" for a row without alphabetic tokens; looping
    ## over 1:length(words) produced the bogus label "NA " in that case
    phrase <- paste(words, collapse = " ")
    labs[i] <- phrase

    numbers <- x[i, ][str_detect(string = x[i, ], pattern = "^\\d{1,10}$")]
    numbers <- as.numeric(numbers)
    df[i, ] <- numbers
  }

  ## Drop row 17, whose label is used as the category name of the last row
  tab4 <- data.frame(df[ , 1], labs, df[ , 2], tab4[ , 3:7])
  tab4 <- tab4[c(1:16, 18), ]

  categoryCode <- c(rep(26, 16), 31)
  category <- c(rep("Grants", 16), labs[17])

  tab4 <- data.frame(categoryCode, category, tab4)

  tab4$category <- str_to_title(tab4$category)

  names(tab4) <- c("categoryCode", "category",
                   "itemCode", "item",
                   "actual_2016_2017",
                   "budget_2017_2018", "outturn_2017_2018",
                   "budget_2018_2019", "projection_2019_2020",
                   "projection_2020_2021")

  ## Restore column types: item is text, the six amount columns are numeric
  tab4[[4]] <- as.character(tab4[[4]])

  for (k in 5:10) {
    tab4[[k]] <- as.numeric(as.character(tab4[[k]]))
  }

  ################################################################################

  ## Stack the four partial tables and store the result as package data
  mohHealthEcon2018 <- data.frame(rbind(tab1, tab2, tab3, tab4))
  usethis::use_data(mohHealthEcon2018, overwrite = TRUE)

  ################################################################################

  tab5 <- health2[[4]][30:44, ]

  ## Hyphens in the label column become spaces, then punctuation is stripped
  ## from all six columns
  tab5[ , 1] <- str_replace_all(string = tab5[ , 1], pattern = "-", replacement = " ")

  for (k in 1:6) {
    tab5[ , k] <- str_replace_all(string = tab5[ , k], pattern = "[[:punct:]]", replacement = "")
  }

  ## Tokenise column 1; column 6 holds two space-separated values and is split
  ## into two columns
  x <- str_split_fixed(string = tab5[ , 1], pattern = " ", n = 5)
  y <- str_split_fixed(string = tab5[ , 6], pattern = " ", n = 2)

  tab5 <- cbind(tab5[ , 1:5], y)

  ## Per row of column 1: alphabetic tokens form the label, tokens of 1-10
  ## digits fill the 2 numeric columns
  labs <- character(nrow(x))
  df <- matrix(data = NA, nrow = nrow(x), ncol = 2)

  for (i in seq_len(nrow(x))) {
    words <- x[i, ][str_detect(string = x[i, ], pattern = "^[a-zA-Z]+$")]

    ## collapse = " " yields "" for a row without alphabetic tokens; looping
    ## over 1:length(words) produced the bogus label "NA " in that case
    phrase <- paste(words, collapse = " ")
    labs[i] <- phrase

    numbers <- x[i, ][str_detect(string = x[i, ], pattern = "^\\d{1,10}$")]
    numbers <- as.numeric(numbers)
    df[i, ] <- numbers
  }

  tab5 <- data.frame(df[ , 1], labs, df[ , 2], tab5[ , 3:7])

  tab5$labs <- str_to_title(tab5$labs)

  names(tab5) <- c("countyCode", "county",
                   "actual_2016_2017",
                   "budget_2017_2018", "outturn_2017_2018",
                   "budget_2018_2019", "projection_2019_2020",
                   "projection_2020_2021")

  ## Columns 4-8 come from the character matrix and are converted to numeric
  for (k in 4:8) {
    tab5[[k]] <- as.numeric(as.character(tab5[[k]]))
  }

  ################################################################################

  mohHealthCounty2018 <- tab5
  usethis::use_data(mohHealthCounty2018, overwrite = TRUE)

  ################################################################################

  tab6 <- health2[[5]][14:48, ]

  ## Hyphens in the label column become spaces (spaced form first), then
  ## punctuation is stripped from columns 1-4
  for (dash in c(" - ", "-")) {
    tab6[ , 1] <- str_replace_all(string = tab6[ , 1], pattern = dash, replacement = " ")
  }

  for (k in 1:4) {
    tab6[ , k] <- str_replace_all(string = tab6[ , k], pattern = "[[:punct:]]", replacement = "")
  }

  ## Append the text of the row below to the listed label rows (presumably
  ## wrapped labels -- confirm), then drop the appended rows
  for (r in c(7, 9, 11, 16, 23, 28, 31)) {
    tab6[r, 1] <- paste(tab6[r, 1], tab6[r + 1, 1], sep = " ")
  }

  tab6 <- tab6[c(1:7, 9, 11, 13:16, 18:23, 25:28, 30:31, 33:35), ]

  ## Collapse every row into one string and split it into 13 token columns
  tab6 <- paste(tab6[ , 1], tab6[ , 2], tab6[ , 3], tab6[ , 4], sep = " ")

  x <- str_split_fixed(string = tab6, pattern = " ", n = 13)

  ## Per row: alphabetic tokens form the label, tokens of 1-10 digits fill the
  ## 7 numeric columns
  labs <- character(nrow(x))
  df <- matrix(data = NA, nrow = nrow(x), ncol = 7)

  for (i in seq_len(nrow(x))) {
    words <- x[i, ][str_detect(string = x[i, ], pattern = "^[a-zA-Z]+$")]

    ## collapse = " " yields "" for a row without alphabetic tokens; looping
    ## over 1:length(words) produced the bogus label "NA " in that case
    phrase <- paste(words, collapse = " ")
    labs[i] <- phrase

    numbers <- x[i, ][str_detect(string = x[i, ], pattern = "^\\d{1,10}$")]
    numbers <- as.numeric(numbers)
    df[i, ] <- numbers
  }

  ## First part of table 6: first number, label, remaining six numbers
  tab6a <- cbind(df[ , 1], labs, df[ , 2:7])

  tab6 <- health2[[6]][5:50, ]

  ## Hyphens in the label column become spaces (spaced form first), then
  ## punctuation is stripped from all six columns
  for (dash in c(" - ", "-")) {
    tab6[ , 1] <- str_replace_all(string = tab6[ , 1], pattern = dash, replacement = " ")
  }

  for (k in 1:6) {
    tab6[ , k] <- str_replace_all(string = tab6[ , k], pattern = "[[:punct:]]", replacement = "")
  }

  ## Append the text of the row below to the listed label rows (presumably
  ## wrapped labels -- confirm), then drop the appended rows
  for (r in c(4, 11, 14, 19, 22, 24, 27, 33, 35, 37, 40, 44)) {
    tab6[r, 1] <- paste(tab6[r, 1], tab6[r + 1, 1], sep = " ")
  }

  tab6 <- tab6[c(1:4, 6:11, 13:14, 16:19, 21:22, 24, 26:27, 29:33, 35, 37, 39:40, 42:44, 46), ]

  ## Collapse every row into one string and split it into 13 token columns
  tab6 <- paste(tab6[ , 1], tab6[ , 2], tab6[ , 3], tab6[ , 4], tab6[ , 5], tab6[ , 6], sep = " ")

  x <- str_split_fixed(string = tab6, pattern = " ", n = 13)

  ## Per row: alphabetic tokens form the label, tokens of 1-10 digits fill the
  ## 7 numeric columns
  labs <- character(nrow(x))
  df <- matrix(data = NA, nrow = nrow(x), ncol = 7)

  for (i in seq_len(nrow(x))) {
    words <- x[i, ][str_detect(string = x[i, ], pattern = "^[a-zA-Z]+$")]

    ## collapse = " " yields "" for a row without alphabetic tokens; looping
    ## over 1:length(words) produced the bogus label "NA " in that case
    phrase <- paste(words, collapse = " ")
    labs[i] <- phrase

    numbers <- x[i, ][str_detect(string = x[i, ], pattern = "^\\d{1,10}$")]
    numbers <- as.numeric(numbers)
    df[i, ] <- numbers
  }

  ## Second part of table 6: first number, label, remaining six numbers
  tab6b <- cbind(df[ , 1], labs, df[ , 2:7])

  ## Next part of the table: rows 5-48 of the seventh table in `health2`
  tab6 <- health2[[7]][5:48, ]

  # Replace dashes in the first column with spaces, then strip punctuation
  # from all six columns.
  tab6[ , 1] <- str_replace_all(string = tab6[ , 1], pattern = " - ", replacement = " ")
  tab6[ , 1] <- str_replace_all(string = tab6[ , 1], pattern = "-", replacement = " ")
  for (k in 1:6) {
    tab6[ , k] <- str_replace_all(string = tab6[ , k], pattern = "[[:punct:]]", replacement = "")
  }

  # Rows whose first-column text continues on the following row (presumably
  # wrapped labels): append the continuation, then drop the continuation rows.
  wrapped <- c(1, 3, 6, 12, 16, 19, 28, 30, 36)
  tab6[wrapped, 1] <- paste(tab6[wrapped, 1], tab6[wrapped + 1, 1], sep = " ")
  tab6 <- tab6[-(wrapped + 1), ]

  # Collapse every row into one string and re-split it into at most 14 tokens
  tab6 <- paste(tab6[ , 1], tab6[ , 2], tab6[ , 3], tab6[ , 4], tab6[ , 5], tab6[ , 6], sep = " ")
  x <- str_split_fixed(string = tab6, pattern = " ", n = 14)

  # Per row: alphabetic tokens form the label, 1-10 digit tokens the figures
  labs <- character(nrow(x))
  df <- matrix(data = NA, nrow = nrow(x), ncol = 7)

  for (i in seq_len(nrow(x))) {
    words <- x[i, ][str_detect(string = x[i, ], pattern = "^[a-zA-Z]+$")]
    # collapse gives "" (not the former "NA ") when a row has no alphabetic token
    phrase <- paste(words, collapse = " ")
    labs[i] <- phrase
    numbers <- x[i, ][str_detect(string = x[i, ], pattern = "^\\d{1,10}$")]
    numbers <- as.numeric(numbers)
    # NOTE(review): a row without exactly 7 numeric tokens errors or is recycled here
    df[i, ] <- numbers
  }

  tab6c <- cbind(df[ , 1], labs, df[ , 2:7])

  # Stack the three parsed parts and keep 92 of the 97 rows; rows 1, 2, 5, 17
  # and 31 are dropped (presumably category headers - verify against the PDF).
  tab6 <- rbind(tab6a, tab6b, tab6c)
  tab6 <- tab6[c(3:4, 6:16, 18:30, 32:97), ]

  departmentCode <- rep(100, nrow(tab6))
  # Spelling fixed: this label was previously stored as "Curative Servives"
  department <- rep("Curative Services", nrow(tab6))

  # Category of every kept row, assigned by position: 2 + 11 + 13 + 66 rows
  block_sizes <- c(2, 11, 13, 66)
  categoryCode <- rep(c(21, 22, 25, 26), times = block_sizes)
  category <- rep(c("Compensation Of Employees",
                    "Use Of Goods And Services",
                    "Subsidy",
                    "Grants"),
                  times = block_sizes)

  tab6 <- data.frame(departmentCode, department, categoryCode, category, tab6)

  tab6$department <- str_to_title(string = tab6$department)
  tab6$category <- str_to_title(string = tab6$category)

  names(tab6) <- c("departmentCode", "department",
                   "categoryCode", "category",
                   "itemCode", "item",
                   "actual_2016_2017",
                   "budget_2017_2018", "outturn_2017_2018",
                   "budget_2018_2019", "projection_2019_2020",
                   "projection_2020_2021")

  # `item` stays text; the item code and the six yearly figures become numeric
  tab6[[6]] <- as.character(tab6[[6]])
  numeric_cols <- c(5, 7:12)
  tab6[numeric_cols] <- lapply(tab6[numeric_cols],
                               function(v) as.numeric(as.character(v)))

  ################################################################################

  mohHealthCurative2018 <- tab6
  usethis::use_data(mohHealthCurative2018, overwrite = TRUE)

  ################################################################################

  ## Start parsing the next table: rows 12-28 of the eighth table in `health2`
  tab7 <- health2[[8]][12:28, ]

  # Replace dashes in the first column with spaces, then strip punctuation
  # from all six columns.
  tab7[ , 1] <- str_replace_all(string = tab7[ , 1], pattern = " - ", replacement = " ")
  tab7[ , 1] <- str_replace_all(string = tab7[ , 1], pattern = "-", replacement = " ")
  tab7[ , 1] <- str_replace_all(string = tab7[ , 1], pattern = "[[:punct:]]", replacement = "")
  tab7[ , 2] <- str_replace_all(string = tab7[ , 2], pattern = "[[:punct:]]", replacement = "")
  tab7[ , 3] <- str_replace_all(string = tab7[ , 3], pattern = "[[:punct:]]", replacement = "")
  tab7[ , 4] <- str_replace_all(string = tab7[ , 4], pattern = "[[:punct:]]", replacement = "")
  tab7[ , 5] <- str_replace_all(string = tab7[ , 5], pattern = "[[:punct:]]", replacement = "")
  tab7[ , 6] <- str_replace_all(string = tab7[ , 6], pattern = "[[:punct:]]", replacement = "")

  # Append the first-column text of rows 6, 11 and 17 to the row above each
  # (presumably wrapped labels), then drop those continuation rows.
  tab7[5, 1] <- paste(tab7[5, 1], tab7[6, 1], sep = " ")
  tab7[10, 1] <- paste(tab7[10, 1], tab7[11, 1], sep = " ")
  tab7[16, 1] <- paste(tab7[16, 1], tab7[17, 1], sep = " ")

  tab7 <- tab7[c(1:5, 7:10, 12:16), ]

  # Collapse every row into one string and re-split it into at most 13 tokens
  tab7 <- paste(tab7[ , 1], tab7[ , 2], tab7[ , 3], tab7[ , 4], tab7[ , 5], tab7[ , 6], sep = " ")

  x <- str_split_fixed(string = tab7, pattern = " ", n = 13)

  labs <- NULL
  df <- matrix(data = NA, nrow = nrow(x), ncol = 7)

  # Per row: gather the alphabetic tokens and join them into a label
  for(i in 1:(nrow(x))) {
    words <- x[i, ][str_detect(string = x[i, ], pattern = "^[a-zA-Z]+$")]
    phrase <- ""


    for(j in 1:length(words)){
      phrase <- paste(phrase, words[j], sep = " ")
    }
    # NOTE(review): the remainder of this loop body (label clean-up, number
    # extraction, `df[i, ] <- numbers`) is missing here. What follows looks like
    # a pasted copy of the whole script, nested inside this still-open loop -
    # it would re-run the PDF extraction on every iteration. Confirm and remove.
    ################################################################################
    #
    # Load required libraries
    #
    ################################################################################

    library(pdftools)
    library(tm)
    library(tabulizer)
    library(stringr)
    library(tidyverse)
    library(tidytext)

    options(scipen = 999)


    ################################################################################
    #
    # Function to process tables
    #
    ################################################################################

    # Split tokenised table rows into text labels and a numeric matrix.
    #
    # Arguments:
    #   tab   list (or vector) whose i-th element holds the tokens of row i
    #   nrow  number of rows of the result matrix; NULL means length(tab)
    #   ncol  number of numeric fields per row; NULL means the largest number
    #         of numeric tokens found in any row
    #
    # Returns a list with
    #   labs  character vector: the purely alphabetic tokens of each row,
    #         joined by single spaces ("" when a row has none)
    #   df    numeric matrix (nrow x ncol): the 6- to 8-digit tokens of each row
    #
    # The function used to build both objects and then discard them (its value
    # was that of the `for` loop, i.e. NULL); they are now returned.
    get_table <- function(tab, nrow = NULL, ncol = NULL) {
      word_pattern <- "^[a-zA-Z]+$"
      number_pattern <- "^\\d{6,8}$"

      labs <- vapply(
        tab,
        function(tokens) paste(tokens[grepl(word_pattern, tokens)], collapse = " "),
        character(1),
        USE.NAMES = FALSE
      )
      number_sets <- lapply(
        tab,
        function(tokens) as.numeric(tokens[grepl(number_pattern, tokens)])
      )

      # The NULL defaults used to crash matrix(); derive the extents instead
      if (is.null(nrow)) {
        nrow <- length(tab)
      }
      if (is.null(ncol)) {
        ncol <- max(0L, lengths(number_sets))
      }
      if (nrow < length(tab)) {
        stop("`nrow` is smaller than the number of rows in `tab`", call. = FALSE)
      }

      df <- matrix(data = NA_real_, nrow = nrow, ncol = ncol)

      for (i in seq_along(tab)) {
        # Fail loudly rather than letting R recycle a short row across columns
        if (length(number_sets[[i]]) != ncol) {
          stop("row ", i, " has ", length(number_sets[[i]]),
               " numeric fields, expected ", ncol, call. = FALSE)
        }
        df[i, ] <- number_sets[[i]]
      }

      list(labs = labs, df = df)
    }



    ################################################################################
    #
    # Create list for information and tables in page 253 (2018)
    #
    ################################################################################

    ## Extract tables from page 253
    health1 <- extract_tables(file = "data-raw/budget/2018 Ministry of Finance and Development Planning.pdf",
                              pages = 253,
                              method = "decide")

    #goal <- health1[[1]][3, ]
    #strategicObjective <- str_c(health1[[1]][5, ], health1[[1]][6, ], health1[[1]][7, ], sep = " ")

    ## First table on page 253: rows 27-31 of the extracted matrix.
    ## Join the two columns, strip punctuation and split into at most 12 tokens.
    tab1 <- health1[[1]][27:31, ]
    tab1 <- paste(tab1[ , 1], tab1[ , 2], sep = " ")

    # `replacement` spelled out in full (was `replace`, which only worked
    # through partial argument matching)
    tab1 <- str_replace_all(string = tab1, pattern = "[[:punct:]]", replacement = "")
    tab1 <- str_split_fixed(string = tab1, pattern = " ", n = 12)

    ## Per row: alphabetic tokens form the classification label,
    ## 1-10 digit tokens the code and the six yearly figures
    economic_classification <- character(nrow(tab1))
    df <- matrix(data = NA, nrow = nrow(tab1), ncol = 7)

    for (i in seq_len(nrow(tab1))) {
      words <- tab1[i, ][str_detect(string = tab1[i, ], pattern = "^[a-zA-Z]+$")]
      # collapse gives "" (not the former "NA ") when a row has no alphabetic token
      phrase <- paste(words, collapse = " ")
      economic_classification[i] <- phrase
      numbers <- tab1[i, ][str_detect(string = tab1[i, ], pattern = "^\\d{1,10}$")]
      numbers <- as.numeric(numbers)
      # NOTE(review): a row without exactly 7 numeric tokens errors or is recycled here
      df[i, ] <- numbers
    }

    df <- data.frame(df[ , 1], economic_classification, df[ , 2:7])
    names(df) <- c("code", "economic_classification",
                   "actual_2016_2017",
                   "budget_2017_2018", "outturn_2017_2018",
                   "budget_2018_2019", "projection_2019_2020",
                   "projection_2020_2021")

    df$economic_classification <- str_to_sentence(df$economic_classification)

    summaryEconHealth2018 <- df

    ## Second table on page 253: rows 37-46 of the extracted matrix.
    ## Join the two columns, strip punctuation and split into at most 12 tokens.
    tab2 <- health1[[1]][37:46, ]
    tab2 <- paste(tab2[ , 1], tab2[ , 2], sep = " ")
    # `replacement` spelled out in full (was `replace`, which only worked
    # through partial argument matching)
    tab2 <- str_replace_all(string = tab2, pattern = "[[:punct:]]", replacement = "")

    tab2 <- str_split_fixed(string = tab2, pattern = " ", n = 12)

    ## Per row: alphabetic tokens form the spending-entity label,
    ## 1-10 digit tokens the code and the six yearly figures
    spending_entity <- character(nrow(tab2))
    df <- matrix(data = NA, nrow = nrow(tab2), ncol = 7)

    for (i in seq_len(nrow(tab2))) {
      words <- tab2[i, ][str_detect(string = tab2[i, ], pattern = "^[a-zA-Z]+$")]
      # collapse gives "" (not the former "NA ") when a row has no alphabetic token
      phrase <- paste(words, collapse = " ")
      spending_entity[i] <- phrase
      numbers <- tab2[i, ][str_detect(string = tab2[i, ], pattern = "^\\d{1,10}$")]
      numbers <- as.numeric(numbers)
      # NOTE(review): a row without exactly 7 numeric tokens errors or is recycled here
      df[i, ] <- numbers
    }

    df <- data.frame(df[ , 1], spending_entity, df[ , 2:7])
    names(df) <- c("code", "spending_entity",
                   "actual_2016_2017",
                   "budget_2017_2018", "outturn_2017_2018",
                   "budget_2018_2019", "projection_2019_2020",
                   "projection_2020_2021")

    df$spending_entity <- str_to_title(df$spending_entity)

    summarySpendingHealth2018 <- df

    ## Bundle both summaries into one named list and store it as package data
    summaryHealth2018 <- list(summaryEconHealth2018 = summaryEconHealth2018,
                              summarySpendingHealth2018 = summarySpendingHealth2018)

    usethis::use_data(summaryHealth2018, overwrite = TRUE)

    ## Tidy-up (now also removes `spending_entity`, which used to be left behind)
    rm(words, phrase, numbers, economic_classification, spending_entity,
       tab1, tab2, df,
       summaryEconHealth2018, summarySpendingHealth2018, health1)

    ################################################################################
    #
    #
    #
    ################################################################################

    ## Extract tables from pages 254-266
    health2 <- extract_tables(file = "data-raw/budget/2018 Ministry of Finance and Development Planning.pdf",
                              pages = 254:266,
                              method = "decide")

    ## First part of the table: rows 14-48 of the first table in `health2`
    tab1 <- health2[[1]][14:48, ]

    # Replace en dashes and hyphens in the first column with spaces (same
    # order as before), then strip punctuation from all six columns.
    for (dash in c(" – ", "–", "-")) {
      tab1[ , 1] <- str_replace_all(string = tab1[ , 1], pattern = dash, replacement = " ")
    }
    for (k in 1:6) {
      tab1[ , k] <- str_replace_all(string = tab1[ , k], pattern = "[[:punct:]]", replacement = "")
    }

    # Rows whose first-column text continues on the following row (presumably
    # wrapped labels): append the continuation, then drop the continuation rows.
    wrapped <- c(7, 11)
    tab1[wrapped, 1] <- paste(tab1[wrapped, 1], tab1[wrapped + 1, 1], sep = " ")
    tab1 <- tab1[-(wrapped + 1), ]

    # The sixth column holds two space-separated values; split it in two
    y <- str_split_fixed(string = tab1[ , 6], pattern = " ", n = 2)
    tab1 <- cbind(tab1[ , 1:5], y)

    # Tokenise the first column (at most 8 tokens per row)
    x <- str_split_fixed(string = tab1[ , 1], pattern = " ", n = 8)

    # Per row: alphabetic tokens form the label, 1-10 digit tokens the numbers
    labs <- character(nrow(x))
    df <- matrix(data = NA, nrow = nrow(x), ncol = 2)

    for (i in seq_len(nrow(x))) {
      words <- x[i, ][str_detect(string = x[i, ], pattern = "^[a-zA-Z]+$")]
      # collapse gives "" (not the former "NA ") when a row has no alphabetic token
      phrase <- paste(words, collapse = " ")
      labs[i] <- phrase
      numbers <- x[i, ][str_detect(string = x[i, ], pattern = "^\\d{1,10}$")]
      numbers <- as.numeric(numbers)
      # NOTE(review): a row with a single numeric token is silently recycled
      # into both columns here
      df[i, ] <- numbers
    }

    tab1 <- data.frame(df[ , 1], labs, df[ , 2], tab1[ , 3:7])
    # Rows 1, 5 and 27 are dropped; their labels are reused below as the
    # category names of the rows that follow each of them.
    tab1 <- tab1[c(2:4, 6:26, 28:33), ]

    block_sizes <- c(3, 21, 6)
    categoryCode <- rep(c(21, 22, 25), times = block_sizes)
    category <- rep(labs[c(1, 5, 27)], times = block_sizes)

    tab1 <- data.frame(categoryCode, category, tab1)

    tab1$category <- str_to_title(tab1$category)

    names(tab1) <- c("categoryCode", "category",
                     "itemCode", "item",
                     "actual_2016_2017",
                     "budget_2017_2018", "outturn_2017_2018",
                     "budget_2018_2019", "projection_2019_2020",
                     "projection_2020_2021")

    # `item` stays text; the six yearly figures become numeric
    tab1[[4]] <- as.character(tab1[[4]])
    tab1[5:10] <- lapply(tab1[5:10], function(v) as.numeric(as.character(v)))


    ################################################################################

    ## Second part of the table: rows 6-48 of the second table in `health2`
    tab2 <- health2[[2]][6:48, ]

    # Replace en dashes and hyphens in the first column with spaces (same
    # order as before), then strip punctuation from all six columns.
    for (dash in c(" – ", "–", "-")) {
      tab2[ , 1] <- str_replace_all(string = tab2[ , 1], pattern = dash, replacement = " ")
    }
    for (k in 1:6) {
      tab2[ , k] <- str_replace_all(string = tab2[ , k], pattern = "[[:punct:]]", replacement = "")
    }

    # Rows whose first-column text continues on the following row (presumably
    # wrapped labels): append the continuation, then drop the continuation rows.
    wrapped <- c(9, 15, 31, 36)
    tab2[wrapped, 1] <- paste(tab2[wrapped, 1], tab2[wrapped + 1, 1], sep = " ")
    tab2 <- tab2[-(wrapped + 1), ]

    # The sixth column holds two space-separated values; split it in two
    y <- str_split_fixed(string = tab2[ , 6], pattern = " ", n = 2)
    tab2 <- cbind(tab2[ , 1:5], y)

    # Tokenise the first column (at most 8 tokens per row)
    x <- str_split_fixed(string = tab2[ , 1], pattern = " ", n = 8)

    # Per row: alphabetic tokens form the label, 1-10 digit tokens the numbers
    labs <- character(nrow(x))
    df <- matrix(data = NA, nrow = nrow(x), ncol = 2)

    for (i in seq_len(nrow(x))) {
      words <- x[i, ][str_detect(string = x[i, ], pattern = "^[a-zA-Z]+$")]
      # collapse gives "" (not the former "NA ") when a row has no alphabetic token
      phrase <- paste(words, collapse = " ")
      labs[i] <- phrase
      numbers <- x[i, ][str_detect(string = x[i, ], pattern = "^\\d{1,10}$")]
      numbers <- as.numeric(numbers)
      # NOTE(review): a row with a single numeric token is silently recycled
      # into both columns here
      df[i, ] <- numbers
    }

    tab2 <- data.frame(df[ , 1], labs, df[ , 2], tab2[ , 3:7])
    # Row 8 is dropped; its label names the category of the 31 rows after it
    tab2 <- tab2[-8, ]

    block_sizes <- c(7, 31)
    categoryCode <- rep(c(25, 26), times = block_sizes)
    category <- rep(c("Subsidy", labs[8]), times = block_sizes)

    tab2 <- data.frame(categoryCode, category, tab2)

    tab2$category <- str_to_title(tab2$category)

    names(tab2) <- c("categoryCode", "category",
                     "itemCode", "item",
                     "actual_2016_2017",
                     "budget_2017_2018", "outturn_2017_2018",
                     "budget_2018_2019", "projection_2019_2020",
                     "projection_2020_2021")

    # `item` stays text; the six yearly figures become numeric
    tab2[[4]] <- as.character(tab2[[4]])
    tab2[5:10] <- lapply(tab2[5:10], function(v) as.numeric(as.character(v)))

    ################################################################################

    ## Third part of the table: rows 6-46 of the third table in `health2`
    tab3 <- health2[[3]][6:46, ]

    # Replace en dashes and hyphens in the first column with spaces (same
    # order as before), then strip punctuation from all six columns.
    for (dash in c(" – ", "–", "-")) {
      tab3[ , 1] <- str_replace_all(string = tab3[ , 1], pattern = dash, replacement = " ")
    }
    for (k in 1:6) {
      tab3[ , k] <- str_replace_all(string = tab3[ , k], pattern = "[[:punct:]]", replacement = "")
    }

    # Rows whose first-column text continues on the following row (presumably
    # wrapped labels): append the continuation text to the row above.
    wrapped <- c(1, 14, 23)
    tab3[wrapped, 1] <- paste(tab3[wrapped, 1], tab3[wrapped + 1, 1], sep = " ")
    # The last row gets the literal word "County" appended (presumably its
    # continuation lies outside the extracted rows - verify against the PDF).
    tab3[41, 1] <- paste(tab3[41, 1], "County", sep = " ")

    tab3 <- tab3[-(wrapped + 1), ]

    # The sixth column holds two space-separated values; split it in two
    y <- str_split_fixed(string = tab3[ , 6], pattern = " ", n = 2)
    tab3 <- cbind(tab3[ , 1:5], y)

    # Tokenise the first column (at most 8 tokens per row)
    x <- str_split_fixed(string = tab3[ , 1], pattern = " ", n = 8)

    # Per row: alphabetic tokens form the label, 1-10 digit tokens the numbers
    labs <- character(nrow(x))
    df <- matrix(data = NA, nrow = nrow(x), ncol = 2)

    for (i in seq_len(nrow(x))) {
      words <- x[i, ][str_detect(string = x[i, ], pattern = "^[a-zA-Z]+$")]
      # collapse gives "" (not the former "NA ") when a row has no alphabetic token
      phrase <- paste(words, collapse = " ")
      labs[i] <- phrase
      numbers <- x[i, ][str_detect(string = x[i, ], pattern = "^\\d{1,10}$")]
      numbers <- as.numeric(numbers)
      # NOTE(review): a row with a single numeric token is silently recycled
      # into both columns here
      df[i, ] <- numbers
    }

    tab3 <- data.frame(df[ , 1], labs, df[ , 2], tab3[ , 3:7])

    # Every row of this part is filed under category 26, "Grants"
    categoryCode <- rep(26, nrow(tab3))
    category <- rep("Grants", nrow(tab3))

    tab3 <- data.frame(categoryCode, category, tab3)

    tab3$category <- str_to_title(tab3$category)

    names(tab3) <- c("categoryCode", "category",
                     "itemCode", "item",
                     "actual_2016_2017",
                     "budget_2017_2018", "outturn_2017_2018",
                     "budget_2018_2019", "projection_2019_2020",
                     "projection_2020_2021")

    # `item` stays text; the six yearly figures become numeric
    tab3[[4]] <- as.character(tab3[[4]])
    tab3[5:10] <- lapply(tab3[5:10], function(v) as.numeric(as.character(v)))

    ################################################################################

    ## Fourth part of the table: rows 6-24 of the fourth table in `health2`
    tab4 <- health2[[4]][6:24, ]

    # Row 2 continues the first-column text of row 1 (presumably a wrapped
    # label): merge it into row 1 and drop it.
    tab4[1, 1] <- paste(tab4[1, 1], tab4[2, 1], sep = " ")
    tab4 <- tab4[-2, ]

    # Replace en dashes and hyphens in the first column with spaces (same
    # order as before), then strip punctuation from all six columns.
    for (dash in c(" – ", "–", "-")) {
      tab4[ , 1] <- str_replace_all(string = tab4[ , 1], pattern = dash, replacement = " ")
    }
    for (k in 1:6) {
      tab4[ , k] <- str_replace_all(string = tab4[ , k], pattern = "[[:punct:]]", replacement = "")
    }

    # The sixth column holds two space-separated values; split it in two
    y <- str_split_fixed(string = tab4[ , 6], pattern = " ", n = 2)
    tab4 <- cbind(tab4[ , 1:5], y)

    # Tokenise the first column (at most 8 tokens per row)
    x <- str_split_fixed(string = tab4[ , 1], pattern = " ", n = 8)

    # Per row: alphabetic tokens form the label, 1-10 digit tokens the numbers
    labs <- character(nrow(x))
    df <- matrix(data = NA, nrow = nrow(x), ncol = 2)

    for (i in seq_len(nrow(x))) {
      words <- x[i, ][str_detect(string = x[i, ], pattern = "^[a-zA-Z]+$")]
      # collapse gives "" (not the former "NA ") when a row has no alphabetic token
      phrase <- paste(words, collapse = " ")
      labs[i] <- phrase
      numbers <- x[i, ][str_detect(string = x[i, ], pattern = "^\\d{1,10}$")]
      numbers <- as.numeric(numbers)
      # NOTE(review): a row with a single numeric token is silently recycled
      # into both columns here
      df[i, ] <- numbers
    }

    tab4 <- data.frame(df[ , 1], labs, df[ , 2], tab4[ , 3:7])
    # Row 17 is dropped; its label names the category of the single row after it
    tab4 <- tab4[-17, ]

    block_sizes <- c(16, 1)
    categoryCode <- rep(c(26, 31), times = block_sizes)
    category <- rep(c("Grants", labs[17]), times = block_sizes)

    tab4 <- data.frame(categoryCode, category, tab4)

    tab4$category <- str_to_title(tab4$category)

    names(tab4) <- c("categoryCode", "category",
                     "itemCode", "item",
                     "actual_2016_2017",
                     "budget_2017_2018", "outturn_2017_2018",
                     "budget_2018_2019", "projection_2019_2020",
                     "projection_2020_2021")

    # `item` stays text; the six yearly figures become numeric
    tab4[[4]] <- as.character(tab4[[4]])
    tab4[5:10] <- lapply(tab4[5:10], function(v) as.numeric(as.character(v)))

    ################################################################################

    # Stack the four partial tables row-wise into a single data frame and
    # store it as package data through usethis::use_data().
    mohHealthEcon2018 <- data.frame(rbind(tab1, tab2, tab3, tab4))
    usethis::use_data(mohHealthEcon2018, overwrite = TRUE)

    ################################################################################

    ## County table: rows 30-44 of the fourth table in `health2`
    tab5 <- health2[[4]][30:44, ]

    # Replace hyphens in the first column with spaces, then strip punctuation
    # from all six columns.
    tab5[ , 1] <- str_replace_all(string = tab5[ , 1], pattern = "-", replacement = " ")
    for (k in 1:6) {
      tab5[ , k] <- str_replace_all(string = tab5[ , k], pattern = "[[:punct:]]", replacement = "")
    }

    # Tokenise the first column (at most 5 tokens per row) and split the
    # sixth column, which holds two space-separated values, in two.
    x <- str_split_fixed(string = tab5[ , 1], pattern = " ", n = 5)
    y <- str_split_fixed(string = tab5[ , 6], pattern = " ", n = 2)

    tab5 <- cbind(tab5[ , 1:5], y)

    # Per row: alphabetic tokens form the county name, 1-10 digit tokens the numbers
    labs <- character(nrow(x))
    df <- matrix(data = NA, nrow = nrow(x), ncol = 2)

    for (i in seq_len(nrow(x))) {
      words <- x[i, ][str_detect(string = x[i, ], pattern = "^[a-zA-Z]+$")]
      # collapse gives "" (not the former "NA ") when a row has no alphabetic token
      phrase <- paste(words, collapse = " ")
      labs[i] <- phrase
      numbers <- x[i, ][str_detect(string = x[i, ], pattern = "^\\d{1,10}$")]
      numbers <- as.numeric(numbers)
      # NOTE(review): a row with a single numeric token is silently recycled
      # into both columns here
      df[i, ] <- numbers
    }

    # The label column is named `labs` after the variable; it is title-cased
    # before the columns are renamed.
    tab5 <- data.frame(df[ , 1], labs, df[ , 2], tab5[ , 3:7])

    tab5$labs <- str_to_title(tab5$labs)

    names(tab5) <- c("countyCode", "county",
                     "actual_2016_2017",
                     "budget_2017_2018", "outturn_2017_2018",
                     "budget_2018_2019", "projection_2019_2020",
                     "projection_2020_2021")

    # Columns 4-8 come from the text matrix; convert them to numeric
    tab5[4:8] <- lapply(tab5[4:8], function(v) as.numeric(as.character(v)))

    ################################################################################

    mohHealthCounty2018 <- tab5
    usethis::use_data(mohHealthCounty2018, overwrite = TRUE)

    ################################################################################

    ## First part of the table: rows 14-48 of the fifth table in `health2`
    tab6 <- health2[[5]][14:48, ]

    # Replace dashes in the first column with spaces, then strip punctuation
    # from the four columns used for this part.
    tab6[ , 1] <- str_replace_all(string = tab6[ , 1], pattern = " - ", replacement = " ")
    tab6[ , 1] <- str_replace_all(string = tab6[ , 1], pattern = "-", replacement = " ")
    for (k in 1:4) {
      tab6[ , k] <- str_replace_all(string = tab6[ , k], pattern = "[[:punct:]]", replacement = "")
    }

    # Rows whose first-column text continues on the following row (presumably
    # wrapped labels): append the continuation, then drop the continuation rows.
    wrapped <- c(7, 9, 11, 16, 23, 28, 31)
    tab6[wrapped, 1] <- paste(tab6[wrapped, 1], tab6[wrapped + 1, 1], sep = " ")
    tab6 <- tab6[-(wrapped + 1), ]

    # Collapse every row into one string and re-split it into at most 13 tokens
    tab6 <- paste(tab6[ , 1], tab6[ , 2], tab6[ , 3], tab6[ , 4], sep = " ")
    x <- str_split_fixed(string = tab6, pattern = " ", n = 13)

    # Per row: alphabetic tokens form the label, 1-10 digit tokens the figures
    labs <- character(nrow(x))
    df <- matrix(data = NA, nrow = nrow(x), ncol = 7)

    for (i in seq_len(nrow(x))) {
      words <- x[i, ][str_detect(string = x[i, ], pattern = "^[a-zA-Z]+$")]
      # collapse gives "" (not the former "NA ") when a row has no alphabetic token
      phrase <- paste(words, collapse = " ")
      labs[i] <- phrase
      numbers <- x[i, ][str_detect(string = x[i, ], pattern = "^\\d{1,10}$")]
      numbers <- as.numeric(numbers)
      # NOTE(review): a row without exactly 7 numeric tokens errors or is recycled here
      df[i, ] <- numbers
    }

    tab6a <- cbind(df[ , 1], labs, df[ , 2:7])

    ## Next part of the table: rows 5-50 of the sixth table in `health2`
    tab6 <- health2[[6]][5:50, ]

    # Replace dashes in the first column with spaces, then strip punctuation
    # from all six columns.
    tab6[ , 1] <- str_replace_all(string = tab6[ , 1], pattern = " - ", replacement = " ")
    tab6[ , 1] <- str_replace_all(string = tab6[ , 1], pattern = "-", replacement = " ")
    for (k in 1:6) {
      tab6[ , k] <- str_replace_all(string = tab6[ , k], pattern = "[[:punct:]]", replacement = "")
    }

    # Rows whose first-column text continues on the following row (presumably
    # wrapped labels): append the continuation, then drop the continuation rows.
    wrapped <- c(4, 11, 14, 19, 22, 24, 27, 33, 35, 37, 40, 44)
    tab6[wrapped, 1] <- paste(tab6[wrapped, 1], tab6[wrapped + 1, 1], sep = " ")
    tab6 <- tab6[-(wrapped + 1), ]

    # Collapse every row into one string and re-split it into at most 13 tokens
    tab6 <- paste(tab6[ , 1], tab6[ , 2], tab6[ , 3], tab6[ , 4], tab6[ , 5], tab6[ , 6], sep = " ")
    x <- str_split_fixed(string = tab6, pattern = " ", n = 13)

    # Per row: alphabetic tokens form the label, 1-10 digit tokens the figures
    labs <- character(nrow(x))
    df <- matrix(data = NA, nrow = nrow(x), ncol = 7)

    for (i in seq_len(nrow(x))) {
      words <- x[i, ][str_detect(string = x[i, ], pattern = "^[a-zA-Z]+$")]
      # collapse gives "" (not the former "NA ") when a row has no alphabetic token
      phrase <- paste(words, collapse = " ")
      labs[i] <- phrase
      numbers <- x[i, ][str_detect(string = x[i, ], pattern = "^\\d{1,10}$")]
      numbers <- as.numeric(numbers)
      # NOTE(review): a row without exactly 7 numeric tokens errors or is recycled here
      df[i, ] <- numbers
    }

    tab6b <- cbind(df[ , 1], labs, df[ , 2:7])

    ## Next part of the table: rows 5-48 of the seventh table in `health2`
    tab6 <- health2[[7]][5:48, ]

    # Replace dashes in the first column with spaces, then strip punctuation
    # from all six columns.
    tab6[ , 1] <- str_replace_all(string = tab6[ , 1], pattern = " - ", replacement = " ")
    tab6[ , 1] <- str_replace_all(string = tab6[ , 1], pattern = "-", replacement = " ")
    for (k in 1:6) {
      tab6[ , k] <- str_replace_all(string = tab6[ , k], pattern = "[[:punct:]]", replacement = "")
    }

    # Rows whose first-column text continues on the following row (presumably
    # wrapped labels): append the continuation, then drop the continuation rows.
    wrapped <- c(1, 3, 6, 12, 16, 19, 28, 30, 36)
    tab6[wrapped, 1] <- paste(tab6[wrapped, 1], tab6[wrapped + 1, 1], sep = " ")
    tab6 <- tab6[-(wrapped + 1), ]

    # Collapse every row into one string and re-split it into at most 14 tokens
    tab6 <- paste(tab6[ , 1], tab6[ , 2], tab6[ , 3], tab6[ , 4], tab6[ , 5], tab6[ , 6], sep = " ")
    x <- str_split_fixed(string = tab6, pattern = " ", n = 14)

    # Per row: alphabetic tokens form the label, 1-10 digit tokens the figures
    labs <- character(nrow(x))
    df <- matrix(data = NA, nrow = nrow(x), ncol = 7)

    for (i in seq_len(nrow(x))) {
      words <- x[i, ][str_detect(string = x[i, ], pattern = "^[a-zA-Z]+$")]
      # collapse gives "" (not the former "NA ") when a row has no alphabetic token
      phrase <- paste(words, collapse = " ")
      labs[i] <- phrase
      numbers <- x[i, ][str_detect(string = x[i, ], pattern = "^\\d{1,10}$")]
      numbers <- as.numeric(numbers)
      # NOTE(review): a row without exactly 7 numeric tokens errors or is recycled here
      df[i, ] <- numbers
    }

    tab6c <- cbind(df[ , 1], labs, df[ , 2:7])

    tab6 <- rbind(tab6a, tab6b, tab6c)
    tab6 <- tab6[c(3:4, 6:16, 18:30, 32:97), ]

    departmentCode <- rep(100, nrow(tab6))
    department <- rep("Curative Servives", nrow(tab6))

    categoryCode <- c(rep(21, 2), rep(22, 11), rep(25, 13), rep(26, 66))
    category <- c(rep("Compensation Of Employees", 2),
                  rep("Use Of Goods And Services", 11),
                  rep("Subsidy", 13),
                  rep("Grants", 66))

    tab6 <- data.frame(departmentCode, department, categoryCode, category, tab6)

    tab6$department <- str_to_title(string = tab6$department)
    tab6$category <- str_to_title(string = tab6$category)

    names(tab6) <- c("departmentCode", "department",
                     "categoryCode", "category",
                     "itemCode", "item",
                     "actual_2016_2017",
                     "budget_2017_2018", "outturn_2017_2018",
                     "budget_2018_2019", "projection_2019_2020",
                     "projection_2020_2021")

    tab6[ , 5] <- as.numeric(as.character(tab6[ , 5]))
    tab6[ , 6] <- as.character(tab6[ , 6])
    tab6[ , 7] <- as.numeric(as.character(tab6[ , 7]))
    tab6[ , 8] <- as.numeric(as.character(tab6[ , 8]))
    tab6[ , 9] <- as.numeric(as.character(tab6[ , 9]))
    tab6[ , 10] <- as.numeric(as.character(tab6[ , 10]))
    tab6[ , 11] <- as.numeric(as.character(tab6[ , 11]))
    tab6[ , 12] <- as.numeric(as.character(tab6[ , 12]))

    ################################################################################

    mohHealthCurative2018 <- tab6
    usethis::use_data(mohHealthCurative2018, overwrite = TRUE)

    ################################################################################

    ## Rows 12 to 28 of the eighth table in `health2`
    tab7 <- health2[[8]][12:28, ]

    ## Column 1: turn hyphens (spaced first, then bare) into single spaces
    tab7[ , 1] <- str_replace_all(string = tab7[ , 1], pattern = " - ", replacement = " ")
    tab7[ , 1] <- str_replace_all(string = tab7[ , 1], pattern = "-", replacement = " ")

    ## Columns 1 to 6: delete every remaining punctuation character
    for (col_idx in 1:6) {
      tab7[ , col_idx] <- str_replace_all(string = tab7[ , col_idx],
                                          pattern = "[[:punct:]]",
                                          replacement = "")
    }

    ## Append the text of the following row to each of these rows, then drop
    ## the rows that were appended
    merged_rows <- c(5, 10, 16)
    tab7[merged_rows, 1] <- paste(tab7[merged_rows, 1], tab7[merged_rows + 1, 1], sep = " ")

    tab7 <- tab7[c(1:5, 7:10, 12:16), ]

    ## One string per row, re-split into at most 13 space-separated tokens
    tab7 <- paste(tab7[ , 1], tab7[ , 2], tab7[ , 3], tab7[ , 4], tab7[ , 5], tab7[ , 6], sep = " ")

    x <- str_split_fixed(string = tab7, pattern = " ", n = 13)

    ## Purely alphabetic tokens form the row label; tokens of 1 to 10 digits are
    ## the seven figures of the row
    labs <- character(nrow(x))
    df <- matrix(data = NA, nrow = nrow(x), ncol = 7)

    for (i in seq_len(nrow(x))) {
      words <- x[i, ][str_detect(string = x[i, ], pattern = "^[a-zA-Z]+$")]
      ## collapse is safe for a row without words (gives "" rather than "NA ")
      phrase <- paste(words, collapse = " ")
      labs[i] <- phrase
      ## Figures stay as text here; the columns are converted to numeric once
      ## the data.frame has been built below
      numbers <- x[i, ][str_detect(string = x[i, ], pattern = "^\\d{1,10}$")]
      df[i, ] <- numbers
    }

    ## Keep only the item rows
    tab7 <- cbind(df[ , 1], labs, df[ , 2:7])
    tab7 <- tab7[c(3, 5:12, 14), ]

    ## Department and category names are read from the parsed labels
    departmentCode <- rep(200, nrow(tab7))
    department <- rep(labs[1], nrow(tab7))

    categoryCode <- c(21, rep(22, 8), 26)
    category <- c(labs[2], rep(labs[4], 8), labs[13])

    tab7 <- data.frame(departmentCode, department, categoryCode, category, tab7)

    tab7$department <- str_to_title(string = tab7$department)
    tab7$category <- str_to_title(string = tab7$category)

    names(tab7) <- c("departmentCode", "department",
                     "categoryCode", "category",
                     "itemCode", "item",
                     "actual_2016_2017",
                     "budget_2017_2018", "outturn_2017_2018",
                     "budget_2018_2019", "projection_2019_2020",
                     "projection_2020_2021")

    ## The bound matrix was character: item stays text, all else becomes numeric
    tab7[ , 6] <- as.character(tab7[ , 6])

    for (col_idx in c(5, 7:12)) {
      tab7[ , col_idx] <- as.numeric(as.character(tab7[ , col_idx]))
    }

    ################################################################################

    mohHealthPreventive2018 <- tab7
    usethis::use_data(mohHealthPreventive2018, overwrite = TRUE)

    ################################################################################

    ## Rows 10 to 30 of the ninth table in `health2`
    tab8 <- health2[[9]][10:30, ]

    ## Append the text of the following row to each of these rows, then drop
    ## the rows that were appended
    merged_rows <- c(1, 7, 9, 11, 16)
    tab8[merged_rows, 1] <- paste(tab8[merged_rows, 1], tab8[merged_rows + 1, 1], sep = " ")

    tab8 <- tab8[c(1, 3:7, 9, 11, 13:16, 18:21), ]

    ## Column 1: turn hyphens into spaces
    tab8[ , 1] <- str_replace_all(string = tab8[ , 1], pattern = "-", replacement = " ")

    ## Columns 1 to 6: delete every remaining punctuation character
    for (col_idx in 1:6) {
      tab8[ , col_idx] <- str_replace_all(string = tab8[ , col_idx],
                                          pattern = "[[:punct:]]",
                                          replacement = "")
    }

    ## One string per row, re-split into at most 14 space-separated tokens
    tab8 <- paste(tab8[ , 1], tab8[ , 2], tab8[ , 3], tab8[ , 4], tab8[ , 5], tab8[ , 6], sep = " ")

    x <- str_split_fixed(string = tab8, pattern = " ", n = 14)

    ## Purely alphabetic tokens form the row label; tokens of 1 to 10 digits are
    ## the seven figures of the row
    labs <- character(nrow(x))
    df <- matrix(data = NA, nrow = nrow(x), ncol = 7)

    for (i in seq_len(nrow(x))) {
      words <- x[i, ][str_detect(string = x[i, ], pattern = "^[a-zA-Z]+$")]
      ## collapse is safe for a row without words (gives "" rather than "NA ")
      phrase <- paste(words, collapse = " ")
      labs[i] <- phrase
      numbers <- x[i, ][str_detect(string = x[i, ], pattern = "^\\d{1,10}$")]
      numbers <- as.numeric(numbers)
      df[i, ] <- numbers
    }

    ## Keep only the item rows
    tab8 <- cbind(df[ , 1], labs, df[ , 2:7])
    tab8 <- tab8[c(3, 5:14, 16), ]

    ## Department and category names are read from the parsed labels
    departmentCode <- rep(400, nrow(tab8))
    department <- rep(labs[1], nrow(tab8))

    categoryCode <- c(21, rep(22, 10), 26)
    category <- c(labs[2], rep(labs[4], 10), labs[15])

    tab8 <- data.frame(departmentCode, department, categoryCode, category, tab8)

    tab8$department <- str_to_title(string = tab8$department)
    tab8$category <- str_to_title(string = tab8$category)

    names(tab8) <- c("departmentCode", "department",
                     "categoryCode", "category",
                     "itemCode", "item",
                     "actual_2016_2017",
                     "budget_2017_2018", "outturn_2017_2018",
                     "budget_2018_2019", "projection_2019_2020",
                     "projection_2020_2021")

    ## The bound matrix was character: item stays text, all else becomes numeric
    tab8[ , 6] <- as.character(tab8[ , 6])

    for (col_idx in c(5, 7:12)) {
      tab8[ , col_idx] <- as.numeric(as.character(tab8[ , col_idx]))
    }

    ################################################################################

    mohHealthPlanning2018 <- tab8
    usethis::use_data(mohHealthPlanning2018, overwrite = TRUE)

    ################################################################################

    ## Rows 5 to 20 of the tenth table in `health2`
    tab9 <- health2[[10]][5:20, ]

    ## Append the text of the following row to each of these rows, then drop
    ## the rows that were appended
    merged_rows <- c(7, 9, 13)
    tab9[merged_rows, 1] <- paste(tab9[merged_rows, 1], tab9[merged_rows + 1, 1], sep = " ")

    tab9 <- tab9[c(1:7, 9, 11:13, 15:16), ]

    ## Column 1: turn hyphens and en dashes into single spaces
    tab9[ , 1] <- str_replace_all(string = tab9[ , 1], pattern = " - ", replacement = " ")
    tab9[ , 1] <- str_replace_all(string = tab9[ , 1], pattern = "-", replacement = " ")
    tab9[ , 1] <- str_replace_all(string = tab9[ , 1], pattern = "–", replacement = " ")

    ## Columns 1 to 6: delete every remaining punctuation character
    for (col_idx in 1:6) {
      tab9[ , col_idx] <- str_replace_all(string = tab9[ , col_idx],
                                          pattern = "[[:punct:]]",
                                          replacement = "")
    }

    ## One string per row, re-split into at most 13 space-separated tokens
    tab9 <- paste(tab9[ , 1], tab9[ , 2], tab9[ , 3], tab9[ , 4], tab9[ , 5], tab9[ , 6], sep = " ")

    x <- str_split_fixed(string = tab9, pattern = " ", n = 13)

    ## Purely alphabetic tokens form the row label; tokens of 1 to 10 digits are
    ## the seven figures of the row
    labs <- character(nrow(x))
    df <- matrix(data = NA, nrow = nrow(x), ncol = 7)

    for (i in seq_len(nrow(x))) {
      words <- x[i, ][str_detect(string = x[i, ], pattern = "^[a-zA-Z]+$")]
      ## collapse is safe for a row without words (gives "" rather than "NA ")
      phrase <- paste(words, collapse = " ")
      labs[i] <- phrase
      numbers <- x[i, ][str_detect(string = x[i, ], pattern = "^\\d{1,10}$")]
      numbers <- as.numeric(numbers)
      df[i, ] <- numbers
    }

    ## Keep only the item rows
    tab9 <- cbind(df[ , 1], labs, df[ , 2:7])
    tab9 <- tab9[c(3:4, 6:13), ]

    ## Department and category names are read from the parsed labels
    departmentCode <- rep(500, nrow(tab9))
    department <- rep(labs[1], nrow(tab9))

    categoryCode <- c(rep(21, 2), rep(22, 8))
    category <- c(rep(labs[2], 2), rep(labs[5], 8))

    tab9 <- data.frame(departmentCode, department, categoryCode, category, tab9)

    tab9$department <- str_to_title(string = tab9$department)
    tab9$category <- str_to_title(string = tab9$category)

    names(tab9) <- c("departmentCode", "department",
                     "categoryCode", "category",
                     "itemCode", "item",
                     "actual_2016_2017",
                     "budget_2017_2018", "outturn_2017_2018",
                     "budget_2018_2019", "projection_2019_2020",
                     "projection_2020_2021")

    ## The bound matrix was character: item stays text, all else becomes numeric
    tab9[ , 6] <- as.character(tab9[ , 6])

    for (col_idx in c(5, 7:12)) {
      tab9[ , col_idx] <- as.numeric(as.character(tab9[ , col_idx]))
    }

    ################################################################################

    mohHealthVital2018 <- tab9
    usethis::use_data(mohHealthVital2018, overwrite = TRUE)

    ################################################################################

    ## Stack rows 32 to 48 of the tenth table on rows 5 to 17 of the eleventh
    tab10 <- rbind(health2[[10]][32:48, ], health2[[11]][5:17, ])

    ## Append the text of row 2 to row 1
    tab10[1, 1] <- paste(tab10[1, 1], tab10[2, 1], sep = " ")

    ## NOTE(review): the statement below was cut off mid-call at the point where
    ## a second copy of this script begins. As written it evaluated the
    ## undefined name `pas` and stopped the script, so it is disabled here.
    ## Restore the intended row merge for row 9 (and the rest of the tab10
    ## section) from version history -- TODO confirm.
    ## tab10[9, 1] <- pas
    ################################################################################
    #
    # Load required libraries
    #
    ################################################################################

    library(pdftools)
    library(tm)
    library(tabulizer)
    library(stringr)
    library(tidyverse)
    library(tidytext)

    options(scipen = 999)


    ################################################################################
    #
    # Function to process tables
    #
    ################################################################################

    ## Split tokenised table rows into text labels and numeric figures.
    ##
    ## tab  : list with one element per table row; each element holds the
    ##        tokens of that row as strings.
    ## nrow : number of rows of the figure matrix; defaults to length(tab).
    ## ncol : number of figures expected on every row; defaults to the number
    ##        of figures found on the first row.
    ##
    ## Returns a list with `labs`, the purely alphabetic tokens of each row
    ## joined by single spaces, and `df`, a numeric matrix whose row i holds the
    ## 6- to 8-digit tokens of row i. Rows beyond length(tab) stay NA.
    ## Stops with an error when a row does not hold exactly `ncol` figures or
    ## when `nrow` is smaller than length(tab).
    get_table <- function(tab, nrow = NULL, ncol = NULL) {
      n_rows <- length(tab)
      labs <- character(n_rows)
      figures <- vector("list", n_rows)

      for (i in seq_len(n_rows)) {
        tokens <- as.character(tab[[i]])
        ## collapse gives "" for a row without words instead of "NA "
        labs[i] <- paste(tokens[grepl("^[a-zA-Z]+$", tokens)], collapse = " ")
        figures[[i]] <- as.numeric(tokens[grepl("^[0-9]{6,8}$", tokens)])
      }

      if (is.null(nrow)) {
        nrow <- n_rows
      }

      if (is.null(ncol)) {
        ncol <- if (n_rows > 0) length(figures[[1]]) else 0
      }

      if (nrow < n_rows) {
        stop("`nrow` (", nrow, ") is smaller than the number of rows in `tab` (",
             n_rows, ")", call. = FALSE)
      }

      df <- matrix(data = NA_real_, nrow = nrow, ncol = ncol)

      for (i in seq_len(n_rows)) {
        ## An exact match is required so that short rows are never recycled
        if (length(figures[[i]]) != ncol) {
          stop("row ", i, " holds ", length(figures[[i]]),
               " figure(s) but `ncol` is ", ncol, call. = FALSE)
        }

        if (ncol > 0) {
          df[i, ] <- figures[[i]]
        }
      }

      ## The loop used to be the last expression, so the function returned NULL
      list(labs = labs, df = df)
    }



    ################################################################################
    #
    # Create list for information and tables in page 253 (2018)
    #
    ################################################################################

    ## Extract tables from page 253
    health1 <- extract_tables(file = "data-raw/budget/2018 Ministry of Finance and Development Planning.pdf",
                              pages = 253,
                              method = "decide")

    #goal <- health1[[1]][3, ]
    #strategicObjective <- str_c(health1[[1]][5, ], health1[[1]][6, ], health1[[1]][7, ], sep = " ")

    ## First table on page 253 (rows 27 to 31): join the two extracted columns,
    ## strip punctuation and split each row into at most 12 tokens
    tab1 <- health1[[1]][27:31, ]
    tab1 <- paste(tab1[ , 1], tab1[ , 2], sep = " ")

    tab1 <- str_replace_all(string = tab1, pattern = "[[:punct:]]", replacement = "")
    tab1 <- str_split_fixed(string = tab1, pattern = " ", n = 12)

    ## Purely alphabetic tokens form the classification label; tokens of 1 to 10
    ## digits are the seven figures of the row
    economic_classification <- character(nrow(tab1))
    df <- matrix(data = NA, nrow = nrow(tab1), ncol = 7)

    for (i in seq_len(nrow(tab1))) {
      words <- tab1[i, ][str_detect(string = tab1[i, ], pattern = "^[a-zA-Z]+$")]
      ## collapse is safe for a row without words (gives "" rather than "NA ")
      phrase <- paste(words, collapse = " ")
      economic_classification[i] <- phrase
      numbers <- tab1[i, ][str_detect(string = tab1[i, ], pattern = "^\\d{1,10}$")]
      numbers <- as.numeric(numbers)
      df[i, ] <- numbers
    }

    df <- data.frame(df[ , 1], economic_classification, df[ , 2:7])
    names(df) <- c("code", "economic_classification",
                   "actual_2016_2017",
                   "budget_2017_2018", "outturn_2017_2018",
                   "budget_2018_2019", "projection_2019_2020",
                   "projection_2020_2021")

    df$economic_classification <- str_to_sentence(df$economic_classification)

    summaryEconHealth2018 <- df

    ## Second table on page 253 (rows 37 to 46), processed the same way
    tab2 <- health1[[1]][37:46, ]
    tab2 <- paste(tab2[ , 1], tab2[ , 2], sep = " ")
    tab2 <- str_replace_all(string = tab2, pattern = "[[:punct:]]", replacement = "")

    tab2 <- str_split_fixed(string = tab2, pattern = " ", n = 12)

    ## Alphabetic tokens form the spending entity name; digit tokens are figures
    spending_entity <- character(nrow(tab2))
    df <- matrix(data = NA, nrow = nrow(tab2), ncol = 7)

    for (i in seq_len(nrow(tab2))) {
      words <- tab2[i, ][str_detect(string = tab2[i, ], pattern = "^[a-zA-Z]+$")]
      phrase <- paste(words, collapse = " ")
      spending_entity[i] <- phrase
      numbers <- tab2[i, ][str_detect(string = tab2[i, ], pattern = "^\\d{1,10}$")]
      numbers <- as.numeric(numbers)
      df[i, ] <- numbers
    }

    df <- data.frame(df[ , 1], spending_entity, df[ , 2:7])
    names(df) <- c("code", "spending_entity",
                   "actual_2016_2017",
                   "budget_2017_2018", "outturn_2017_2018",
                   "budget_2018_2019", "projection_2019_2020",
                   "projection_2020_2021")

    df$spending_entity <- str_to_title(df$spending_entity)

    summarySpendingHealth2018 <- df

    ## Bundle both summaries in one named list and save it as package data
    summaryHealth2018 <- list(summaryEconHealth2018, summarySpendingHealth2018)

    names(summaryHealth2018) <- c("summaryEconHealth2018", "summarySpendingHealth2018")

    usethis::use_data(summaryHealth2018, overwrite = TRUE)

    ## Tidy-up
    rm(words, phrase, numbers, economic_classification, tab1, tab2, df,
       summaryEconHealth2018, summarySpendingHealth2018, health1)

    ################################################################################
    #
    # Create detailed expenditure tables from pages 254-266 (2018)
    #
    ################################################################################

    ## Extract tables from pages 254-266
    health2 <- extract_tables(file = "data-raw/budget/2018 Ministry of Finance and Development Planning.pdf",
                              pages = 254:266,
                              method = "decide")

    ## Rows 14 to 48 of the first extracted table
    tab1 <- health2[[1]][14:48, ]

    ## Column 1: turn en dashes and hyphens into single spaces
    tab1[ , 1] <- str_replace_all(string = tab1[ , 1], pattern = " – ", replacement = " ")
    tab1[ , 1] <- str_replace_all(string = tab1[ , 1], pattern = "–", replacement = " ")
    tab1[ , 1] <- str_replace_all(string = tab1[ , 1], pattern = "-", replacement = " ")

    ## Columns 1 to 6: delete every remaining punctuation character
    for (col_idx in 1:6) {
      tab1[ , col_idx] <- str_replace_all(string = tab1[ , col_idx],
                                          pattern = "[[:punct:]]",
                                          replacement = "")
    }

    ## Append the text of the following row to each of these rows, then drop
    ## the rows that were appended
    merged_rows <- c(7, 11)
    tab1[merged_rows, 1] <- paste(tab1[merged_rows, 1], tab1[merged_rows + 1, 1], sep = " ")

    tab1 <- tab1[c(1:7, 9:11, 13:35), ]

    ## Column 6 holds two space-separated values: split it into two columns
    y <- str_split_fixed(string = tab1[ , 6], pattern = " ", n = 2)

    tab1 <- cbind(tab1[ , 1:5], y)

    ## Column 1 is split into at most 8 tokens: alphabetic tokens form the row
    ## label, tokens of 1 to 10 digits give the two figures held in that column
    x <- str_split_fixed(string = tab1[ , 1], pattern = " ", n = 8)

    labs <- character(nrow(x))
    df <- matrix(data = NA, nrow = nrow(x), ncol = 2)

    for (i in seq_len(nrow(x))) {
      words <- x[i, ][str_detect(string = x[i, ], pattern = "^[a-zA-Z]+$")]
      ## collapse is safe for a row without words (gives "" rather than "NA ")
      phrase <- paste(words, collapse = " ")
      labs[i] <- phrase
      numbers <- x[i, ][str_detect(string = x[i, ], pattern = "^\\d{1,10}$")]
      numbers <- as.numeric(numbers)
      df[i, ] <- numbers
    }

    ## Keep only the item rows
    tab1 <- data.frame(df[ , 1], labs, df[ , 2], tab1[ , 3:7])
    tab1 <- tab1[c(2:4, 6:26, 28:33), ]

    ## Category names are read from the parsed labels
    categoryCode <- c(rep(21, 3), rep(22, 21), rep(25, 6))
    category <- c(rep(labs[1], 3), rep(labs[5], 21), rep(labs[27], 6))

    tab1 <- data.frame(categoryCode, category, tab1)

    tab1$category <- str_to_title(tab1$category)

    names(tab1) <- c("categoryCode", "category",
                     "itemCode", "item",
                     "actual_2016_2017",
                     "budget_2017_2018", "outturn_2017_2018",
                     "budget_2018_2019", "projection_2019_2020",
                     "projection_2020_2021")

    ## Item stays text; the six figure columns become numeric
    tab1[ , 4] <- as.character(tab1[ , 4])

    for (col_idx in 5:10) {
      tab1[ , col_idx] <- as.numeric(as.character(tab1[ , col_idx]))
    }


    ################################################################################

    ## Rows 6 to 48 of the second table in `health2`
    tab2 <- health2[[2]][6:48, ]

    ## Column 1: turn en dashes and hyphens into single spaces
    tab2[ , 1] <- str_replace_all(string = tab2[ , 1], pattern = " – ", replacement = " ")
    tab2[ , 1] <- str_replace_all(string = tab2[ , 1], pattern = "–", replacement = " ")
    tab2[ , 1] <- str_replace_all(string = tab2[ , 1], pattern = "-", replacement = " ")

    ## Columns 1 to 6: delete every remaining punctuation character
    for (col_idx in 1:6) {
      tab2[ , col_idx] <- str_replace_all(string = tab2[ , col_idx],
                                          pattern = "[[:punct:]]",
                                          replacement = "")
    }

    ## Append the text of the following row to each of these rows, then drop
    ## the rows that were appended
    merged_rows <- c(9, 15, 31, 36)
    tab2[merged_rows, 1] <- paste(tab2[merged_rows, 1], tab2[merged_rows + 1, 1], sep = " ")

    tab2 <- tab2[c(1:9, 11:15, 17:31, 33:36, 38:43), ]

    ## Column 6 holds two space-separated values: split it into two columns
    y <- str_split_fixed(string = tab2[ , 6], pattern = " ", n = 2)

    tab2 <- cbind(tab2[ , 1:5], y)

    ## Column 1 is split into at most 8 tokens: alphabetic tokens form the row
    ## label, tokens of 1 to 10 digits give the two figures held in that column
    x <- str_split_fixed(string = tab2[ , 1], pattern = " ", n = 8)

    labs <- character(nrow(x))
    df <- matrix(data = NA, nrow = nrow(x), ncol = 2)

    for (i in seq_len(nrow(x))) {
      words <- x[i, ][str_detect(string = x[i, ], pattern = "^[a-zA-Z]+$")]
      ## collapse is safe for a row without words (gives "" rather than "NA ")
      phrase <- paste(words, collapse = " ")
      labs[i] <- phrase
      numbers <- x[i, ][str_detect(string = x[i, ], pattern = "^\\d{1,10}$")]
      numbers <- as.numeric(numbers)
      df[i, ] <- numbers
    }

    ## Keep only the item rows
    tab2 <- data.frame(df[ , 1], labs, df[ , 2], tab2[ , 3:7])
    tab2 <- tab2[c(1:7, 9:39), ]

    ## First seven rows are subsidies; the rest take their name from label 8
    categoryCode <- c(rep(25, 7), rep(26, 31))
    category <- c(rep("Subsidy", 7), rep(labs[8], 31))

    tab2 <- data.frame(categoryCode, category, tab2)

    tab2$category <- str_to_title(tab2$category)

    names(tab2) <- c("categoryCode", "category",
                     "itemCode", "item",
                     "actual_2016_2017",
                     "budget_2017_2018", "outturn_2017_2018",
                     "budget_2018_2019", "projection_2019_2020",
                     "projection_2020_2021")

    ## Item stays text; the six figure columns become numeric
    tab2[ , 4] <- as.character(tab2[ , 4])

    for (col_idx in 5:10) {
      tab2[ , col_idx] <- as.numeric(as.character(tab2[ , col_idx]))
    }

    ################################################################################

    ## Rows 6 to 46 of the third table in `health2`
    tab3 <- health2[[3]][6:46, ]

    ## Column 1: turn en dashes and hyphens into single spaces
    tab3[ , 1] <- str_replace_all(string = tab3[ , 1], pattern = " – ", replacement = " ")
    tab3[ , 1] <- str_replace_all(string = tab3[ , 1], pattern = "–", replacement = " ")
    tab3[ , 1] <- str_replace_all(string = tab3[ , 1], pattern = "-", replacement = " ")

    ## Columns 1 to 6: delete every remaining punctuation character
    for (col_idx in 1:6) {
      tab3[ , col_idx] <- str_replace_all(string = tab3[ , col_idx],
                                          pattern = "[[:punct:]]",
                                          replacement = "")
    }

    ## Append the text of the following row to each of these rows
    merged_rows <- c(1, 14, 23)
    tab3[merged_rows, 1] <- paste(tab3[merged_rows, 1], tab3[merged_rows + 1, 1], sep = " ")

    ## The last row gets the literal word "County" appended instead
    tab3[41, 1] <- paste(tab3[41, 1], "County", sep = " ")

    ## Drop the rows that were appended above
    tab3 <- tab3[c(1, 3:14, 16:23, 25:41), ]

    ## Column 6 holds two space-separated values: split it into two columns
    y <- str_split_fixed(string = tab3[ , 6], pattern = " ", n = 2)

    tab3 <- cbind(tab3[ , 1:5], y)

    ## Column 1 is split into at most 8 tokens: alphabetic tokens form the row
    ## label, tokens of 1 to 10 digits give the two figures held in that column
    x <- str_split_fixed(string = tab3[ , 1], pattern = " ", n = 8)

    labs <- character(nrow(x))
    df <- matrix(data = NA, nrow = nrow(x), ncol = 2)

    for (i in seq_len(nrow(x))) {
      words <- x[i, ][str_detect(string = x[i, ], pattern = "^[a-zA-Z]+$")]
      ## collapse is safe for a row without words (gives "" rather than "NA ")
      phrase <- paste(words, collapse = " ")
      labs[i] <- phrase
      numbers <- x[i, ][str_detect(string = x[i, ], pattern = "^\\d{1,10}$")]
      numbers <- as.numeric(numbers)
      df[i, ] <- numbers
    }

    tab3 <- data.frame(df[ , 1], labs, df[ , 2], tab3[ , 3:7])

    ## Every row of this table is filed under category 26, "Grants"
    categoryCode <- rep(26, nrow(tab3))
    category <- rep("Grants", nrow(tab3))

    tab3 <- data.frame(categoryCode, category, tab3)

    tab3$category <- str_to_title(tab3$category)

    names(tab3) <- c("categoryCode", "category",
                     "itemCode", "item",
                     "actual_2016_2017",
                     "budget_2017_2018", "outturn_2017_2018",
                     "budget_2018_2019", "projection_2019_2020",
                     "projection_2020_2021")

    ## Item stays text; the six figure columns become numeric
    tab3[ , 4] <- as.character(tab3[ , 4])

    for (col_idx in 5:10) {
      tab3[ , col_idx] <- as.numeric(as.character(tab3[ , col_idx]))
    }

    ################################################################################

    ## Rows 6 to 24 of the fourth table in `health2`
    tab4 <- health2[[4]][6:24, ]

    ## Append the text of row 2 to row 1, then drop row 2
    tab4[1, 1] <- paste(tab4[1, 1], tab4[2, 1], sep = " ")

    tab4 <- tab4[c(1, 3:19), ]

    ## Column 1: turn en dashes and hyphens into single spaces
    tab4[ , 1] <- str_replace_all(string = tab4[ , 1], pattern = " – ", replacement = " ")
    tab4[ , 1] <- str_replace_all(string = tab4[ , 1], pattern = "–", replacement = " ")
    tab4[ , 1] <- str_replace_all(string = tab4[ , 1], pattern = "-", replacement = " ")

    ## Columns 1 to 6: delete every remaining punctuation character
    for (col_idx in 1:6) {
      tab4[ , col_idx] <- str_replace_all(string = tab4[ , col_idx],
                                          pattern = "[[:punct:]]",
                                          replacement = "")
    }

    ## Column 6 holds two space-separated values: split it into two columns
    y <- str_split_fixed(string = tab4[ , 6], pattern = " ", n = 2)

    tab4 <- cbind(tab4[ , 1:5], y)

    ## Column 1 is split into at most 8 tokens: alphabetic tokens form the row
    ## label, tokens of 1 to 10 digits give the two figures held in that column
    x <- str_split_fixed(string = tab4[ , 1], pattern = " ", n = 8)

    labs <- character(nrow(x))
    df <- matrix(data = NA, nrow = nrow(x), ncol = 2)

    for (i in seq_len(nrow(x))) {
      words <- x[i, ][str_detect(string = x[i, ], pattern = "^[a-zA-Z]+$")]
      ## collapse is safe for a row without words (gives "" rather than "NA ")
      phrase <- paste(words, collapse = " ")
      labs[i] <- phrase
      numbers <- x[i, ][str_detect(string = x[i, ], pattern = "^\\d{1,10}$")]
      numbers <- as.numeric(numbers)
      df[i, ] <- numbers
    }

    ## Keep only the item rows
    tab4 <- data.frame(df[ , 1], labs, df[ , 2], tab4[ , 3:7])
    tab4 <- tab4[c(1:16, 18), ]

    ## Sixteen grant rows, then one row of category 31 named by label 17
    categoryCode <- c(rep(26, 16), 31)
    category <- c(rep("Grants", 16), labs[17])

    tab4 <- data.frame(categoryCode, category, tab4)

    tab4$category <- str_to_title(tab4$category)

    names(tab4) <- c("categoryCode", "category",
                     "itemCode", "item",
                     "actual_2016_2017",
                     "budget_2017_2018", "outturn_2017_2018",
                     "budget_2018_2019", "projection_2019_2020",
                     "projection_2020_2021")

    ## Item stays text; the six figure columns become numeric
    tab4[ , 4] <- as.character(tab4[ , 4])

    for (col_idx in 5:10) {
      tab4[ , col_idx] <- as.numeric(as.character(tab4[ , col_idx]))
    }

    ################################################################################

    ## Stack the four economic classification tables and save as package data
    mohHealthEcon2018 <- data.frame(rbind(tab1, tab2, tab3, tab4))
    usethis::use_data(mohHealthEcon2018, overwrite = TRUE)

    ################################################################################

    ## Rows 30 to 44 of the fourth table in `health2`
    tab5 <- health2[[4]][30:44, ]

    ## Column 1: turn hyphens into spaces
    tab5[ , 1] <- str_replace_all(string = tab5[ , 1], pattern = "-", replacement = " ")

    ## Columns 1 to 6: delete every remaining punctuation character
    for (col_idx in 1:6) {
      tab5[ , col_idx] <- str_replace_all(string = tab5[ , col_idx],
                                          pattern = "[[:punct:]]",
                                          replacement = "")
    }

    ## Column 1 is split into at most 5 tokens; column 6 holds two
    ## space-separated values and is split into two columns
    x <- str_split_fixed(string = tab5[ , 1], pattern = " ", n = 5)
    y <- str_split_fixed(string = tab5[ , 6], pattern = " ", n = 2)

    tab5 <- cbind(tab5[ , 1:5], y)

    ## Alphabetic tokens of column 1 form the county name; tokens of 1 to 10
    ## digits give the two figures held in that column
    labs <- character(nrow(x))
    df <- matrix(data = NA, nrow = nrow(x), ncol = 2)

    for (i in seq_len(nrow(x))) {
      words <- x[i, ][str_detect(string = x[i, ], pattern = "^[a-zA-Z]+$")]
      ## collapse is safe for a row without words (gives "" rather than "NA ")
      phrase <- paste(words, collapse = " ")
      labs[i] <- phrase
      numbers <- x[i, ][str_detect(string = x[i, ], pattern = "^\\d{1,10}$")]
      numbers <- as.numeric(numbers)
      df[i, ] <- numbers
    }

    tab5 <- data.frame(df[ , 1], labs, df[ , 2], tab5[ , 3:7])

    tab5$labs <- str_to_title(tab5$labs)

    names(tab5) <- c("countyCode", "county",
                     "actual_2016_2017",
                     "budget_2017_2018", "outturn_2017_2018",
                     "budget_2018_2019", "projection_2019_2020",
                     "projection_2020_2021")

    ## Columns 4 to 8 came from the character matrix and become numeric here;
    ## columns 1 and 3 are already numeric
    for (col_idx in 4:8) {
      tab5[ , col_idx] <- as.numeric(as.character(tab5[ , col_idx]))
    }

    ################################################################################

    mohHealthCounty2018 <- tab5
    usethis::use_data(mohHealthCounty2018, overwrite = TRUE)

    ################################################################################

    ## Curative services (department 100). The table is read from three
    ## matrices (health2[[5]] to health2[[7]]) that go through the same steps,
    ## so one helper parses each of them.

    ## Turn one extracted matrix into a character matrix with 8 columns:
    ## item code, label and six amounts.
    ##   tab       character matrix of raw cells
    ##   n_cols    number of leading columns that are cleaned and used
    ##   wrapped   rows whose column-1 text continues on the following row
    ##   n_tokens  maximum number of space-separated tokens per row
    parse_curative_page <- function(tab, n_cols, wrapped, n_tokens) {
      ## Dashes become spaces before punctuation is stripped so that words
      ## joined by a dash remain separate tokens
      tab[ , 1] <- str_replace_all(string = tab[ , 1], pattern = " - ", replacement = " ")
      tab[ , 1] <- str_replace_all(string = tab[ , 1], pattern = "-", replacement = " ")

      for (col in seq_len(n_cols)) {
        tab[ , col] <- str_replace_all(string = tab[ , col],
                                       pattern = "[[:punct:]]", replacement = "")
      }

      ## Append each continuation row to its parent, then drop the
      ## continuation rows
      tab[wrapped, 1] <- paste(tab[wrapped, 1], tab[wrapped + 1, 1], sep = " ")
      tab <- tab[-(wrapped + 1), ]

      ## One text line per row, re-split into tokens
      row_text <- apply(tab[ , seq_len(n_cols)], 1, paste, collapse = " ")
      x <- str_split_fixed(string = row_text, pattern = " ", n = n_tokens)

      labs <- character(nrow(x))
      df <- matrix(data = NA, nrow = nrow(x), ncol = 7)

      for (i in seq_len(nrow(x))) {
        tokens <- x[i, ]

        ## Label = the purely alphabetic tokens joined by single spaces; a
        ## row without any alphabetic token gets "" (it used to get "NA ")
        labs[i] <- paste(tokens[str_detect(string = tokens, pattern = "^[a-zA-Z]+$")],
                         collapse = " ")

        ## Tokens of 1 to 10 digits are the numbers; each row must supply 7
        ## of them (a single number is recycled across the row)
        df[i, ] <- as.numeric(tokens[str_detect(string = tokens, pattern = "^\\d{1,10}$")])
      }

      cbind(df[ , 1], labs, df[ , 2:7])
    }

    tab6a <- parse_curative_page(tab = health2[[5]][14:48, ], n_cols = 4,
                                 wrapped = c(7, 9, 11, 16, 23, 28, 31),
                                 n_tokens = 13)

    tab6b <- parse_curative_page(tab = health2[[6]][5:50, ], n_cols = 6,
                                 wrapped = c(4, 11, 14, 19, 22, 24, 27, 33, 35, 37, 40, 44),
                                 n_tokens = 13)

    tab6c <- parse_curative_page(tab = health2[[7]][5:48, ], n_cols = 6,
                                 wrapped = c(1, 3, 6, 12, 16, 19, 28, 30, 36),
                                 n_tokens = 14)

    ## Keep 92 of the 97 combined rows (rows 1, 2, 5, 17 and 31 are dropped)
    tab6 <- rbind(tab6a, tab6b, tab6c)
    tab6 <- tab6[c(3:4, 6:16, 18:30, 32:97), ]

    ## Number of kept rows in each of the four categories, in row order
    group_sizes <- c(2, 11, 13, 66)

    departmentCode <- rep(100, nrow(tab6))
    ## Spelling fixed: this literal used to read "Curative Servives"
    department <- rep("Curative Services", nrow(tab6))

    categoryCode <- rep(c(21, 22, 25, 26), times = group_sizes)
    category <- rep(c("Compensation Of Employees",
                      "Use Of Goods And Services",
                      "Subsidy",
                      "Grants"),
                    times = group_sizes)

    tab6 <- data.frame(departmentCode, department, categoryCode, category, tab6)

    tab6$department <- str_to_title(string = tab6$department)
    tab6$category <- str_to_title(string = tab6$category)

    names(tab6) <- c("departmentCode", "department",
                     "categoryCode", "category",
                     "itemCode", "item",
                     "actual_2016_2017",
                     "budget_2017_2018", "outturn_2017_2018",
                     "budget_2018_2019", "projection_2019_2020",
                     "projection_2020_2021")

    ## Columns 5-12 come from a character matrix: item stays text, the rest
    ## become numeric (as.character() first so factor columns convert by label)
    tab6[[6]] <- as.character(tab6[[6]])
    numeric_cols <- c(5, 7:12)
    tab6[numeric_cols] <- lapply(tab6[numeric_cols],
                                 function(v) as.numeric(as.character(v)))

    ################################################################################

    mohHealthCurative2018 <- tab6
    usethis::use_data(mohHealthCurative2018, overwrite = TRUE)

    ################################################################################

    ## Department 200: rows 12-28 of health2[[8]]
    tab7 <- health2[[8]][12:28, ]

    ## Dashes become spaces before punctuation is stripped so that words
    ## joined by a dash remain separate tokens
    tab7[ , 1] <- str_replace_all(string = tab7[ , 1], pattern = " - ", replacement = " ")
    tab7[ , 1] <- str_replace_all(string = tab7[ , 1], pattern = "-", replacement = " ")

    for (col in 1:6) {
      tab7[ , col] <- str_replace_all(string = tab7[ , col],
                                      pattern = "[[:punct:]]", replacement = "")
    }

    ## Rows whose column-1 text continues on the next row: append the
    ## continuation, then drop the continuation rows
    wrapped <- c(5, 10, 16)
    tab7[wrapped, 1] <- paste(tab7[wrapped, 1], tab7[wrapped + 1, 1], sep = " ")
    tab7 <- tab7[-(wrapped + 1), ]

    ## One text line per row, re-split into at most 13 tokens
    tab7 <- apply(tab7[ , 1:6], 1, paste, collapse = " ")

    x <- str_split_fixed(string = tab7, pattern = " ", n = 13)

    labs <- character(nrow(x))
    df <- matrix(data = NA, nrow = nrow(x), ncol = 7)

    for (i in seq_len(nrow(x))) {
      tokens <- x[i, ]

      ## Label = the purely alphabetic tokens joined by single spaces; a row
      ## without any alphabetic token gets "" (it used to get "NA ")
      labs[i] <- paste(tokens[str_detect(string = tokens, pattern = "^[a-zA-Z]+$")],
                       collapse = " ")

      ## Tokens of 1 to 10 digits are the numbers; each row must supply 7 of
      ## them (a single number is recycled across the row). Converted to
      ## numeric here as in the sibling sections; the saved values are the same
      df[i, ] <- as.numeric(tokens[str_detect(string = tokens, pattern = "^\\d{1,10}$")])
    }

    tab7 <- cbind(df[ , 1], labs, df[ , 2:7])
    tab7 <- tab7[c(3, 5:12, 14), ]

    ## Department and category names are taken from rows 1, 2, 4 and 13 of the
    ## parsed labels
    departmentCode <- rep(200, nrow(tab7))
    department <- rep(labs[1], nrow(tab7))

    categoryCode <- c(21, rep(22, 8), 26)
    category <- c(labs[2], rep(labs[4], 8), labs[13])

    tab7 <- data.frame(departmentCode, department, categoryCode, category, tab7)

    tab7$department <- str_to_title(string = tab7$department)
    tab7$category <- str_to_title(string = tab7$category)

    names(tab7) <- c("departmentCode", "department",
                     "categoryCode", "category",
                     "itemCode", "item",
                     "actual_2016_2017",
                     "budget_2017_2018", "outturn_2017_2018",
                     "budget_2018_2019", "projection_2019_2020",
                     "projection_2020_2021")

    ## Columns 5-12 come from a character matrix: item stays text, the rest
    ## become numeric (as.character() first so factor columns convert by label)
    tab7[[6]] <- as.character(tab7[[6]])
    numeric_cols <- c(5, 7:12)
    tab7[numeric_cols] <- lapply(tab7[numeric_cols],
                                 function(v) as.numeric(as.character(v)))

    ################################################################################

    mohHealthPreventive2018 <- tab7
    usethis::use_data(mohHealthPreventive2018, overwrite = TRUE)

    ################################################################################

    ## Department 400: rows 10-30 of health2[[9]]
    tab8 <- health2[[9]][10:30, ]

    ## Rows whose column-1 text continues on the next row: append the
    ## continuation, then drop the continuation rows
    wrapped <- c(1, 7, 9, 11, 16)
    tab8[wrapped, 1] <- paste(tab8[wrapped, 1], tab8[wrapped + 1, 1], sep = " ")
    tab8 <- tab8[-(wrapped + 1), ]

    ## Hyphens become spaces before punctuation is stripped so that words
    ## joined by a hyphen remain separate tokens
    tab8[ , 1] <- str_replace_all(string = tab8[ , 1], pattern = "-", replacement = " ")

    for (col in 1:6) {
      tab8[ , col] <- str_replace_all(string = tab8[ , col],
                                      pattern = "[[:punct:]]", replacement = "")
    }

    ## One text line per row, re-split into at most 14 tokens
    tab8 <- apply(tab8[ , 1:6], 1, paste, collapse = " ")

    x <- str_split_fixed(string = tab8, pattern = " ", n = 14)

    labs <- character(nrow(x))
    df <- matrix(data = NA, nrow = nrow(x), ncol = 7)

    for (i in seq_len(nrow(x))) {
      tokens <- x[i, ]

      ## Label = the purely alphabetic tokens joined by single spaces; a row
      ## without any alphabetic token gets "" (it used to get "NA ")
      labs[i] <- paste(tokens[str_detect(string = tokens, pattern = "^[a-zA-Z]+$")],
                       collapse = " ")

      ## Tokens of 1 to 10 digits are the numbers; each row must supply 7 of
      ## them (a single number is recycled across the row)
      df[i, ] <- as.numeric(tokens[str_detect(string = tokens, pattern = "^\\d{1,10}$")])
    }

    tab8 <- cbind(df[ , 1], labs, df[ , 2:7])
    tab8 <- tab8[c(3, 5:14, 16), ]

    ## Department and category names are taken from rows 1, 2, 4 and 15 of the
    ## parsed labels
    departmentCode <- rep(400, nrow(tab8))
    department <- rep(labs[1], nrow(tab8))

    categoryCode <- c(21, rep(22, 10), 26)
    category <- c(labs[2], rep(labs[4], 10), labs[15])

    tab8 <- data.frame(departmentCode, department, categoryCode, category, tab8)

    tab8$department <- str_to_title(string = tab8$department)
    tab8$category <- str_to_title(string = tab8$category)

    names(tab8) <- c("departmentCode", "department",
                     "categoryCode", "category",
                     "itemCode", "item",
                     "actual_2016_2017",
                     "budget_2017_2018", "outturn_2017_2018",
                     "budget_2018_2019", "projection_2019_2020",
                     "projection_2020_2021")

    ## Columns 5-12 come from a character matrix: item stays text, the rest
    ## become numeric (as.character() first so factor columns convert by label)
    tab8[[6]] <- as.character(tab8[[6]])
    numeric_cols <- c(5, 7:12)
    tab8[numeric_cols] <- lapply(tab8[numeric_cols],
                                 function(v) as.numeric(as.character(v)))

    ################################################################################

    mohHealthPlanning2018 <- tab8
    usethis::use_data(mohHealthPlanning2018, overwrite = TRUE)

    ################################################################################

    ## Department 500: rows 5-20 of health2[[10]]
    tab9 <- health2[[10]][5:20, ]

    ## Rows whose column-1 text continues on the next row: append the
    ## continuation, then drop the continuation rows
    wrapped <- c(7, 9, 13)
    tab9[wrapped, 1] <- paste(tab9[wrapped, 1], tab9[wrapped + 1, 1], sep = " ")
    tab9 <- tab9[-(wrapped + 1), ]

    ## Hyphens and en dashes become spaces before punctuation is stripped so
    ## that words joined by a dash remain separate tokens
    tab9[ , 1] <- str_replace_all(string = tab9[ , 1], pattern = " - ", replacement = " ")
    tab9[ , 1] <- str_replace_all(string = tab9[ , 1], pattern = "-", replacement = " ")
    tab9[ , 1] <- str_replace_all(string = tab9[ , 1], pattern = "–", replacement = " ")

    for (col in 1:6) {
      tab9[ , col] <- str_replace_all(string = tab9[ , col],
                                      pattern = "[[:punct:]]", replacement = "")
    }

    ## One text line per row, re-split into at most 13 tokens
    tab9 <- apply(tab9[ , 1:6], 1, paste, collapse = " ")

    x <- str_split_fixed(string = tab9, pattern = " ", n = 13)

    labs <- character(nrow(x))
    df <- matrix(data = NA, nrow = nrow(x), ncol = 7)

    for (i in seq_len(nrow(x))) {
      tokens <- x[i, ]

      ## Label = the purely alphabetic tokens joined by single spaces; a row
      ## without any alphabetic token gets "" (it used to get "NA ")
      labs[i] <- paste(tokens[str_detect(string = tokens, pattern = "^[a-zA-Z]+$")],
                       collapse = " ")

      ## Tokens of 1 to 10 digits are the numbers; each row must supply 7 of
      ## them (a single number is recycled across the row)
      df[i, ] <- as.numeric(tokens[str_detect(string = tokens, pattern = "^\\d{1,10}$")])
    }

    tab9 <- cbind(df[ , 1], labs, df[ , 2:7])
    tab9 <- tab9[c(3:4, 6:13), ]

    ## Department and category names are taken from rows 1, 2 and 5 of the
    ## parsed labels
    departmentCode <- rep(500, nrow(tab9))
    department <- rep(labs[1], nrow(tab9))

    categoryCode <- c(rep(21, 2), rep(22, 8))
    category <- c(rep(labs[2], 2), rep(labs[5], 8))

    tab9 <- data.frame(departmentCode, department, categoryCode, category, tab9)

    tab9$department <- str_to_title(string = tab9$department)
    tab9$category <- str_to_title(string = tab9$category)

    names(tab9) <- c("departmentCode", "department",
                     "categoryCode", "category",
                     "itemCode", "item",
                     "actual_2016_2017",
                     "budget_2017_2018", "outturn_2017_2018",
                     "budget_2018_2019", "projection_2019_2020",
                     "projection_2020_2021")

    ## Columns 5-12 come from a character matrix: item stays text, the rest
    ## become numeric (as.character() first so factor columns convert by label)
    tab9[[6]] <- as.character(tab9[[6]])
    numeric_cols <- c(5, 7:12)
    tab9[numeric_cols] <- lapply(tab9[numeric_cols],
                                 function(v) as.numeric(as.character(v)))

    ################################################################################

    mohHealthVital2018 <- tab9
    usethis::use_data(mohHealthVital2018, overwrite = TRUE)

    ################################################################################

    ## Department 600: rows 32-48 of health2[[10]] followed by rows 5-17 of
    ## health2[[11]]
    tab10 <- rbind(health2[[10]][32:48, ], health2[[11]][5:17, ])

    ## Rows whose column-1 text continues on the next row: append the
    ## continuation, then drop the continuation rows
    wrapped <- c(1, 9, 11, 14, 21)
    tab10[wrapped, 1] <- paste(tab10[wrapped, 1], tab10[wrapped + 1, 1], sep = " ")
    tab10 <- tab10[-(wrapped + 1), ]

    ## Hyphens and en dashes become spaces before punctuation is stripped so
    ## that words joined by a dash remain separate tokens
    tab10[ , 1] <- str_replace_all(string = tab10[ , 1], pattern = " - ", replacement = " ")
    tab10[ , 1] <- str_replace_all(string = tab10[ , 1], pattern = "-", replacement = " ")
    tab10[ , 1] <- str_replace_all(string = tab10[ , 1], pattern = "–", replacement = " ")

    for (col in 1:6) {
      tab10[ , col] <- str_replace_all(string = tab10[ , col],
                                       pattern = "[[:punct:]]", replacement = "")
    }

    ## One text line per row, re-split into at most 14 tokens
    tab10 <- apply(tab10[ , 1:6], 1, paste, collapse = " ")

    x <- str_split_fixed(string = tab10, pattern = " ", n = 14)

    labs <- character(nrow(x))
    df <- matrix(data = NA, nrow = nrow(x), ncol = 7)

    for (i in seq_len(nrow(x))) {
      tokens <- x[i, ]

      ## Label = the purely alphabetic tokens joined by single spaces; a row
      ## without any alphabetic token gets "" (it used to get "NA ")
      labs[i] <- paste(tokens[str_detect(string = tokens, pattern = "^[a-zA-Z]+$")],
                       collapse = " ")

      ## Tokens of 1 to 10 digits are the numbers; each row must supply 7 of
      ## them (a single number is recycled across the row)
      df[i, ] <- as.numeric(tokens[str_detect(string = tokens, pattern = "^\\d{1,10}$")])
    }

    tab10 <- cbind(df[ , 1], labs, df[ , 2:7])
    tab10 <- tab10[c(3:5, 7:23, 25), ]

    ## Number of kept rows in each of the three categories, in row order
    group_sizes <- c(3, 17, 1)

    ## Department and category names are taken from rows 1, 2, 6 and 24 of the
    ## parsed labels
    departmentCode <- rep(600, nrow(tab10))
    department <- rep(labs[1], nrow(tab10))

    categoryCode <- rep(c(21, 22, 31), times = group_sizes)
    category <- rep(labs[c(2, 6, 24)], times = group_sizes)

    tab10 <- data.frame(departmentCode, department, categoryCode, category, tab10)

    tab10$department <- str_to_title(string = tab10$department)
    tab10$category <- str_to_title(string = tab10$category)

    names(tab10) <- c("departmentCode", "department",
                      "categoryCode", "category",
                      "itemCode", "item",
                      "actual_2016_2017",
                      "budget_2017_2018", "outturn_2017_2018",
                      "budget_2018_2019", "projection_2019_2020",
                      "projection_2020_2021")

    ## Columns 5-12 come from a character matrix: item stays text, the rest
    ## become numeric (as.character() first so factor columns convert by label)
    tab10[[6]] <- as.character(tab10[[6]])
    numeric_cols <- c(5, 7:12)
    tab10[numeric_cols] <- lapply(tab10[numeric_cols],
                                  function(v) as.numeric(as.character(v)))

    ################################################################################

    mohHealthAdmin2018 <- tab10
    usethis::use_data(mohHealthAdmin2018, overwrite = TRUE)

    ################################################################################

    ## Departments 702, 704, 709 and 712: row ranges taken from health2[[11]],
    ## health2[[12]] and health2[[13]], stacked in that order
    tab11 <- rbind(health2[[11]][27:30, ],
                   health2[[12]][c(5:10, 20:27, 37:45), ],
                   health2[[13]][5:8, ])

    ## Rows whose column-1 text continues on the next row: append the
    ## continuation, then drop the continuation rows
    wrapped <- c(7, 9, 15, 24, 28)
    tab11[wrapped, 1] <- paste(tab11[wrapped, 1], tab11[wrapped + 1, 1], sep = " ")
    tab11 <- tab11[-(wrapped + 1), ]

    ## Hyphens and en dashes become spaces before punctuation is stripped so
    ## that words joined by a dash remain separate tokens
    tab11[ , 1] <- str_replace_all(string = tab11[ , 1], pattern = " - ", replacement = " ")
    tab11[ , 1] <- str_replace_all(string = tab11[ , 1], pattern = "-", replacement = " ")
    tab11[ , 1] <- str_replace_all(string = tab11[ , 1], pattern = "–", replacement = " ")

    for (col in 1:6) {
      tab11[ , col] <- str_replace_all(string = tab11[ , col],
                                       pattern = "[[:punct:]]", replacement = "")
    }

    ## One text line per row, re-split into at most 14 tokens
    tab11 <- apply(tab11[ , 1:6], 1, paste, collapse = " ")

    x <- str_split_fixed(string = tab11, pattern = " ", n = 14)

    labs <- character(nrow(x))
    df <- matrix(data = NA, nrow = nrow(x), ncol = 7)

    for (i in seq_len(nrow(x))) {
      tokens <- x[i, ]

      ## Label = the purely alphabetic tokens joined by single spaces; a row
      ## without any alphabetic token gets "" (it used to get "NA ")
      labs[i] <- paste(tokens[str_detect(string = tokens, pattern = "^[a-zA-Z]+$")],
                       collapse = " ")

      ## Tokens of 1 to 10 digits are the numbers; each row must supply 7 of
      ## them (a single number is recycled across the row)
      df[i, ] <- as.numeric(tokens[str_detect(string = tokens, pattern = "^\\d{1,10}$")])
    }

    tab11 <- cbind(df[ , 1], labs, df[ , 2:7])
    tab11 <- tab11[c(3:4, 7:8, 11:15, 18:26), ]

    ## Number of kept rows in each of the four departments, in row order
    group_sizes <- c(2, 2, 5, 9)

    ## Department names come from rows 1, 5, 9 and 16 of the parsed labels,
    ## category names from rows 2, 6, 10 and 17
    departmentCode <- rep(c(702, 704, 709, 712), times = group_sizes)
    department <- rep(labs[c(1, 5, 9, 16)], times = group_sizes)

    ## Every kept row carries category code 26
    categoryCode <- rep(26, sum(group_sizes))
    category <- rep(labs[c(2, 6, 10, 17)], times = group_sizes)

    tab11 <- data.frame(departmentCode, department, categoryCode, category, tab11)

    tab11$department <- str_to_title(string = tab11$department)
    tab11$category <- str_to_title(string = tab11$category)

    names(tab11) <- c("departmentCode", "department",
                      "categoryCode", "category",
                      "itemCode", "item",
                      "actual_2016_2017",
                      "budget_2017_2018", "outturn_2017_2018",
                      "budget_2018_2019", "projection_2019_2020",
                      "projection_2020_2021")

    ## Columns 5-12 come from a character matrix: item stays text, the rest
    ## become numeric (as.character() first so factor columns convert by label)
    tab11[[6]] <- as.character(tab11[[6]])
    numeric_cols <- c(5, 7:12)
    tab11[numeric_cols] <- lapply(tab11[numeric_cols],
                                  function(v) as.numeric(as.character(v)))

    ################################################################################

    mohHealthCountyAdd2018 <- tab11
    usethis::use_data(mohHealthCountyAdd2018, overwrite = TRUE)

    ################################################################################

    ## General claims (department 5500): rows 18-20 of health2[[13]]
    tab12 <- health2[[13]][18:20, ]

    ## Hyphens and en dashes become spaces before punctuation is stripped so
    ## that words joined by a dash remain separate tokens
    tab12[ , 1] <- str_replace_all(string = tab12[ , 1], pattern = " - ", replacement = " ")
    tab12[ , 1] <- str_replace_all(string = tab12[ , 1], pattern = "-", replacement = " ")
    tab12[ , 1] <- str_replace_all(string = tab12[ , 1], pattern = "–", replacement = " ")

    for (col in 1:6) {
      tab12[ , col] <- str_replace_all(string = tab12[ , col],
                                       pattern = "[[:punct:]]", replacement = "")
    }

    ## One text line per row, re-split into at most 12 tokens
    tab12 <- apply(tab12[ , 1:6], 1, paste, collapse = " ")

    x <- str_split_fixed(string = tab12, pattern = " ", n = 12)

    labs <- character(nrow(x))
    df <- matrix(data = NA, nrow = nrow(x), ncol = 7)

    for (i in seq_len(nrow(x))) {
      tokens <- x[i, ]

      ## Label = the purely alphabetic tokens joined by single spaces; a row
      ## without any alphabetic token gets "" (it used to get "NA ")
      labs[i] <- paste(tokens[str_detect(string = tokens, pattern = "^[a-zA-Z]+$")],
                       collapse = " ")

      ## Tokens of 1 to 10 digits are the numbers; each row must supply 7 of
      ## them (a single number is recycled across the row)
      df[i, ] <- as.numeric(tokens[str_detect(string = tokens, pattern = "^\\d{1,10}$")])
    }

    tab12 <- cbind(df[ , 1], labs, df[ , 2:7])

    ## Only the third row is kept; drop = FALSE keeps it a 1-row matrix so that
    ## data.frame() spreads it over eight columns
    tab12 <- tab12[3, , drop = FALSE]

    departmentCode <- 5500
    department <- "General Claims"

    categoryCode <- 26
    category <- "Grants"

    ## Built with data.frame() rather than c() so that departmentCode and
    ## categoryCode stay numeric, as in the other department tables
    tab12 <- data.frame(departmentCode, department, categoryCode, category, tab12)

    names(tab12) <- c("departmentCode", "department",
                      "categoryCode", "category",
                      "itemCode", "item",
                      "actual_2016_2017",
                      "budget_2017_2018", "outturn_2017_2018",
                      "budget_2018_2019", "projection_2019_2020",
                      "projection_2020_2021")

    ## Columns 5-12 come from a character matrix: item stays text, the rest
    ## become numeric (as.character() first so factor columns convert by label)
    tab12[[6]] <- as.character(tab12[[6]])
    numeric_cols <- c(5, 7:12)
    tab12[numeric_cols] <- lapply(tab12[numeric_cols],
                                  function(v) as.numeric(as.character(v)))

    ## NOTE(review): the file was spliced in the middle of the column-11
    ## conversion above. The conversions of columns 11 and 12 are restored by
    ## analogy with the other department tables. Whatever followed (presumably
    ## assigning tab12 to a dataset and saving it with usethis::use_data()) is
    ## missing and its dataset name is not recoverable here - restore it from
    ## version control.
    ## The spliced-in fragment was a headless verbatim repeat of the tab10
    ## (mohHealthAdmin2018) processing that would have run against the
    ## already-built tab10 data.frame; it has been removed.

    ################################################################################

    # Build tab11 (saved below as mohHealthCountyAdd2018) from row slices of
    # extracted tables 11, 12 and 13.
    tab11 <- rbind(health2[[11]][27:30, ], health2[[12]][c(5:10, 20:27, 37:45), ], health2[[13]][5:8, ])

    # Fold the first-column text of rows 8, 10, 16, 25 and 29 into the row
    # above each; those rows are dropped by the subset that follows.
    tab11[c(7, 9, 15, 24, 28), 1] <- paste(tab11[c(7, 9, 15, 24, 28), 1],
                                           tab11[c(8, 10, 16, 25, 29), 1], sep = " ")

    tab11 <- tab11[c(1:7, 9, 11:15, 17:24, 26:28, 30:31), ]

    # In column 1, hyphens and en dashes become spaces first, because the
    # blanket punctuation strip applied to columns 1-6 deletes characters
    # rather than replacing them.
    tab11[ , 1] <- str_replace_all(string = tab11[ , 1], pattern = " - ", replacement = " ")
    tab11[ , 1] <- str_replace_all(string = tab11[ , 1], pattern = "-", replacement = " ")
    tab11[ , 1] <- str_replace_all(string = tab11[ , 1], pattern = "–", replacement = " ")
    for (k in 1:6) {
      tab11[ , k] <- str_replace_all(string = tab11[ , k], pattern = "[[:punct:]]", replacement = "")
    }

    # Flatten every row to one space-separated string, then re-split it into
    # at most 14 tokens.
    tab11 <- paste(tab11[ , 1], tab11[ , 2], tab11[ , 3], tab11[ , 4], tab11[ , 5], tab11[ , 6], sep = " ")

    x <- str_split_fixed(string = tab11, pattern = " ", n = 14)

    labs <- character(nrow(x))
    df <- matrix(data = NA, nrow = nrow(x), ncol = 7)

    for (i in seq_len(nrow(x))) {
      # Purely alphabetic tokens make up the row label. paste(collapse = )
      # gives "" for a row without such tokens; iterating over
      # 1:length(words) would run for j = 1, 0 and yield the label "NA ".
      words <- x[i, ][str_detect(string = x[i, ], pattern = "^[a-zA-Z]+$")]
      phrase <- paste(words, collapse = " ")
      labs[i] <- phrase

      # Tokens of 1 to 10 digits are the numeric fields of the row; they must
      # fill the 7 columns of df (item code followed by six amounts).
      numbers <- x[i, ][str_detect(string = x[i, ], pattern = "^\\d{1,10}$")]
      numbers <- as.numeric(numbers)
      df[i, ] <- numbers
    }

    # Item code, label, then the six amounts. Rows 1, 2, 5, 6, 9, 10, 16 and 17
    # are not kept as items: their labels are reused below as
    # department/category names.
    tab11 <- cbind(df[ , 1], labs, df[ , 2:7])
    tab11 <- tab11[c(3:4, 7:8, 11:15, 18:26), ]

    departmentCode <- c(rep(702, 2), rep(704, 2), rep(709, 5), rep(712, 9))
    department <- c(rep(labs[1], 2), rep(labs[5], 2), rep(labs[9], 5), rep(labs[16], 9))

    # All 18 kept rows carry category code 26.
    categoryCode <- rep(26, 18)
    category <- c(rep(labs[2], 2), rep(labs[6], 2), rep(labs[10], 5), rep(labs[17], 9))

    tab11 <- data.frame(departmentCode, department, categoryCode, category, tab11)

    tab11$department <- str_to_title(string = tab11$department)
    tab11$category <- str_to_title(string = tab11$category)

    names(tab11) <- c("departmentCode", "department",
                      "categoryCode", "category",
                      "itemCode", "item",
                      "actual_2016_2017",
                      "budget_2017_2018", "outturn_2017_2018",
                      "budget_2018_2019", "projection_2019_2020",
                      "projection_2020_2021")

    # cbind() above produced a character matrix, so restore numeric types for
    # every column that came from it except the item label (column 6).
    tab11[c(5, 7:12)] <- lapply(tab11[c(5, 7:12)], function(v) as.numeric(as.character(v)))
    tab11[ , 6] <- as.character(tab11[ , 6])

    ################################################################################

    mohHealthCountyAdd2018 <- tab11
    usethis::use_data(mohHealthCountyAdd2018, overwrite = TRUE)

    ################################################################################

    # Build tab12 from rows 18-20 of extracted table 13.
    tab12 <- health2[[13]][18:20, ]

    # In column 1, hyphens and en dashes become spaces first, because the
    # blanket punctuation strip applied to columns 1-6 deletes characters
    # rather than replacing them.
    tab12[ , 1] <- str_replace_all(string = tab12[ , 1], pattern = " - ", replacement = " ")
    tab12[ , 1] <- str_replace_all(string = tab12[ , 1], pattern = "-", replacement = " ")
    tab12[ , 1] <- str_replace_all(string = tab12[ , 1], pattern = "–", replacement = " ")
    for (k in 1:6) {
      tab12[ , k] <- str_replace_all(string = tab12[ , k], pattern = "[[:punct:]]", replacement = "")
    }

    # Flatten every row to one space-separated string, then re-split it into
    # at most 12 tokens.
    tab12 <- paste(tab12[ , 1], tab12[ , 2], tab12[ , 3], tab12[ , 4], tab12[ , 5], tab12[ , 6], sep = " ")

    x <- str_split_fixed(string = tab12, pattern = " ", n = 12)

    labs <- character(nrow(x))
    df <- matrix(data = NA, nrow = nrow(x), ncol = 7)

    for (i in seq_len(nrow(x))) {
      # Purely alphabetic tokens make up the row label. paste(collapse = )
      # gives "" for a row without such tokens; iterating over
      # 1:length(words) would run for j = 1, 0 and yield the label "NA ".
      words <- x[i, ][str_detect(string = x[i, ], pattern = "^[a-zA-Z]+$")]
      phrase <- paste(words, collapse = " ")
      labs[i] <- phrase

      # Tokens of 1 to 10 digits are the numeric fields of the row; they must
      # fill the 7 columns of df (item code followed by six amounts).
      numbers <- x[i, ][str_detect(string = x[i, ], pattern = "^\\d{1,10}$")]
      numbers <- as.numeric(numbers)
      df[i, ] <- numbers
    }

    # Only row 3 is kept. Selecting a single row drops the matrix to a plain
    # character vector, so the record is rebuilt as a one-row matrix with
    # rbind() before it becomes a data frame.
    tab12 <- cbind(df[ , 1], labs, df[ , 2:7])
    tab12 <- tab12[3, ]

    # Department and category are hard-coded here rather than read from labs.
    departmentCode <- 5500
    department <- "General Claims"

    categoryCode <- 26
    category <- "Grants"

    tab12 <- data.frame(rbind(c(departmentCode, department, categoryCode, category, tab12)))

    #tab12$department <- str_to_title(string = tab12$department)
    #tab12$category <- str_to_title(string = tab12$category)

    names(tab12) <- c("departmentCode", "department",
                      "categoryCode", "category",
                      "itemCode", "item",
                      "actual_2016_2017",
                      "budget_2017_2018", "outturn_2017_2018",
                      "budget_2018_2019", "projection_2019_2020",
                      "projection_2020_2021")

    # Restore numeric types for the code and amount columns. This copy of the
    # section broke off at a bare `tab12[ , 11]`, leaving columns 11 and 12
    # unconverted; both are converted here like the other amount columns.
    # NOTE(review): departmentCode/categoryCode (columns 1 and 3) are left as
    # they come out of the character rbind() above - confirm that is intended.
    tab12[c(5, 7:12)] <- lapply(tab12[c(5, 7:12)], function(v) as.numeric(as.character(v)))
    tab12[ , 6] <- as.character(tab12[ , 6])

    phrase <- str_remove(string = phrase, pattern = " ")
    labs <- c(labs, phrase)
    numbers <- x[i, ][str_detect(string = x[i, ],
                                 pattern = "^(\\d{1}|\\d{2}|\\d{3}|\\d{4}|\\d{5}|\\d{6}|\\d{7}|\\d{8}|\\d{9}|\\d{10})$")]
    #numbers <- as.numeric(numbers)
    df[i, ] <- numbers
  }

  # Assemble tab7 from the parsed pieces: first numeric column (item code),
  # the row labels, then the remaining six numeric columns. Only rows 3, 5-12
  # and 14 are kept as items.
  tab7 <- cbind(df[ , 1], labs, df[ , 2:7])[c(3, 5:12, 14), ]

  # Department code 200; the department name is the label parsed from row 1.
  departmentCode <- rep(200, times = nrow(tab7))
  department <- rep(labs[1], times = nrow(tab7))

  # One row of category 21, eight of category 22 and one of category 26, with
  # the category names taken from label rows 2, 4 and 13.
  categoryCode <- rep(c(21, 22, 26), times = c(1, 8, 1))
  category <- rep(labs[c(2, 4, 13)], times = c(1, 8, 1))

  tab7 <- data.frame(departmentCode, department, categoryCode, category, tab7)

  # Title-case the two descriptive columns.
  tab7[c("department", "category")] <- lapply(tab7[c("department", "category")], str_to_title)

  names(tab7) <- c("departmentCode", "department", "categoryCode", "category",
                   "itemCode", "item",
                   "actual_2016_2017", "budget_2017_2018", "outturn_2017_2018",
                   "budget_2018_2019", "projection_2019_2020", "projection_2020_2021")

  # The cbind() result was a character matrix: convert the item code and the
  # six amount columns back to numeric, and keep the item label as character.
  tab7[c(5, 7:12)] <- lapply(tab7[c(5, 7:12)], function(v) as.numeric(as.character(v)))
  tab7[[6]] <- as.character(tab7[[6]])

  ################################################################################

  mohHealthPreventive2018 <- tab7
  usethis::use_data(mohHealthPreventive2018, overwrite = TRUE)

  ################################################################################

  # Build tab8 (saved below as mohHealthPlanning2018) from rows 10-30 of
  # extracted table 9.
  tab8 <- health2[[9]][10:30, ]

  # Fold the first-column text of rows 2, 8, 10, 12 and 17 into the row above
  # each; those rows are dropped by the subset that follows.
  tab8[c(1, 7, 9, 11, 16), 1] <- paste(tab8[c(1, 7, 9, 11, 16), 1],
                                       tab8[c(2, 8, 10, 12, 17), 1], sep = " ")

  tab8 <- tab8[c(1, 3:7, 9, 11, 13:16, 18:21), ]

  # In column 1, hyphens become spaces first, because the blanket punctuation
  # strip applied to columns 1-6 deletes characters rather than replacing them.
  tab8[ , 1] <- str_replace_all(string = tab8[ , 1], pattern = "-", replacement = " ")
  for (k in 1:6) {
    tab8[ , k] <- str_replace_all(string = tab8[ , k], pattern = "[[:punct:]]", replacement = "")
  }

  # Flatten every row to one space-separated string, then re-split it into at
  # most 14 tokens.
  tab8 <- paste(tab8[ , 1], tab8[ , 2], tab8[ , 3], tab8[ , 4], tab8[ , 5], tab8[ , 6], sep = " ")

  x <- str_split_fixed(string = tab8, pattern = " ", n = 14)

  labs <- character(nrow(x))
  df <- matrix(data = NA, nrow = nrow(x), ncol = 7)

  for (i in seq_len(nrow(x))) {
    # Purely alphabetic tokens make up the row label. paste(collapse = ) gives
    # "" for a row without such tokens; iterating over 1:length(words) would
    # run for j = 1, 0 and yield the label "NA ".
    words <- x[i, ][str_detect(string = x[i, ], pattern = "^[a-zA-Z]+$")]
    phrase <- paste(words, collapse = " ")
    labs[i] <- phrase

    # Tokens of 1 to 10 digits are the numeric fields of the row; they must
    # fill the 7 columns of df (item code followed by six amounts).
    numbers <- x[i, ][str_detect(string = x[i, ], pattern = "^\\d{1,10}$")]
    numbers <- as.numeric(numbers)
    df[i, ] <- numbers
  }

  # Item code, label, then the six amounts. Rows 1, 2, 4 and 15 are not kept
  # as items: their labels are reused below as department/category names.
  tab8 <- cbind(df[ , 1], labs, df[ , 2:7])
  tab8 <- tab8[c(3, 5:14, 16), ]

  departmentCode <- rep(400, nrow(tab8))
  department <- rep(labs[1], nrow(tab8))

  categoryCode <- c(21, rep(22, 10), 26)
  category <- c(labs[2], rep(labs[4], 10), labs[15])

  tab8 <- data.frame(departmentCode, department, categoryCode, category, tab8)

  tab8$department <- str_to_title(string = tab8$department)
  tab8$category <- str_to_title(string = tab8$category)

  names(tab8) <- c("departmentCode", "department",
                   "categoryCode", "category",
                   "itemCode", "item",
                   "actual_2016_2017",
                   "budget_2017_2018", "outturn_2017_2018",
                   "budget_2018_2019", "projection_2019_2020",
                   "projection_2020_2021")

  # cbind() above produced a character matrix, so restore numeric types for
  # every column that came from it except the item label (column 6).
  tab8[c(5, 7:12)] <- lapply(tab8[c(5, 7:12)], function(v) as.numeric(as.character(v)))
  tab8[ , 6] <- as.character(tab8[ , 6])

  ################################################################################

  mohHealthPlanning2018 <- tab8
  usethis::use_data(mohHealthPlanning2018, overwrite = TRUE)

  ################################################################################

  # Build tab9 (saved below as mohHealthVital2018) from rows 5-20 of extracted
  # table 10.
  tab9 <- health2[[10]][5:20, ]

  # Fold the first-column text of rows 8, 10 and 14 into the row above each;
  # those rows are dropped by the subset that follows.
  tab9[c(7, 9, 13), 1] <- paste(tab9[c(7, 9, 13), 1],
                                tab9[c(8, 10, 14), 1], sep = " ")

  tab9 <- tab9[c(1:7, 9, 11:13, 15:16), ]

  # In column 1, hyphens and en dashes become spaces first, because the
  # blanket punctuation strip applied to columns 1-6 deletes characters rather
  # than replacing them.
  tab9[ , 1] <- str_replace_all(string = tab9[ , 1], pattern = " - ", replacement = " ")
  tab9[ , 1] <- str_replace_all(string = tab9[ , 1], pattern = "-", replacement = " ")
  tab9[ , 1] <- str_replace_all(string = tab9[ , 1], pattern = "–", replacement = " ")
  for (k in 1:6) {
    tab9[ , k] <- str_replace_all(string = tab9[ , k], pattern = "[[:punct:]]", replacement = "")
  }

  # Flatten every row to one space-separated string, then re-split it into at
  # most 13 tokens.
  tab9 <- paste(tab9[ , 1], tab9[ , 2], tab9[ , 3], tab9[ , 4], tab9[ , 5], tab9[ , 6], sep = " ")

  x <- str_split_fixed(string = tab9, pattern = " ", n = 13)

  labs <- character(nrow(x))
  df <- matrix(data = NA, nrow = nrow(x), ncol = 7)

  for (i in seq_len(nrow(x))) {
    # Purely alphabetic tokens make up the row label. paste(collapse = ) gives
    # "" for a row without such tokens; iterating over 1:length(words) would
    # run for j = 1, 0 and yield the label "NA ".
    words <- x[i, ][str_detect(string = x[i, ], pattern = "^[a-zA-Z]+$")]
    phrase <- paste(words, collapse = " ")
    labs[i] <- phrase

    # Tokens of 1 to 10 digits are the numeric fields of the row; they must
    # fill the 7 columns of df (item code followed by six amounts).
    numbers <- x[i, ][str_detect(string = x[i, ], pattern = "^\\d{1,10}$")]
    numbers <- as.numeric(numbers)
    df[i, ] <- numbers
  }

  # Item code, label, then the six amounts. Rows 1, 2 and 5 are not kept as
  # items: their labels are reused below as department/category names.
  tab9 <- cbind(df[ , 1], labs, df[ , 2:7])
  tab9 <- tab9[c(3:4, 6:13), ]

  departmentCode <- rep(500, nrow(tab9))
  department <- rep(labs[1], nrow(tab9))

  categoryCode <- c(rep(21, 2), rep(22, 8))
  category <- c(rep(labs[2], 2), rep(labs[5], 8))

  tab9 <- data.frame(departmentCode, department, categoryCode, category, tab9)

  tab9$department <- str_to_title(string = tab9$department)
  tab9$category <- str_to_title(string = tab9$category)

  names(tab9) <- c("departmentCode", "department",
                   "categoryCode", "category",
                   "itemCode", "item",
                   "actual_2016_2017",
                   "budget_2017_2018", "outturn_2017_2018",
                   "budget_2018_2019", "projection_2019_2020",
                   "projection_2020_2021")

  # cbind() above produced a character matrix, so restore numeric types for
  # every column that came from it except the item label (column 6).
  tab9[c(5, 7:12)] <- lapply(tab9[c(5, 7:12)], function(v) as.numeric(as.character(v)))
  tab9[ , 6] <- as.character(tab9[ , 6])

  ################################################################################

  mohHealthVital2018 <- tab9
  usethis::use_data(mohHealthVital2018, overwrite = TRUE)

  ################################################################################

  # Build tab10 (saved below as mohHealthAdmin2018) from rows 32-48 of
  # extracted table 10 and rows 5-17 of extracted table 11.
  tab10 <- rbind(health2[[10]][32:48, ], health2[[11]][5:17, ])

  # Fold the first-column text of rows 2, 10, 12, 15 and 22 into the row above
  # each; those rows are dropped by the subset that follows.
  tab10[c(1, 9, 11, 14, 21), 1] <- paste(tab10[c(1, 9, 11, 14, 21), 1],
                                         tab10[c(2, 10, 12, 15, 22), 1], sep = " ")

  tab10 <- tab10[c(1, 3:9, 11, 13:14, 16:21, 23:30), ]

  # In column 1, hyphens and en dashes become spaces first, because the
  # blanket punctuation strip applied to columns 1-6 deletes characters rather
  # than replacing them.
  tab10[ , 1] <- str_replace_all(string = tab10[ , 1], pattern = " - ", replacement = " ")
  tab10[ , 1] <- str_replace_all(string = tab10[ , 1], pattern = "-", replacement = " ")
  tab10[ , 1] <- str_replace_all(string = tab10[ , 1], pattern = "–", replacement = " ")
  for (k in 1:6) {
    tab10[ , k] <- str_replace_all(string = tab10[ , k], pattern = "[[:punct:]]", replacement = "")
  }

  # Flatten every row to one space-separated string, then re-split it into at
  # most 14 tokens.
  tab10 <- paste(tab10[ , 1], tab10[ , 2], tab10[ , 3], tab10[ , 4], tab10[ , 5], tab10[ , 6], sep = " ")

  x <- str_split_fixed(string = tab10, pattern = " ", n = 14)

  labs <- character(nrow(x))
  df <- matrix(data = NA, nrow = nrow(x), ncol = 7)

  for (i in seq_len(nrow(x))) {
    # Purely alphabetic tokens make up the row label. paste(collapse = ) gives
    # "" for a row without such tokens; iterating over 1:length(words) would
    # run for j = 1, 0 and yield the label "NA ".
    words <- x[i, ][str_detect(string = x[i, ], pattern = "^[a-zA-Z]+$")]
    phrase <- paste(words, collapse = " ")
    labs[i] <- phrase

    # Tokens of 1 to 10 digits are the numeric fields of the row; they must
    # fill the 7 columns of df (item code followed by six amounts).
    numbers <- x[i, ][str_detect(string = x[i, ], pattern = "^\\d{1,10}$")]
    numbers <- as.numeric(numbers)
    df[i, ] <- numbers
  }

  # Item code, label, then the six amounts. Rows 1, 2, 6 and 24 are not kept
  # as items: their labels are reused below as department/category names.
  tab10 <- cbind(df[ , 1], labs, df[ , 2:7])
  tab10 <- tab10[c(3:5, 7:23, 25), ]

  departmentCode <- rep(600, nrow(tab10))
  department <- rep(labs[1], nrow(tab10))

  categoryCode <- c(rep(21, 3), rep(22, 17), 31)
  category <- c(rep(labs[2], 3), rep(labs[6], 17), labs[24])

  tab10 <- data.frame(departmentCode, department, categoryCode, category, tab10)

  tab10$department <- str_to_title(string = tab10$department)
  tab10$category <- str_to_title(string = tab10$category)

  names(tab10) <- c("departmentCode", "department",
                    "categoryCode", "category",
                    "itemCode", "item",
                    "actual_2016_2017",
                    "budget_2017_2018", "outturn_2017_2018",
                    "budget_2018_2019", "projection_2019_2020",
                    "projection_2020_2021")

  # cbind() above produced a character matrix, so restore numeric types for
  # every column that came from it except the item label (column 6).
  tab10[c(5, 7:12)] <- lapply(tab10[c(5, 7:12)], function(v) as.numeric(as.character(v)))
  tab10[ , 6] <- as.character(tab10[ , 6])

  ################################################################################

  mohHealthAdmin2018 <- tab10
  usethis::use_data(mohHealthAdmin2018, overwrite = TRUE)

  ################################################################################

  # Build tab11 (saved below as mohHealthCountyAdd2018) from row slices of
  # extracted tables 11, 12 and 13.
  tab11 <- rbind(health2[[11]][27:30, ], health2[[12]][c(5:10, 20:27, 37:45), ], health2[[13]][5:8, ])

  # Fold the first-column text of rows 8, 10, 16, 25 and 29 into the row above
  # each; those rows are dropped by the subset that follows.
  tab11[c(7, 9, 15, 24, 28), 1] <- paste(tab11[c(7, 9, 15, 24, 28), 1],
                                         tab11[c(8, 10, 16, 25, 29), 1], sep = " ")

  tab11 <- tab11[c(1:7, 9, 11:15, 17:24, 26:28, 30:31), ]

  # In column 1, hyphens and en dashes become spaces first, because the
  # blanket punctuation strip applied to columns 1-6 deletes characters rather
  # than replacing them.
  tab11[ , 1] <- str_replace_all(string = tab11[ , 1], pattern = " - ", replacement = " ")
  tab11[ , 1] <- str_replace_all(string = tab11[ , 1], pattern = "-", replacement = " ")
  tab11[ , 1] <- str_replace_all(string = tab11[ , 1], pattern = "–", replacement = " ")
  for (k in 1:6) {
    tab11[ , k] <- str_replace_all(string = tab11[ , k], pattern = "[[:punct:]]", replacement = "")
  }

  # Flatten every row to one space-separated string, then re-split it into at
  # most 14 tokens.
  tab11 <- paste(tab11[ , 1], tab11[ , 2], tab11[ , 3], tab11[ , 4], tab11[ , 5], tab11[ , 6], sep = " ")

  x <- str_split_fixed(string = tab11, pattern = " ", n = 14)

  labs <- character(nrow(x))
  df <- matrix(data = NA, nrow = nrow(x), ncol = 7)

  for (i in seq_len(nrow(x))) {
    # Purely alphabetic tokens make up the row label. paste(collapse = ) gives
    # "" for a row without such tokens; iterating over 1:length(words) would
    # run for j = 1, 0 and yield the label "NA ".
    words <- x[i, ][str_detect(string = x[i, ], pattern = "^[a-zA-Z]+$")]
    phrase <- paste(words, collapse = " ")
    labs[i] <- phrase

    # Tokens of 1 to 10 digits are the numeric fields of the row; they must
    # fill the 7 columns of df (item code followed by six amounts).
    numbers <- x[i, ][str_detect(string = x[i, ], pattern = "^\\d{1,10}$")]
    numbers <- as.numeric(numbers)
    df[i, ] <- numbers
  }

  # Item code, label, then the six amounts. Rows 1, 2, 5, 6, 9, 10, 16 and 17
  # are not kept as items: their labels are reused below as
  # department/category names.
  tab11 <- cbind(df[ , 1], labs, df[ , 2:7])
  tab11 <- tab11[c(3:4, 7:8, 11:15, 18:26), ]

  departmentCode <- c(rep(702, 2), rep(704, 2), rep(709, 5), rep(712, 9))
  department <- c(rep(labs[1], 2), rep(labs[5], 2), rep(labs[9], 5), rep(labs[16], 9))

  # All 18 kept rows carry category code 26.
  categoryCode <- rep(26, 18)
  category <- c(rep(labs[2], 2), rep(labs[6], 2), rep(labs[10], 5), rep(labs[17], 9))

  tab11 <- data.frame(departmentCode, department, categoryCode, category, tab11)

  tab11$department <- str_to_title(string = tab11$department)
  tab11$category <- str_to_title(string = tab11$category)

  names(tab11) <- c("departmentCode", "department",
                    "categoryCode", "category",
                    "itemCode", "item",
                    "actual_2016_2017",
                    "budget_2017_2018", "outturn_2017_2018",
                    "budget_2018_2019", "projection_2019_2020",
                    "projection_2020_2021")

  # cbind() above produced a character matrix, so restore numeric types for
  # every column that came from it except the item label (column 6).
  tab11[c(5, 7:12)] <- lapply(tab11[c(5, 7:12)], function(v) as.numeric(as.character(v)))
  tab11[ , 6] <- as.character(tab11[ , 6])

  ################################################################################

  mohHealthCountyAdd2018 <- tab11
  usethis::use_data(mohHealthCountyAdd2018, overwrite = TRUE)

  ################################################################################

  # Build tab12 from rows 18-20 of extracted table 13.
  tab12 <- health2[[13]][18:20, ]

  # In column 1, hyphens and en dashes become spaces first, because the
  # blanket punctuation strip applied to columns 1-6 deletes characters rather
  # than replacing them.
  tab12[ , 1] <- str_replace_all(string = tab12[ , 1], pattern = " - ", replacement = " ")
  tab12[ , 1] <- str_replace_all(string = tab12[ , 1], pattern = "-", replacement = " ")
  tab12[ , 1] <- str_replace_all(string = tab12[ , 1], pattern = "–", replacement = " ")
  for (k in 1:6) {
    tab12[ , k] <- str_replace_all(string = tab12[ , k], pattern = "[[:punct:]]", replacement = "")
  }

  # Flatten every row to one space-separated string, then re-split it into at
  # most 12 tokens.
  tab12 <- paste(tab12[ , 1], tab12[ , 2], tab12[ , 3], tab12[ , 4], tab12[ , 5], tab12[ , 6], sep = " ")

  x <- str_split_fixed(string = tab12, pattern = " ", n = 12)

  labs <- character(nrow(x))
  df <- matrix(data = NA, nrow = nrow(x), ncol = 7)

  for (i in seq_len(nrow(x))) {
    # Purely alphabetic tokens make up the row label. paste(collapse = ) gives
    # "" for a row without such tokens; iterating over 1:length(words) would
    # run for j = 1, 0 and yield the label "NA ".
    words <- x[i, ][str_detect(string = x[i, ], pattern = "^[a-zA-Z]+$")]
    phrase <- paste(words, collapse = " ")
    labs[i] <- phrase

    # Tokens of 1 to 10 digits are the numeric fields of the row; they must
    # fill the 7 columns of df (item code followed by six amounts).
    numbers <- x[i, ][str_detect(string = x[i, ], pattern = "^\\d{1,10}$")]
    numbers <- as.numeric(numbers)
    df[i, ] <- numbers
  }

  # Only row 3 is kept. Selecting a single row drops the matrix to a plain
  # character vector, so the record is rebuilt as a one-row matrix with
  # rbind() before it becomes a data frame.
  tab12 <- cbind(df[ , 1], labs, df[ , 2:7])
  tab12 <- tab12[3, ]

  # Department and category are hard-coded here rather than read from labs.
  departmentCode <- 5500
  department <- "General Claims"

  categoryCode <- 26
  category <- "Grants"

  tab12 <- data.frame(rbind(c(departmentCode, department, categoryCode, category, tab12)))

  #tab12$department <- str_to_title(string = tab12$department)
  #tab12$category <- str_to_title(string = tab12$category)

  names(tab12) <- c("departmentCode", "department",
                    "categoryCode", "category",
                    "itemCode", "item",
                    "actual_2016_2017",
                    "budget_2017_2018", "outturn_2017_2018",
                    "budget_2018_2019", "projection_2019_2020",
                    "projection_2020_2021")

  # Restore numeric types for the code and amount columns. This copy of the
  # section broke off at a bare `tab12[ , 11]`, leaving columns 11 and 12
  # unconverted; both are converted here like the other amount columns.
  # NOTE(review): departmentCode/categoryCode (columns 1 and 3) are left as
  # they come out of the character rbind() above - confirm that is intended.
  tab12[c(5, 7:12)] <- lapply(tab12[c(5, 7:12)], function(v) as.numeric(as.character(v)))
  tab12[ , 6] <- as.character(tab12[ , 6])
  numbers <- x[i, ][str_detect(string = x[i, ],
                               pattern = "^(\\d{1}|\\d{2}|\\d{3}|\\d{4}|\\d{5}|\\d{6}|\\d{7}|\\d{8}|\\d{9}|\\d{10})$")]
  numbers <- as.numeric(numbers)
  df[i, ] <- numbers
}

# Assemble tab11 from the parsed pieces: first numeric column (item code), the
# row labels, then the remaining six numeric columns. Only rows 3-4, 7-8,
# 11-15 and 18-26 are kept as items.
tab11 <- cbind(df[ , 1], labs, df[ , 2:7])[c(3:4, 7:8, 11:15, 18:26), ]

# Four departments with 2, 2, 5 and 9 item rows respectively; their names are
# the labels parsed from rows 1, 5, 9 and 16.
departmentCode <- rep(c(702, 704, 709, 712), times = c(2, 2, 5, 9))
department <- rep(labs[c(1, 5, 9, 16)], times = c(2, 2, 5, 9))

# Every row has category code 26; the category names come from label rows
# 2, 6, 10 and 17.
categoryCode <- rep(26, times = 18)
category <- rep(labs[c(2, 6, 10, 17)], times = c(2, 2, 5, 9))

tab11 <- data.frame(departmentCode, department, categoryCode, category, tab11)

# Title-case the two descriptive columns.
tab11[c("department", "category")] <- lapply(tab11[c("department", "category")], str_to_title)

names(tab11) <- c("departmentCode", "department", "categoryCode", "category",
                  "itemCode", "item",
                  "actual_2016_2017", "budget_2017_2018", "outturn_2017_2018",
                  "budget_2018_2019", "projection_2019_2020", "projection_2020_2021")

# The cbind() result was a character matrix: convert the item code and the six
# amount columns back to numeric, and keep the item label as character.
tab11[c(5, 7:12)] <- lapply(tab11[c(5, 7:12)], function(v) as.numeric(as.character(v)))
tab11[[6]] <- as.character(tab11[[6]])

################################################################################

mohHealthCountyAdd2018 <- tab11
usethis::use_data(mohHealthCountyAdd2018, overwrite = TRUE)

################################################################################

# Build tab12 (saved below as mohHealthClaims2018) from rows 18-20 of
# extracted table 13.
tab12 <- health2[[13]][18:20, ]

# In column 1, hyphens and en dashes become spaces first, because the blanket
# punctuation strip applied to columns 1-6 deletes characters rather than
# replacing them.
tab12[ , 1] <- str_replace_all(string = tab12[ , 1], pattern = " - ", replacement = " ")
tab12[ , 1] <- str_replace_all(string = tab12[ , 1], pattern = "-", replacement = " ")
tab12[ , 1] <- str_replace_all(string = tab12[ , 1], pattern = "–", replacement = " ")
for (k in 1:6) {
  tab12[ , k] <- str_replace_all(string = tab12[ , k], pattern = "[[:punct:]]", replacement = "")
}

# Flatten every row to one space-separated string, then re-split it into at
# most 12 tokens.
tab12 <- paste(tab12[ , 1], tab12[ , 2], tab12[ , 3], tab12[ , 4], tab12[ , 5], tab12[ , 6], sep = " ")

x <- str_split_fixed(string = tab12, pattern = " ", n = 12)

labs <- character(nrow(x))
df <- matrix(data = NA, nrow = nrow(x), ncol = 7)

for (i in seq_len(nrow(x))) {
  # Purely alphabetic tokens make up the row label. paste(collapse = ) gives
  # "" for a row without such tokens; iterating over 1:length(words) would run
  # for j = 1, 0 and yield the label "NA ".
  words <- x[i, ][str_detect(string = x[i, ], pattern = "^[a-zA-Z]+$")]
  phrase <- paste(words, collapse = " ")
  labs[i] <- phrase

  # Tokens of 1 to 10 digits are the numeric fields of the row; they must fill
  # the 7 columns of df (item code followed by six amounts).
  numbers <- x[i, ][str_detect(string = x[i, ], pattern = "^\\d{1,10}$")]
  numbers <- as.numeric(numbers)
  df[i, ] <- numbers
}

# Only row 3 is kept. Selecting a single row drops the matrix to a plain
# character vector, so the record is rebuilt as a one-row matrix with rbind()
# before it becomes a data frame.
tab12 <- cbind(df[ , 1], labs, df[ , 2:7])
tab12 <- tab12[3, ]

# Department and category are hard-coded here rather than read from labs.
departmentCode <- 5500
department <- "General Claims"

categoryCode <- 26
category <- "Grants"

tab12 <- data.frame(rbind(c(departmentCode, department, categoryCode, category, tab12)))

#tab12$department <- str_to_title(string = tab12$department)
#tab12$category <- str_to_title(string = tab12$category)

names(tab12) <- c("departmentCode", "department",
                  "categoryCode", "category",
                  "itemCode", "item",
                  "actual_2016_2017",
                  "budget_2017_2018", "outturn_2017_2018",
                  "budget_2018_2019", "projection_2019_2020",
                  "projection_2020_2021")

# The one-row matrix above is character throughout, so restore numeric types
# for the item code and the six amount columns; the item label (column 6)
# stays character.
# NOTE(review): departmentCode/categoryCode (columns 1 and 3) are left as they
# come out of the character rbind() above - confirm that is intended.
tab12[c(5, 7:12)] <- lapply(tab12[c(5, 7:12)], function(v) as.numeric(as.character(v)))
tab12[ , 6] <- as.character(tab12[ , 6])

################################################################################

mohHealthClaims2018 <- tab12
usethis::use_data(mohHealthClaims2018, overwrite = TRUE)

################################################################################

# Stack the per-table data frames row-wise into one data frame and store it
# through usethis::use_data().
# NOTE(review): tab11 is not part of this rbind() - confirm that leaving it
# out is intended.
mohHealthDepartment2018 <- data.frame(
  do.call(rbind, list(tab6, tab7, tab8, tab9, tab10, tab12))
)
usethis::use_data(mohHealthDepartment2018, overwrite = TRUE)

################################################################################
# validmeasures/liberiaNutriBudget documentation built on June 4, 2019, 5:45 p.m.