# data-raw/processData2017.R

################################################################################
#
# Load required libraries
#
################################################################################

library(pdftools)
library(tm)
library(tabulizer)
library(stringr)
library(tidyverse)
library(tidytext)


################################################################################
#
# Function to process tables
#
################################################################################

# Rebuild a table of labels and figures from rows of text tokens.
#
# Arguments:
#   tab   List with one character vector of tokens per table row.
#   nrow  Number of rows in the result; defaults to length(tab).
#   ncol  Number of value columns; defaults to the largest number of figures
#         found in any single row.
#
# For every row, the purely alphabetic tokens are joined with single spaces to
# form the row label, and the tokens made of 6 to 8 digits are taken as the
# figures of that row.
#
# Returns a data.frame whose first column (labs) holds the labels and whose
# remaining columns hold the figures. Rows with fewer figures than ncol are
# padded with NA. Stops when tab has more rows than nrow or when a row holds
# more figures than ncol.
get_table <- function(tab, nrow = NULL, ncol = NULL) {
  word_pattern <- "^[a-zA-Z]+$"
  number_pattern <- "^[0-9]{6,8}$"

  # Pull the figures of every row first so ncol can be inferred when missing
  values <- lapply(tab, function(tokens) {
    as.numeric(tokens[grepl(number_pattern, tokens)])
  })

  if (is.null(nrow)) {
    nrow <- length(tab)
  }
  if (is.null(ncol)) {
    ncol <- max(lengths(values), 0L)
  }
  if (length(tab) > nrow) {
    stop("`tab` has more rows than `nrow`", call. = FALSE)
  }

  labs <- rep(NA_character_, nrow)
  df <- matrix(data = NA_real_, nrow = nrow, ncol = ncol)

  for (i in seq_along(tab)) {
    tokens <- tab[[i]]
    # paste(collapse = ) gives "" for a row without words, not "NA "
    labs[i] <- paste(tokens[grepl(word_pattern, tokens)], collapse = " ")

    n_values <- length(values[[i]])
    if (n_values > ncol) {
      stop("row ", i, " holds more figures than `ncol`", call. = FALSE)
    }
    df[i, seq_len(n_values)] <- values[[i]]
  }

  data.frame(labs = labs, df, stringsAsFactors = FALSE)
}



################################################################################
#
# Create list for information and tables in page 249 (2017)
#
################################################################################

## Extract tables from page 249
health1 <- extract_tables(
  file = "data-raw/budget/2017 Ministry of Finance and Development Planning.pdf",
  pages = 249,
  method = "decide"
)

## Free-text fields that are stored alongside the two summary tables
goal <- health1[[1]][3, ]
strategicObjective <- str_c(health1[[1]][5, ], health1[[1]][6, ],
                            health1[[1]][7, ], sep = " ")

## Names of the six value columns shared by both summary tables
value_names <- c("actual_2014_2015",
                 "budget_2015_2016", "actual_2015_2016",
                 "budget_2016_2017", "projection_2017_2018",
                 "projection_2018_2019")

## First summary table: rows 15 to 20 of the extracted text. Punctuation is
## stripped before each row is split into tokens on single spaces.
## NOTE(review): earlier comments called this "page 255" although page 249 is
## the page extracted above - confirm which numbering is meant.
## Assumes the extracted table has a single text column - TODO confirm.
tab1 <- health1[[1]][15:20, ]
tab1 <- str_replace_all(string = tab1, pattern = "[[:punct:]]", replacement = "")
tab1 <- str_split(string = tab1, pattern = " ")

## Per row: alphabetic tokens form the label, 6 to 10 digit tokens the figures
economic_classification <- character(length(tab1))
df <- matrix(data = NA, nrow = 6, ncol = 6)

for (i in seq_along(tab1)) {
  words <- tab1[[i]][str_detect(string = tab1[[i]], pattern = "^[a-zA-Z]+$")]
  ## paste(collapse = ) yields "" (not "NA ") when a row has no words
  economic_classification[i] <- paste(words, collapse = " ")

  numbers <- tab1[[i]][str_detect(string = tab1[[i]], pattern = "^\\d{6,10}$")]
  df[i, ] <- as.numeric(numbers)
}

df <- data.frame(economic_classification, df)
names(df) <- c("economic_classification", value_names)

df$economic_classification <- str_to_sentence(df$economic_classification)
df <- data.frame(code = c(21, 22, 23, 25, 26, 31), df)

summaryEconHealth2017 <- df

## Second summary table: rows 26 to 39 of the extracted text
tab2 <- health1[[1]][26:39, ]
tab2 <- str_replace_all(string = tab2, pattern = "[[:punct:]]", replacement = "")

## Entries 10 and 14 are appended to entries 9 and 13 and then dropped
## (presumably labels that wrapped onto a second line - TODO confirm)
tab2[9] <- paste(tab2[9], tab2[10], sep = " ")
tab2[13] <- paste(tab2[13], tab2[14], sep = " ")
tab2 <- tab2[c(1:9, 11:13)]
tab2 <- str_split_fixed(string = tab2, pattern = " ", n = 14)

## Per row: alphabetic tokens form the label; tokens of 1 digit or of 4 to 10
## digits are the figures
spending_entity <- character(nrow(tab2))
df <- matrix(data = NA, nrow = 12, ncol = 6)

for (i in seq_len(nrow(tab2))) {
  words <- tab2[i, ][str_detect(string = tab2[i, ], pattern = "^[a-zA-Z]+$")]
  spending_entity[i] <- paste(words, collapse = " ")

  numbers <- tab2[i, ][str_detect(string = tab2[i, ],
                                  pattern = "^(\\d{1}|\\d{4,10})$")]
  df[i, ] <- as.numeric(numbers)
}

df <- data.frame(spending_entity, df)
names(df) <- c("spending_entity", value_names)

df$spending_entity <- str_to_title(df$spending_entity)
df <- data.frame(code = c(310, 311, 312, 313, 336, 337, 338, 339, 434, 435, 436, 439), df)

summarySpendingHealth2017 <- df

## Bundle the free-text fields and both tables, then save as package data
summaryHealth2017 <- list(
  goal = goal,
  strategicObjective = strategicObjective,
  summaryEconHealth2017 = summaryEconHealth2017,
  summarySpendingHealth2017 = summarySpendingHealth2017
)

usethis::use_data(summaryHealth2017, overwrite = TRUE)

## Tidy-up
rm(words, numbers, economic_classification, tab1, tab2, df, value_names,
   summaryEconHealth2017, summarySpendingHealth2017, health1)

################################################################################
#
# Process budget tables in pages 250-265 (2017)
#
################################################################################

## Extract tables from page 250-265
health2 <- extract_tables(
  file = "data-raw/budget/2017 Ministry of Finance and Development Planning.pdf",
  pages = 250:265,
  method = "decide"
)

## Category-level table: rows 24 to 27 of the first extracted table
tab1 <- health2[[1]][24:27, ]

## Hyphens become spaces before the remaining punctuation is stripped, so
## hyphenated text splits into separate tokens instead of being fused
tab1[, 1] <- str_replace_all(string = tab1[, 1], pattern = "-", replacement = " ")
for (k in 1:2) {
  tab1[, k] <- str_replace_all(string = tab1[, k], pattern = "[[:punct:]]",
                               replacement = "")
}

## Column 1 is split into 7 tokens and column 2 into 6, giving 13 columns
tab1 <- cbind(str_split_fixed(string = tab1[, 1], pattern = " ", n = 7),
              str_split_fixed(string = tab1[, 2], pattern = " ", n = 6))

## Hand-made realignment of rows 1, 2 and 4 so that columns 8 to 13 hold the
## six figures (presumably the split left a figure among the label tokens of
## these rows - TODO confirm against the PDF)
tab1[1, 9:13] <- tab1[1, 8:12]
tab1[1, 8] <- tab1[1, 5]
tab1[1, 5] <- ""
tab1[2, 8:13] <- tab1[2, 7:12]
tab1[2, 7] <- ""
tab1[4, 9:13] <- tab1[4, 8:12]
tab1[4, 8] <- tab1[4, 3]
tab1[4, 3] <- ""

## Row label = alphabetic tokens joined by single spaces; paste(collapse = )
## yields "" (not "NA ") for a row without any alphabetic token
labs <- vapply(
  seq_len(nrow(tab1)),
  function(i) {
    tokens <- tab1[i, ]
    paste(tokens[str_detect(string = tokens, pattern = "^[a-zA-Z]+$")],
          collapse = " ")
  },
  FUN.VALUE = character(1)
)

categoryCode <- c(21, 22, 25, 26)

tab1 <- data.frame(categoryCode, labs, tab1[, 8:13])

names(tab1) <- c("categoryCode", "category",
                 "actual_2015_2016",
                 "budget_2016_2017", "outturn_2016_2017",
                 "budget_2017_2018",
                 "projection_2018_2019", "projection_2019_2020")

## Labels stay character; the six value columns become numeric
tab1[, 2] <- as.character(tab1[, 2])
for (k in 3:8) {
  tab1[, k] <- as.numeric(as.character(tab1[, k]))
}

################################################################################

## Department-level table: rows 33 to 43 of the first extracted table
tab2 <- health2[[1]][33:43, ]

## Hyphens become spaces before the remaining punctuation is stripped
tab2[, 1] <- str_replace_all(string = tab2[, 1], pattern = "-", replacement = " ")
for (k in 1:2) {
  tab2[, k] <- str_replace_all(string = tab2[, k], pattern = "[[:punct:]]",
                               replacement = "")
}

## Column 1 is split into 7 tokens and column 2 into 6, giving 13 columns
tab2 <- cbind(str_split_fixed(string = tab2[, 1], pattern = " ", n = 7),
              str_split_fixed(string = tab2[, 2], pattern = " ", n = 6))

## Hand-made realignment of rows 1 and 6 so that columns 8 to 13 hold the six
## figures (presumably a figure ended up among the label tokens - TODO confirm)
tab2[1, 9:13] <- tab2[1, 8:12]
tab2[1, 8] <- tab2[1, 5]
tab2[1, 5] <- ""

tab2[6, 9:13] <- tab2[6, 8:12]
tab2[6, 8] <- tab2[6, 6]
tab2[6, 6] <- ""

## Row label = alphabetic tokens joined by single spaces; paste(collapse = )
## yields "" (not "NA ") for a row without any alphabetic token
labs <- vapply(
  seq_len(nrow(tab2)),
  function(i) {
    tokens <- tab2[i, ]
    paste(tokens[str_detect(string = tokens, pattern = "^[a-zA-Z]+$")],
          collapse = " ")
  },
  FUN.VALUE = character(1)
)

## First token of each row is used as the department code
tab2 <- data.frame(tab2[, 1], labs, tab2[, 8:13])
names(tab2) <- c("departmentCode", "department",
                 "actual_2015_2016",
                 "budget_2016_2017", "outturn_2016_2017",
                 "budget_2017_2018",
                 "projection_2018_2019", "projection_2019_2020")

## Code and value columns become numeric; the label stays character
tab2[, 2] <- as.character(tab2[, 2])
for (k in c(1, 3:8)) {
  tab2[, k] <- as.numeric(as.character(tab2[, k]))
}

################################################################################

## Item-level table: rows 11 to 49 of the second extracted table
tab3 <- health2[[2]][11:49, ]

## Append rows 8, 12, 20 and 28 (column 1) to the row above each, then drop
## them (presumably labels that wrapped onto a second line - TODO confirm)
for (r in c(7, 11, 19, 27)) {
  tab3[r, 1] <- paste(tab3[r, 1], tab3[r + 1, 1], sep = " ")
}
tab3 <- tab3[c(1:7, 9:11, 13:19, 21:27, 29:39), ]

## NOTE(review): unlike the sections for the later tables, column 1 only has
## its hyphens replaced here and is not stripped of other punctuation -
## confirm that this is intentional
tab3[, 1] <- str_replace_all(string = tab3[, 1], pattern = "-", replacement = " ")
for (k in 2:6) {
  tab3[, k] <- str_replace_all(string = tab3[, k], pattern = "[[:punct:]]",
                               replacement = "")
}

## x: tokens of column 1 (code, label words and a figure);
## y: column 6 split into its two figures
x <- str_split_fixed(string = tab3[, 1], pattern = " ", n = 8)
y <- str_split_fixed(string = tab3[, 6], pattern = " ", n = 2)

tab3 <- cbind(tab3[, 1:5], y)

## Per row: alphabetic tokens form the label and tokens of 1 to 10 digits fill
## the two columns of df
labs <- character(nrow(x))
df <- matrix(data = NA, nrow = nrow(x), ncol = 2)

for (i in seq_len(nrow(x))) {
  tokens <- x[i, ]
  ## paste(collapse = ) yields "" (not "NA ") when a row has no words
  labs[i] <- paste(tokens[str_detect(string = tokens, pattern = "^[a-zA-Z]+$")],
                   collapse = " ")
  df[i, ] <- as.numeric(tokens[str_detect(string = tokens,
                                          pattern = "^\\d{1,10}$")])
}

tab3 <- data.frame(df[, 1], labs, df[, 2], tab3[, 3:7])

## Rows 1, 5 and 28 are left out of the item rows; their labels are reused
## below as the category names
tab3 <- tab3[c(2:4, 6:27, 29:35), ]

categoryCode <- c(rep(21, 3), rep(22, 22), rep(25, 7))
category <- c(rep(labs[1], 3), rep(labs[5], 22), rep(labs[28], 7))

tab3 <- data.frame(categoryCode, category, tab3)

tab3$category <- str_to_title(tab3$category)

names(tab3) <- c("categoryCode", "category", "itemCode", "item",
                 "actual_2015_2016",
                 "budget_2016_2017", "outturn_2016_2017",
                 "budget_2017_2018",
                 "projection_2018_2019", "projection_2019_2020")

## Codes and values become numeric; category and item stay character
for (k in c(2, 4)) {
  tab3[, k] <- as.character(tab3[, k])
}
for (k in c(1, 3, 5:10)) {
  tab3[, k] <- as.numeric(as.character(tab3[, k]))
}

################################################################################

## Item-level table: rows 6 to 50 of the third extracted table
tab4 <- health2[[3]][6:50, ]

## Append rows 4, 10, 26, 31, 39 and 43 (column 1) to the row above each, then
## drop them (presumably labels that wrapped onto a second line - TODO confirm)
for (r in c(3, 9, 25, 30, 38, 42)) {
  tab4[r, 1] <- paste(tab4[r, 1], tab4[r + 1, 1], sep = " ")
}
tab4 <- tab4[c(1:3, 5:9, 11:25, 27:30, 32:38, 40:42, 44:45), ]

## Hyphens become spaces before the remaining punctuation is stripped
tab4[, 1] <- str_replace_all(string = tab4[, 1], pattern = "-", replacement = " ")
for (k in 1:6) {
  tab4[, k] <- str_replace_all(string = tab4[, k], pattern = "[[:punct:]]",
                               replacement = "")
}

## x: tokens of column 1 (code, label words and a figure);
## y: column 6 split into its two figures
x <- str_split_fixed(string = tab4[, 1], pattern = " ", n = 8)
y <- str_split_fixed(string = tab4[, 6], pattern = " ", n = 2)

tab4 <- cbind(tab4[, 1:5], y)

## Per row: alphabetic tokens form the label and tokens of 1 to 10 digits fill
## the two columns of df
labs <- character(nrow(x))
df <- matrix(data = NA, nrow = nrow(x), ncol = 2)

for (i in seq_len(nrow(x))) {
  tokens <- x[i, ]
  ## paste(collapse = ) yields "" (not "NA ") when a row has no words
  labs[i] <- paste(tokens[str_detect(string = tokens, pattern = "^[a-zA-Z]+$")],
                   collapse = " ")
  df[i, ] <- as.numeric(tokens[str_detect(string = tokens,
                                          pattern = "^\\d{1,10}$")])
}

tab4 <- data.frame(df[, 1], labs, df[, 2], tab4[, 3:7])

## Row 2 is left out of the item rows; its label is reused below as the
## category name of rows 3 to 39
tab4 <- tab4[c(1, 3:39), ]

categoryCode <- c(25, rep(26, 37))
category <- c("Subsidy", rep(labs[2], 37))

tab4 <- data.frame(categoryCode, category, tab4)

tab4$category <- str_to_title(tab4$category)

names(tab4) <- c("categoryCode", "category", "itemCode", "item",
                 "actual_2015_2016",
                 "budget_2016_2017", "outturn_2016_2017",
                 "budget_2017_2018",
                 "projection_2018_2019", "projection_2019_2020")

## Codes and values become numeric; category and item stay character
for (k in c(2, 4)) {
  tab4[, k] <- as.character(tab4[, k])
}
for (k in c(1, 3, 5:10)) {
  tab4[, k] <- as.numeric(as.character(tab4[, k]))
}

################################################################################

## Item-level table: rows 6 to 50 of the fourth extracted table
tab5 <- health2[[4]][6:50, ]

## Append rows 7, 19, 25, 38 and 44 (column 1) to the row above each, then
## drop them (presumably labels that wrapped onto a second line - TODO confirm)
for (r in c(6, 18, 24, 37, 43)) {
  tab5[r, 1] <- paste(tab5[r, 1], tab5[r + 1, 1], sep = " ")
}
tab5 <- tab5[c(1:6, 8:18, 20:24, 26:37, 39:43, 45), ]

## Hyphens become spaces before the remaining punctuation is stripped
tab5[, 1] <- str_replace_all(string = tab5[, 1], pattern = "-", replacement = " ")
for (k in 1:6) {
  tab5[, k] <- str_replace_all(string = tab5[, k], pattern = "[[:punct:]]",
                               replacement = "")
}

## x: tokens of column 1 (code and label words);
## y: column 6 split into its two figures
x <- str_split_fixed(string = tab5[, 1], pattern = " ", n = 8)
y <- str_split_fixed(string = tab5[, 6], pattern = " ", n = 2)

tab5 <- cbind(tab5[, 1:5], y)

## Per row: alphabetic tokens form the label and tokens of 1 to 10 digits fill
## df; only the first column of df (the item code) is used below
labs <- character(nrow(x))
df <- matrix(data = NA, nrow = nrow(x), ncol = 2)

for (i in seq_len(nrow(x))) {
  tokens <- x[i, ]
  ## paste(collapse = ) yields "" (not "NA ") when a row has no words
  labs[i] <- paste(tokens[str_detect(string = tokens, pattern = "^[a-zA-Z]+$")],
                   collapse = " ")
  df[i, ] <- as.numeric(tokens[str_detect(string = tokens,
                                          pattern = "^\\d{1,10}$")])
}

## Here all six value columns come from columns 2 to 7 of the cleaned table
tab5 <- data.frame(df[, 1], labs, tab5[, 2:7])

categoryCode <- rep(26, 40)
category <- rep("Grants", 40)

tab5 <- data.frame(categoryCode, category, tab5)

tab5$category <- str_to_title(tab5$category)

names(tab5) <- c("categoryCode", "category", "itemCode", "item",
                 "actual_2015_2016",
                 "budget_2016_2017", "outturn_2016_2017",
                 "budget_2017_2018",
                 "projection_2018_2019", "projection_2019_2020")

## Codes and values become numeric; category and item stay character
for (k in c(2, 4)) {
  tab5[, k] <- as.character(tab5[, k])
}
for (k in c(1, 3, 5:10)) {
  tab5[, k] <- as.numeric(as.character(tab5[, k]))
}

################################################################################

## Item-level table: rows 6 to 43 of the fifth extracted table
tab6 <- health2[[5]][6:43, ]

## Append rows 15 and 17 (column 1) to the row above each, then drop them
## (presumably labels that wrapped onto a second line - TODO confirm)
for (r in c(14, 16)) {
  tab6[r, 1] <- paste(tab6[r, 1], tab6[r + 1, 1], sep = " ")
}
tab6 <- tab6[c(1:14, 16, 18:38), ]

## Hyphens become spaces before the remaining punctuation is stripped
tab6[, 1] <- str_replace_all(string = tab6[, 1], pattern = "-", replacement = " ")
for (k in 1:6) {
  tab6[, k] <- str_replace_all(string = tab6[, k], pattern = "[[:punct:]]",
                               replacement = "")
}

## x: tokens of column 1 (code, label words and a figure);
## y: column 6 split into its two figures
x <- str_split_fixed(string = tab6[, 1], pattern = " ", n = 8)
y <- str_split_fixed(string = tab6[, 6], pattern = " ", n = 2)

tab6 <- cbind(tab6[, 1:5], y)

## Per row: alphabetic tokens form the label and tokens of 1 to 10 digits fill
## the two columns of df
labs <- character(nrow(x))
df <- matrix(data = NA, nrow = nrow(x), ncol = 2)

for (i in seq_len(nrow(x))) {
  tokens <- x[i, ]
  ## paste(collapse = ) yields "" (not "NA ") when a row has no words
  labs[i] <- paste(tokens[str_detect(string = tokens, pattern = "^[a-zA-Z]+$")],
                   collapse = " ")
  df[i, ] <- as.numeric(tokens[str_detect(string = tokens,
                                          pattern = "^\\d{1,10}$")])
}

tab6 <- data.frame(df[, 1], labs, df[, 2], tab6[, 3:7])

categoryCode <- rep(26, 36)
category <- rep("Grants", 36)

tab6 <- data.frame(categoryCode, category, tab6)

tab6$category <- str_to_title(tab6$category)

names(tab6) <- c("categoryCode", "category", "itemCode", "item",
                 "actual_2015_2016",
                 "budget_2016_2017", "outturn_2016_2017",
                 "budget_2017_2018",
                 "projection_2018_2019", "projection_2019_2020")

## Codes and values become numeric; category and item stay character
for (k in c(2, 4)) {
  tab6[, k] <- as.character(tab6[, k])
}
for (k in c(1, 3, 5:10)) {
  tab6[, k] <- as.numeric(as.character(tab6[, k]))
}

################################################################################

## Stack the four item-level tables and save the result as package data
mohHealthEcon2017 <- data.frame(rbind(tab3, tab4, tab5, tab6))
usethis::use_data(mohHealthEcon2017, overwrite = TRUE)

################################################################################

## County-level table: it starts in the last two used rows of the fifth
## extracted table and continues in rows 5 to 17 of the sixth
tab7 <- rbind(health2[[5]][49:50, ], health2[[6]][5:17, ])

## Rows 3 to 15 (the part taken from the sixth table) are shifted one column to
## the right, after which column 2 is blanked for every row
tab7[3:15, 3:4] <- tab7[3:15, 2:3]
tab7[, 2] <- ""

## Hyphens become spaces before the remaining punctuation is stripped
tab7[, 1] <- str_replace_all(string = tab7[, 1], pattern = "-", replacement = " ")
for (k in 1:6) {
  tab7[, k] <- str_replace_all(string = tab7[, k], pattern = "[[:punct:]]",
                               replacement = "")
}

## x: tokens of column 1 (code, label words and a figure);
## y: column 6 split into its two figures
x <- str_split_fixed(string = tab7[, 1], pattern = " ", n = 8)
y <- str_split_fixed(string = tab7[, 6], pattern = " ", n = 2)

tab7 <- cbind(tab7[, 1:5], y)

## Per row: alphabetic tokens form the label and tokens of 1 to 10 digits fill
## the two columns of df
labs <- character(nrow(x))
df <- matrix(data = NA, nrow = nrow(x), ncol = 2)

for (i in seq_len(nrow(x))) {
  tokens <- x[i, ]
  ## paste(collapse = ) yields "" (not "NA ") when a row has no words
  labs[i] <- paste(tokens[str_detect(string = tokens, pattern = "^[a-zA-Z]+$")],
                   collapse = " ")
  df[i, ] <- as.numeric(tokens[str_detect(string = tokens,
                                          pattern = "^\\d{1,10}$")])
}

tab7 <- data.frame(df[, 1], labs, df[, 2], tab7[, 3:7])

tab7$labs <- str_to_title(tab7$labs)

names(tab7) <- c("countyCode", "county",
                 "actual_2015_2016",
                 "budget_2016_2017", "outturn_2016_2017",
                 "budget_2017_2018",
                 "projection_2018_2019", "projection_2019_2020")

## Code and value columns become numeric; the county name stays character
tab7[, 2] <- as.character(tab7[, 2])
for (k in c(1, 3:8)) {
  tab7[, k] <- as.numeric(as.character(tab7[, k]))
}

################################################################################

## Save the county table as package data
mohHealthCounty2017 <- tab7
usethis::use_data(mohHealthCounty2017, overwrite = TRUE)

################################################################################

## Build the department-level detail table from five consecutive extracted
## tables (health2[[6]] to health2[[10]]); the result is saved below as
## mohHealthCurative2017. Every row and column index in this section is
## hard-coded against the extracted layout.
tab8 <- rbind(health2[[6]][35:48, ], health2[[7]][5:49, ],
              health2[[8]][5:51, ], health2[[9]][5:49, ],
              health2[[10]][5:27, ])

## Append the row below to column 1 of each listed row (presumably labels that
## wrapped onto a second line - TODO confirm). The appended rows are removed by
## the row selection that follows.
tab8[7, 1] <- paste(tab8[7, 1], tab8[8, 1], sep = " ")
tab8[9, 1] <- paste(tab8[9, 1], tab8[10, 1], sep = " ")
tab8[11, 1] <- paste(tab8[11, 1], tab8[12, 1], sep = " ")
tab8[13, 1] <- paste(tab8[13, 1], tab8[14, 1], sep = " ")
tab8[16, 1] <- paste(tab8[16, 1], tab8[17, 1], sep = " ")
tab8[23, 1] <- paste(tab8[23, 1], tab8[24, 1], sep = " ")
tab8[28, 1] <- paste(tab8[28, 1], tab8[29, 1], sep = " ")
tab8[33, 1] <- paste(tab8[33, 1], tab8[34, 1], sep = " ")
tab8[38, 1] <- paste(tab8[38, 1], tab8[39, 1], sep = " ")
tab8[40, 1] <- paste(tab8[40, 1], tab8[41, 1], sep = " ")
tab8[44, 1] <- paste(tab8[44, 1], tab8[45, 1], sep = " ")
tab8[47, 1] <- paste(tab8[47, 1], tab8[48, 1], sep = " ")
tab8[53, 1] <- paste(tab8[53, 1], tab8[54, 1], sep = " ")
tab8[58, 1] <- paste(tab8[58, 1], tab8[59, 1], sep = " ")
tab8[60, 1] <- paste(tab8[60, 1], tab8[61, 1], sep = " ")
tab8[65, 1] <- paste(tab8[65, 1], tab8[66, 1], sep = " ")
tab8[71, 1] <- paste(tab8[71, 1], tab8[72, 1], sep = " ")
tab8[73, 1] <- paste(tab8[73, 1], tab8[74, 1], sep = " ")
tab8[75, 1] <- paste(tab8[75, 1], tab8[76, 1], sep = " ")
tab8[78, 1] <- paste(tab8[78, 1], tab8[79, 1], sep = " ")
tab8[81, 1] <- paste(tab8[81, 1], tab8[82, 1], sep = " ")
tab8[84, 1] <- paste(tab8[84, 1], tab8[85, 1], sep = " ")
tab8[86, 1] <- paste(tab8[86, 1], tab8[87, 1], sep = " ")
tab8[90, 1] <- paste(tab8[90, 1], tab8[91, 1], sep = " ")
tab8[92, 1] <- paste(tab8[92, 1], tab8[93, 1], sep = " ")
tab8[94, 1] <- paste(tab8[94, 1], tab8[95, 1], sep = " ")
tab8[96, 1] <- paste(tab8[96, 1], tab8[97, 1], sep = " ")
tab8[99, 1] <- paste(tab8[99, 1], tab8[100, 1], sep = " ")
tab8[102, 1] <- paste(tab8[102, 1], tab8[103, 1], sep = " ")
tab8[104, 1] <- paste(tab8[104, 1], tab8[105, 1], sep = " ")
tab8[107, 1] <- paste(tab8[107, 1], tab8[108, 1], sep = " ")
tab8[114, 1] <- paste(tab8[114, 1], tab8[115, 1], sep = " ")
tab8[117, 1] <- paste(tab8[117, 1], tab8[118, 1], sep = " ")
tab8[123, 1] <- paste(tab8[123, 1], tab8[124, 1], sep = " ")
tab8[126, 1] <- paste(tab8[126, 1], tab8[127, 1], sep = " ")
tab8[136, 1] <- paste(tab8[136, 1], tab8[137, 1], sep = " ")
tab8[138, 1] <- paste(tab8[138, 1], tab8[139, 1], sep = " ")
tab8[140, 1] <- paste(tab8[140, 1], tab8[141, 1], sep = " ")
tab8[152, 1] <- paste(tab8[152, 1], tab8[153, 1], sep = " ")
tab8[167, 1] <- paste(tab8[167, 1], tab8[168, 1], sep = " ")

## Keep 134 of the 174 stacked rows: every row that was appended to the row
## above it is left out
tab8 <- tab8[c(1:7, 9, 11, 13, 15:16, 18:23, 25:28, 30:33, 35:38, 40, 42:44, 46:47, 49:53,
               55:58, 60, 62:65, 67:71, 73, 75, 77:78, 80:81, 83:84, 86, 88:90,
               92, 94, 96, 98:99, 101:102, 104, 106:107, 109:114, 116:117, 119:123,
               125:126, 128:136, 138, 140, 142:152, 154:167, 169:174), ]

## Hyphens in column 1 become spaces before punctuation is stripped from
## columns 1 to 6
tab8[ , 1] <- str_replace_all(string = tab8[ , 1], pattern = "-", replacement = " ")
tab8[ , 1] <- str_replace_all(string = tab8[ , 1], pattern = "[[:punct:]]", replacement = "")
tab8[ , 2] <- str_replace_all(string = tab8[ , 2], pattern = "[[:punct:]]", replacement = "")
tab8[ , 3] <- str_replace_all(string = tab8[ , 3], pattern = "[[:punct:]]", replacement = "")
tab8[ , 4] <- str_replace_all(string = tab8[ , 4], pattern = "[[:punct:]]", replacement = "")
tab8[ , 5] <- str_replace_all(string = tab8[ , 5], pattern = "[[:punct:]]", replacement = "")
tab8[ , 6] <- str_replace_all(string = tab8[ , 6], pattern = "[[:punct:]]", replacement = "")

## x: tokens of column 1, used further down for the labels and item codes;
## y: column 6 split at its first space into two pieces
x <- str_split_fixed(string = tab8[ , 1], pattern = " ", n = 10)
y <- str_split_fixed(string = tab8[ , 6], pattern = " ", n = 2)

## From here on tab8 has 7 columns: the original columns 1 to 5 plus y
tab8 <- cbind(tab8[ , 1:5], y)

## Rows 1 to 10: shift the value columns one place to the right and blank
## column 2, which is refilled below from the numbers found in column 1
tab8[1, 3:5] <- tab8[1, 2:4]
tab8[1, 2] <- ""

tab8[2:10, 3:4] <- tab8[2:10, 2:3]
tab8[2:10, 2] <- ""

z <- str_split_fixed(string = tab8[1:10 , 1], pattern = " ", n = 8)

df <- matrix(data = NA, nrow = nrow(z), ncol = 2)

## Collect the tokens of 1 to 10 digits of each row into the two columns of df
for(i in 1:(nrow(z))) {
  numbers <- z[i, ][str_detect(string = z[i, ],
                               pattern = "^(\\d{1}|\\d{2}|\\d{3}|\\d{4}|\\d{5}|\\d{6}|\\d{7}|\\d{8}|\\d{9}|\\d{10})$")]
  numbers <- as.numeric(numbers)
  df[i, ] <- numbers
}

## The second number of each row becomes column 2
tab8[1:10 , 2] <- df[ , 2]

## Rows 11 to 45: column 4 is split at its first space into columns 4 and 5
y <- str_split_fixed(string = tab8[11:45, 4], pattern = " ", n = 2)
tab8[11:45, 4:5] <- y

## Rows 114 to 134: columns 3 and 4 move one place right; column 3 is blanked
tab8[114:134, 4:5] <- tab8[114:134, 3:4]
tab8[114:134, 3] <- ""
## NOTE(review): the four statements below repeat the four statements above
## verbatim. On this second pass column 4 of rows 11 to 45 no longer contains a
## space, so the split returns "" as second piece and column 5 of those rows is
## overwritten with ""; rows 114 to 134 are shifted a second time, leaving
## column 4 empty. This looks like an accidental copy-paste - confirm against
## the PDF before removing it, because doing so changes the saved data.
y <- str_split_fixed(string = tab8[11:45, 4], pattern = " ", n = 2)
tab8[11:45, 4:5] <- y

tab8[114:134, 4:5] <- tab8[114:134, 3:4]
tab8[114:134, 3] <- ""
z <- tab8[114:134, 1]
z <- str_split_fixed(string = z, pattern = " ", n = 10)

df <- matrix(data = NA, nrow = nrow(z), ncol = 3)

## Rows 114 to 134: collect the tokens of 1 to 10 digits found in column 1
## into the three columns of df
for(i in 1:nrow(z)) {
  numbers <- z[i, ][str_detect(string = z[i, ],
                               pattern = "^(\\d{1}|\\d{2}|\\d{3}|\\d{4}|\\d{5}|\\d{6}|\\d{7}|\\d{8}|\\d{9}|\\d{10})$")]
  numbers <- as.numeric(numbers)
  df[i, ] <- numbers
}

## The second and third numbers of each row become columns 2 and 3
tab8[114:134, 2:3] <- df[ , 2:3]

labs <- NULL
df <- matrix(data = NA, nrow = nrow(x), ncol = 1)

## For every row: the alphabetic tokens of column 1 are joined by spaces into
## the label, and the first token of exactly 2, 4 or 6 digits is kept as the
## code (NA when there is none).
## NOTE(review): for a row without any alphabetic token, 1:length(words)
## iterates over c(1, 0) and the label becomes "NA " instead of an empty string.
for(i in 1:(nrow(x))) {
  words <- x[i, ][str_detect(string = x[i, ], pattern = "^[a-zA-Z]+$")]
  phrase <- ""


  for(j in 1:length(words)){
    phrase <- paste(phrase, words[j], sep = " ")
  }

  ## Drop the leading space left by the first paste()
  phrase <- str_remove(string = phrase, pattern = " ")
  labs <- c(labs, phrase)
  numbers <- x[i, ][str_detect(string = x[i, ],
                               pattern = "^(\\d{2}|\\d{4}|\\d{6})$")]
  numbers <- as.numeric(numbers)
  df[i, ] <- numbers[1]
}

tab8 <- data.frame(df, labs, tab8[ , 2:7])
## Keep 126 rows; rows 1, 2, 5, 16, 25, 124, 125 and 127 are dropped
## (presumably department and category headings - TODO confirm)
tab8 <- tab8[c(3:4, 6:15, 17:24, 26:123, 126, 128:134), ]

## Department name comes from the label of the first row; category codes and
## names are assigned by position and must match the row selection above
departmentCode <- rep(100, nrow(tab8))
department <- rep(labs[1], nrow(tab8))
categoryCode <- c(rep(21, 2), rep(22, 10), rep(25, 8), rep(26, 98), 25, rep(26, 7))
category <- c(rep("Compensation Of Employees", 2),
              rep("Use Of Goods And Services", 10),
              rep("Subsidy", 8),
              rep("Grants", 98), "Subsidy",
              rep("Grants", 7))

tab8 <- data.frame(departmentCode, department, categoryCode, category, tab8)

tab8$department <- str_to_title(tab8$department)
tab8$category <- str_to_title(tab8$category)

## Codes and values become numeric; department, category and item stay character
tab8[ , 1] <- as.numeric(as.character(tab8[ , 1]))
tab8[ , 2] <- as.character(tab8[ , 2])
tab8[ , 3] <- as.numeric(as.character(tab8[ , 3]))
tab8[ , 4] <- as.character(tab8[ , 4])
tab8[ , 5] <- as.numeric(as.character(tab8[ , 5]))
tab8[ , 6] <- as.character(tab8[ , 6])
tab8[ , 7] <- as.numeric(as.character(tab8[ , 7]))
tab8[ , 8] <- as.numeric(as.character(tab8[ , 8]))
tab8[ , 9] <- as.numeric(as.character(tab8[ , 9]))
tab8[ , 10] <- as.numeric(as.character(tab8[ , 10]))
tab8[ , 11] <- as.numeric(as.character(tab8[ , 11]))
tab8[ , 12] <- as.numeric(as.character(tab8[ , 12]))

## Mark the last eight rows as contingent in both the department (column 2)
## and the item (column 6)
tab8[119:126, 2] <- paste(tab8[119:126, 2], " - contingent", sep = "")
tab8[119:126, 6] <- paste(tab8[119:126, 6], " - contingent", sep = "")

names(tab8) <- c("departmentCode", "department",
                 "categoryCode", "category",
                 "itemCode", "item",
                 "actual_2015_2016",
                 "budget_2016_2017", "outturn_2016_2017",
                 "budget_2017_2018",
                 "projection_2018_2019", "projection_2019_2020")

################################################################################

## Save the table as package data
mohHealthCurative2017 <- tab8
usethis::use_data(mohHealthCurative2017, overwrite = TRUE)

################################################################################

tab9 <- rbind(health2[[10]][40:48, ], health2[[11]][5:21, ])

tab9[6, 1] <- paste(tab9[6, 1], tab9[7, 1], sep = " ")
tab9[8, 1] <- paste(tab9[8, 1], tab9[9, 1], sep = " ")
tab9[10, 1] <- paste(tab9[10, 1], tab9[11, 1], sep = " ")
tab9[13, 1] <- paste(tab9[13, 1], tab9[14, 1], sep = " ")
tab9[16, 1] <- paste(tab9[16, 1], tab9[17, 1], sep = " ")
tab9[19, 1] <- paste(tab9[19, 1], tab9[20, 1], sep = " ")
tab9[25, 1] <- paste(tab9[25, 1], tab9[26, 1], sep = " ")

tab9 <- tab9[c(1:6, 8, 10, 12:13, 15:16, 18:19, 21:25), ]

tab9[ , 1] <- str_replace_all(string = tab9[ , 1], pattern = "-", replacement = " ")
tab9[ , 1] <- str_replace_all(string = tab9[ , 1], pattern = "[[:punct:]]", replacement = "")
tab9[ , 2] <- str_replace_all(string = tab9[ , 2], pattern = "[[:punct:]]", replacement = "")
tab9[ , 3] <- str_replace_all(string = tab9[ , 3], pattern = "[[:punct:]]", replacement = "")
tab9[ , 4] <- str_replace_all(string = tab9[ , 4], pattern = "[[:punct:]]", replacement = "")
tab9[ , 5] <- str_replace_all(string = tab9[ , 5], pattern = "[[:punct:]]", replacement = "")
tab9[ , 6] <- str_replace_all(string = tab9[ , 6], pattern = "[[:punct:]]", replacement = "")

x <- str_split_fixed(string = tab9[ , 1], pattern = " ", n = 9)
y <- str_split_fixed(string = tab9[ , 6], pattern = " ", n = 2)

tab9 <- cbind(tab9[ , 1:5], y)

tab9[1:7, 4:5] <- tab9[1:7, 3:4]
tab9[1:7, 3] <- ""

tab9[8:19, 3] <- tab9[8:19, 2]
tab9[8:19, 2] <- ""

z <- tab9[1:7, 1]
z <- str_split_fixed(string = z, pattern = " ", n = 9)

df <- matrix(data = NA, nrow = nrow(z), ncol = 3)

for(i in 1:nrow(z)) {
  numbers <- z[i, ][str_detect(string = z[i, ],
                               pattern = "^(\\d{1}|\\d{2}|\\d{3}|\\d{4}|\\d{5}|\\d{6}|\\d{7}|\\d{8}|\\d{9}|\\d{10})$")]
  numbers <- as.numeric(numbers)
  df[i, ] <- numbers
}

tab9[1:7, 2:3] <- df[ , 2:3]

z <- tab9[8:19, 1]
z <- str_split_fixed(string = z, pattern = " ", n = 8)

df <- matrix(data = NA, nrow = nrow(z), ncol = 2)

for(i in 1:nrow(z)) {
  numbers <- z[i, ][str_detect(string = z[i, ],
                               pattern = "^(\\d{1}|\\d{2}|\\d{3}|\\d{4}|\\d{5}|\\d{6}|\\d{7}|\\d{8}|\\d{9}|\\d{10})$")]
  numbers <- as.numeric(numbers)
  df[i, ] <- numbers
}

tab9[8:19, 2] <- df[ , 2]

## For every row of the token matrix x, build
##   labs[i]  - the purely alphabetic tokens joined by single spaces
##   df[i, 1] - the first token made of exactly 2, 4 or 6 digits (NA if none)
## labs is preallocated and filled with paste(collapse = " "), so a row without
## any alphabetic token yields "" (the previous 1:length(words) loop ran over
## c(1, 0) in that case and produced the bogus label "NA ").
labs <- character(nrow(x))
df <- matrix(data = NA, nrow = nrow(x), ncol = 1)

for (i in seq_len(nrow(x))) {
  words <- x[i, ][str_detect(string = x[i, ], pattern = "^[a-zA-Z]+$")]
  labs[i] <- paste(words, collapse = " ")

  numbers <- x[i, ][str_detect(string = x[i, ],
                               pattern = "^(\\d{2}|\\d{4}|\\d{6})$")]
  numbers <- as.numeric(numbers)
  df[i, ] <- numbers[1]
}

## Put the extracted codes and labels in front of columns 2 to 7 of the cleaned
## table and keep rows 3, 5-17 and 19 only
tab9 <- data.frame(df, labs, tab9[ , 2:7])[c(3, 5:17, 19), ]

## Department and category identifiers; the label text is taken from rows 1,
## 2, 4 and 18 of labs
departmentCode <- rep(200, nrow(tab9))
department <- rep(labs[1], nrow(tab9))
categoryCode <- rep(c(21, 22, 26), times = c(1, 13, 1))
category <- rep(labs[c(2, 4, 18)], times = c(1, 13, 1))

tab9 <- data.frame(departmentCode, department, categoryCode, category, tab9)

## Title-case the two text identifiers
tab9[c("department", "category")] <- lapply(tab9[c("department", "category")],
                                            str_to_title)

## Column 6 as character, columns 7 to 12 as numeric (via character)
tab9[ , 6] <- as.character(tab9[ , 6])
tab9[7:12] <- lapply(tab9[7:12], function(v) as.numeric(as.character(v)))

names(tab9) <- c(
  "departmentCode", "department",
  "categoryCode", "category",
  "itemCode", "item",
  "actual_2015_2016",
  "budget_2016_2017", "outturn_2016_2017",
  "budget_2017_2018",
  "projection_2018_2019", "projection_2019_2020"
)

################################################################################

## Store the result as a package data set
mohHealthPreventive2017 <- tab9
usethis::use_data(mohHealthPreventive2017, overwrite = TRUE)

################################################################################

## Rows 33-50 of the 11th extracted table followed by row 5 of the 12th
tab10 <- rbind(health2[[11]][33:50, ], health2[[12]][5, ])

## Append the first-column text of rows 6, 9, 11, 13 and 15 to the row directly
## above each of them (presumably labels that wrapped onto a second line),
## then drop those rows
tab10[c(5, 8, 10, 12, 14), 1] <- paste(tab10[c(5, 8, 10, 12, 14), 1],
                                       tab10[c(6, 9, 11, 13, 15), 1],
                                       sep = " ")

tab10 <- tab10[c(1:5, 7:8, 10, 12, 14, 16:19), ]

## Hyphens in column 1 become spaces; all remaining punctuation is then
## stripped from columns 1 to 6
tab10[ , 1] <- str_replace_all(string = tab10[ , 1], pattern = "-", replacement = " ")

for (i in 1:6) {
  tab10[ , i] <- str_replace_all(string = tab10[ , i],
                                 pattern = "[[:punct:]]",
                                 replacement = "")
}

## Tokenise column 1 (x feeds the label loop below) and split column 6 at its
## first space into two columns
x <- str_split_fixed(string = tab10[ , 1], pattern = " ", n = 8)
y <- str_split_fixed(string = tab10[ , 6], pattern = " ", n = 2)

tab10 <- cbind(tab10[ , 1:5], y)

## Move column 2 into column 3 and blank column 2
tab10[ , 2:3] <- cbind("", tab10[ , 2])

## For every row of the token matrix x, build
##   labs[i]  - the purely alphabetic tokens joined by single spaces
##   df[i, 1] - the first token made of exactly 2, 4 or 6 digits (NA if none)
## labs is preallocated and filled with paste(collapse = " "), so a row without
## any alphabetic token yields "" (the previous 1:length(words) loop ran over
## c(1, 0) in that case and produced the bogus label "NA ").
labs <- character(nrow(x))
df <- matrix(data = NA, nrow = nrow(x), ncol = 1)

for (i in seq_len(nrow(x))) {
  words <- x[i, ][str_detect(string = x[i, ], pattern = "^[a-zA-Z]+$")]
  labs[i] <- paste(words, collapse = " ")

  numbers <- x[i, ][str_detect(string = x[i, ],
                               pattern = "^(\\d{2}|\\d{4}|\\d{6})$")]
  numbers <- as.numeric(numbers)
  df[i, ] <- numbers[1]
}

## Put the extracted codes and labels in front of columns 2 to 7 of the cleaned
## table and keep rows 3 and 5-14 only
tab10 <- data.frame(df, labs, tab10[ , 2:7])[c(3, 5:14), ]

## Department and category identifiers; the label text is taken from rows 1,
## 2 and 4 of labs
departmentCode <- rep(300, nrow(tab10))
department <- rep(labs[1], nrow(tab10))
categoryCode <- rep(c(22, 26), times = c(1, 10))
category <- rep(labs[c(2, 4)], times = c(1, 10))

tab10 <- data.frame(departmentCode, department, categoryCode, category, tab10)

## Title-case the two text identifiers
tab10[c("department", "category")] <- lapply(tab10[c("department", "category")],
                                             str_to_title)

names(tab10) <- c(
  "departmentCode", "department",
  "categoryCode", "category",
  "itemCode", "item",
  "actual_2015_2016",
  "budget_2016_2017", "outturn_2016_2017",
  "budget_2017_2018",
  "projection_2018_2019", "projection_2019_2020"
)

## Column 6 as character, columns 7 to 12 as numeric (via character)
tab10[ , 6] <- as.character(tab10[ , 6])
tab10[7:12] <- lapply(tab10[7:12], function(v) as.numeric(as.character(v)))

################################################################################

## Store the result as a package data set
mohHealthSocial2017 <- tab10
usethis::use_data(mohHealthSocial2017, overwrite = TRUE)

################################################################################

## Rows 19-39 of the 12th extracted table
tab11 <- health2[[12]][19:39, ]

## Append the first-column text of rows 2, 8, 10, 12 and 15 to the row directly
## above each of them (presumably labels that wrapped onto a second line).
## Each continuation is appended exactly once: the earlier version repeated the
## merge for row 1, which glued the text of row 2 onto it twice.
tab11[c(1, 7, 9, 11, 14), 1] <- paste(tab11[c(1, 7, 9, 11, 14), 1],
                                      tab11[c(2, 8, 10, 12, 15), 1],
                                      sep = " ")

## Drop the continuation rows that were just merged
tab11 <- tab11[c(1, 3:7, 9, 11, 13:14, 16:21), ]

## Spaced and bare hyphens in column 1 become single spaces; all remaining
## punctuation is then stripped from columns 1 to 6
tab11[ , 1] <- str_replace_all(string = tab11[ , 1], pattern = " - ", replacement = " ")
tab11[ , 1] <- str_replace_all(string = tab11[ , 1], pattern = "-", replacement = " ")

for (i in 1:6) {
  tab11[ , i] <- str_replace_all(string = tab11[ , i],
                                 pattern = "[[:punct:]]",
                                 replacement = "")
}

## Tokenise column 1 (x feeds the label loop below) and split column 6 at its
## first space into two columns
x <- str_split_fixed(string = tab11[ , 1], pattern = " ", n = 7)
y <- str_split_fixed(string = tab11[ , 6], pattern = " ", n = 2)

tab11 <- cbind(tab11[ , 1:5], y)

## Move column 2 into column 3 and blank column 2
tab11[ , 3] <- tab11[ , 2]
tab11[ , 2] <- ""

## For every row of the token matrix x, build
##   labs[i] - the purely alphabetic tokens joined by single spaces
##   df[i, ] - the all-digit tokens (1 to 8 digits); the row has two slots, so
##             two matches per row are expected (a single match is recycled)
## labs is preallocated and filled with paste(collapse = " "), so a row without
## any alphabetic token yields "" (the previous 1:length(words) loop ran over
## c(1, 0) in that case and produced the bogus label "NA ").
labs <- character(nrow(x))
df <- matrix(data = NA, nrow = nrow(x), ncol = 2)

for (i in seq_len(nrow(x))) {
  words <- x[i, ][str_detect(string = x[i, ], pattern = "^[a-zA-Z]+$")]
  labs[i] <- paste(words, collapse = " ")

  numbers <- x[i, ][str_detect(string = x[i, ],
                               pattern = "^(\\d{1}|\\d{2}|\\d{3}|\\d{4}|\\d{5}|\\d{6}|\\d{7}|\\d{8})$")]
  numbers <- as.numeric(numbers)
  df[i, ] <- numbers
}

## First number, label and second number extracted from column 1, followed by
## columns 3 to 7 of the cleaned table; keep rows 3, 5-13 and 15-16 only
tab11 <- data.frame(df[ , 1], labs, df[ , 2], tab11[ , 3:7])[c(3, 5:13, 15:16), ]

## Department and category identifiers; the label text is taken from rows 1,
## 2, 4 and 14 of labs
departmentCode <- rep(400, nrow(tab11))
department <- rep(labs[1], nrow(tab11))
categoryCode <- rep(c(21, 22, 26), times = c(1, 9, 2))
category <- rep(labs[c(2, 4, 14)], times = c(1, 9, 2))

tab11 <- data.frame(departmentCode, department, categoryCode, category, tab11)

## Title-case the two text identifiers
tab11[c("department", "category")] <- lapply(tab11[c("department", "category")],
                                             str_to_title)

names(tab11) <- c(
  "departmentCode", "department",
  "categoryCode", "category",
  "itemCode", "item",
  "actual_2015_2016",
  "budget_2016_2017", "outturn_2016_2017",
  "budget_2017_2018",
  "projection_2018_2019", "projection_2019_2020"
)

## Column 6 as character, columns 7 to 12 as numeric (via character)
tab11[ , 6] <- as.character(tab11[ , 6])
tab11[7:12] <- lapply(tab11[7:12], function(v) as.numeric(as.character(v)))

################################################################################

## Store the result as a package data set
mohHealthPlanning2017 <- tab11
usethis::use_data(mohHealthPlanning2017, overwrite = TRUE)

################################################################################

## Rows 5-19 of the 13th extracted table
tab12 <- health2[[13]][5:19, ]

## Append the first-column text of rows 8, 10 and 13 to the row directly above
## each of them (presumably labels that wrapped onto a second line), then drop
## those rows
tab12[c(7, 9, 12), 1] <- paste(tab12[c(7, 9, 12), 1],
                               tab12[c(8, 10, 13), 1],
                               sep = " ")
tab12 <- tab12[c(1:7, 9, 11:12, 14:15), ]

## Spaced and bare hyphens in column 1 become single spaces; all remaining
## punctuation is then stripped from columns 1 to 6
tab12[ , 1] <- str_replace_all(string = tab12[ , 1], pattern = " - ", replacement = " ")
tab12[ , 1] <- str_replace_all(string = tab12[ , 1], pattern = "-", replacement = " ")

for (i in 1:6) {
  tab12[ , i] <- str_replace_all(string = tab12[ , i],
                                 pattern = "[[:punct:]]",
                                 replacement = "")
}

## Tokenise column 1 (x feeds the label loop below) and split column 6 at its
## first space into two columns
x <- str_split_fixed(string = tab12[ , 1], pattern = " ", n = 7)
y <- str_split_fixed(string = tab12[ , 6], pattern = " ", n = 2)

tab12 <- cbind(tab12[ , 1:5], y)

## Move column 2 into column 3 and blank column 2
tab12[ , 2:3] <- cbind("", tab12[ , 2])

## For every row of the token matrix x, build
##   labs[i] - the purely alphabetic tokens joined by single spaces
##   df[i, ] - the all-digit tokens (1 to 8 digits); the row has two slots, so
##             two matches per row are expected (a single match is recycled)
## labs is preallocated and filled with paste(collapse = " "), so a row without
## any alphabetic token yields "" (the previous 1:length(words) loop ran over
## c(1, 0) in that case and produced the bogus label "NA ").
labs <- character(nrow(x))
df <- matrix(data = NA, nrow = nrow(x), ncol = 2)

for (i in seq_len(nrow(x))) {
  words <- x[i, ][str_detect(string = x[i, ], pattern = "^[a-zA-Z]+$")]
  labs[i] <- paste(words, collapse = " ")

  numbers <- x[i, ][str_detect(string = x[i, ],
                               pattern = "^(\\d{1}|\\d{2}|\\d{3}|\\d{4}|\\d{5}|\\d{6}|\\d{7}|\\d{8})$")]
  numbers <- as.numeric(numbers)
  df[i, ] <- numbers
}

## First number, label and second number extracted from column 1, followed by
## columns 3 to 7 of the cleaned table; keep rows 3-4 and 6-12 only
tab12 <- data.frame(df[ , 1], labs, df[ , 2], tab12[ , 3:7])[c(3:4, 6:12), ]

## Department and category identifiers; the label text is taken from rows 1,
## 2 and 5 of labs
departmentCode <- rep(500, nrow(tab12))
department <- rep(labs[1], nrow(tab12))
categoryCode <- rep(c(21, 22), times = c(2, 7))
category <- rep(labs[c(2, 5)], times = c(2, 7))

tab12 <- data.frame(departmentCode, department, categoryCode, category, tab12)

## Title-case the two text identifiers
tab12[c("department", "category")] <- lapply(tab12[c("department", "category")],
                                             str_to_title)

names(tab12) <- c(
  "departmentCode", "department",
  "categoryCode", "category",
  "itemCode", "item",
  "actual_2015_2016",
  "budget_2016_2017", "outturn_2016_2017",
  "budget_2017_2018",
  "projection_2018_2019", "projection_2019_2020"
)

## Column 6 as character, columns 7 to 12 as numeric (via character)
tab12[ , 6] <- as.character(tab12[ , 6])
tab12[7:12] <- lapply(tab12[7:12], function(v) as.numeric(as.character(v)))

################################################################################

## Store the result as a package data set
mohHealthVital2017 <- tab12
usethis::use_data(mohHealthVital2017, overwrite = TRUE)

################################################################################

## Rows 32-49 of the 13th extracted table followed by rows 5-17 of the 14th
tab13 <- rbind(health2[[13]][32:49, ], health2[[14]][5:17, ])

## Append the first-column text of rows 9, 11, 14, 16, 20, 24 and 29 to the row
## directly above each of them (presumably labels that wrapped onto a second
## line), then drop those rows
tab13[c(8, 10, 13, 15, 19, 23, 28), 1] <- paste(tab13[c(8, 10, 13, 15, 19, 23, 28), 1],
                                                tab13[c(9, 11, 14, 16, 20, 24, 29), 1],
                                                sep = " ")

tab13 <- tab13[c(1:8, 10, 12:13, 15, 17:19, 21:23, 25:28, 30:31), ]

## Spaced hyphens, spaced en dashes and bare hyphens in column 1 become single
## spaces; all remaining punctuation is then stripped from columns 1 to 6
tab13[ , 1] <- str_replace_all(string = tab13[ , 1], pattern = " - ", replacement = " ")
tab13[ , 1] <- str_replace_all(string = tab13[ , 1], pattern = " – ", replacement = " ")
tab13[ , 1] <- str_replace_all(string = tab13[ , 1], pattern = "-", replacement = " ")

for (i in 1:6) {
  tab13[ , i] <- str_replace_all(string = tab13[ , i],
                                 pattern = "[[:punct:]]",
                                 replacement = "")
}

## Tokenise column 1 (x feeds the label loop below) and split column 6 at its
## first space into two columns
x <- str_split_fixed(string = tab13[ , 1], pattern = " ", n = 7)
y <- str_split_fixed(string = tab13[ , 6], pattern = " ", n = 2)

tab13 <- cbind(tab13[ , 1:5], y)

## Move column 2 into column 3 and blank column 2
tab13[ , 2:3] <- cbind("", tab13[ , 2])

## For every row of the token matrix x, build
##   labs[i] - the purely alphabetic tokens joined by single spaces
##   df[i, ] - the all-digit tokens (1 to 8 digits); the row has two slots, so
##             two matches per row are expected (a single match is recycled)
## labs is preallocated and filled with paste(collapse = " "), so a row without
## any alphabetic token yields "" (the previous 1:length(words) loop ran over
## c(1, 0) in that case and produced the bogus label "NA ").
labs <- character(nrow(x))
df <- matrix(data = NA, nrow = nrow(x), ncol = 2)

for (i in seq_len(nrow(x))) {
  words <- x[i, ][str_detect(string = x[i, ], pattern = "^[a-zA-Z]+$")]
  labs[i] <- paste(words, collapse = " ")

  numbers <- x[i, ][str_detect(string = x[i, ],
                               pattern = "^(\\d{1}|\\d{2}|\\d{3}|\\d{4}|\\d{5}|\\d{6}|\\d{7}|\\d{8})$")]
  numbers <- as.numeric(numbers)
  df[i, ] <- numbers
}

## First number, label and second number extracted from column 1, followed by
## columns 3 to 7 of the cleaned table; keep rows 3-5 and 7-24 only
tab13 <- data.frame(df[ , 1], labs, df[ , 2], tab13[ , 3:7])[c(3:5, 7:24), ]

## Department and category identifiers; the label text is taken from rows 1,
## 2 and 5 of labs
departmentCode <- rep(600, nrow(tab13))
department <- rep(labs[1], nrow(tab13))
categoryCode <- rep(c(21, 22), times = c(3, 18))
category <- rep(labs[c(2, 5)], times = c(3, 18))

tab13 <- data.frame(departmentCode, department, categoryCode, category, tab13)

## Title-case the two text identifiers
tab13[c("department", "category")] <- lapply(tab13[c("department", "category")],
                                             str_to_title)

names(tab13) <- c(
  "departmentCode", "department",
  "categoryCode", "category",
  "itemCode", "item",
  "actual_2015_2016",
  "budget_2016_2017", "outturn_2016_2017",
  "budget_2017_2018",
  "projection_2018_2019", "projection_2019_2020"
)

## Column 6 as character, columns 7 to 12 as numeric (via character)
tab13[ , 6] <- as.character(tab13[ , 6])
tab13[7:12] <- lapply(tab13[7:12], function(v) as.numeric(as.character(v)))

################################################################################

## Store the result as a package data set
mohHealthAdmin2017 <- tab13
usethis::use_data(mohHealthAdmin2017, overwrite = TRUE)

################################################################################

## Rows 30-48 of the 14th extracted table, rows 5-49 of the 15th and rows 5-12
## of the 16th
tab14 <- rbind(health2[[14]][30:48, ], health2[[15]][5:49, ], health2[[16]][5:12, ])

## Append the first-column text of rows 21, 23, 39, 60, 64 and 72 to the row
## directly above each of them (presumably labels that wrapped onto a second
## line)
tab14[c(20, 22, 38, 59, 63, 71), 1] <- paste(tab14[c(20, 22, 38, 59, 63, 71), 1],
                                             tab14[c(21, 23, 39, 60, 64, 72), 1],
                                             sep = " ")

## Keep 42 rows; the rows left out include the continuation rows just merged
tab14 <- tab14[c(1:7, 12:13, 18:20, 22, 28:29, 34:38, 40:41, 46:49, 54:59, 61:63, 65:71), ]

## Spaced hyphens, spaced en dashes and bare hyphens in column 1 become single
## spaces; all remaining punctuation is then stripped from columns 1 to 6
tab14[ , 1] <- str_replace_all(string = tab14[ , 1], pattern = " - ", replacement = " ")
tab14[ , 1] <- str_replace_all(string = tab14[ , 1], pattern = " – ", replacement = " ")
tab14[ , 1] <- str_replace_all(string = tab14[ , 1], pattern = "-", replacement = " ")

for (i in 1:6) {
  tab14[ , i] <- str_replace_all(string = tab14[ , i],
                                 pattern = "[[:punct:]]",
                                 replacement = "")
}

## Rows 36 to 42: move columns 2-3 one column to the right and blank column 2
tab14[36:42, 3:4] <- tab14[36:42, 2:3]
tab14[36:42, 2] <- ""

## Rows 1 to 35: move column 2 into column 3 and blank column 2
tab14[1:35, 3] <- tab14[1:35, 2]
tab14[1:35, 2] <- ""

## Tokenise column 1 (x feeds the label loop below) and split column 6 at its
## first space into two columns
x <- str_split_fixed(string = tab14[ , 1], pattern = " ", n = 7)
y <- str_split_fixed(string = tab14[ , 6], pattern = " ", n = 2)

## The two row-wise shifts above already left column 2 empty on all 42 rows.
## The former second shift (tab14[ , 3] <- tab14[ , 2]) therefore overwrote
## column 3 with blanks, so budget_2016_2017 came out as NA for every row; it
## is intentionally not repeated here.
tab14 <- cbind(tab14[ , 1:5], y)

## For every row of the token matrix x, build
##   labs[i] - the purely alphabetic tokens joined by single spaces
##   df[i, ] - the all-digit tokens (1 to 8 digits); the row has two slots, so
##             two matches per row are expected (a single match is recycled)
## labs is preallocated and filled with paste(collapse = " "), so a row without
## any alphabetic token yields "" (the previous 1:length(words) loop ran over
## c(1, 0) in that case and produced the bogus label "NA ").
labs <- character(nrow(x))
df <- matrix(data = NA, nrow = nrow(x), ncol = 2)

for (i in seq_len(nrow(x))) {
  words <- x[i, ][str_detect(string = x[i, ], pattern = "^[a-zA-Z]+$")]
  labs[i] <- paste(words, collapse = " ")

  numbers <- x[i, ][str_detect(string = x[i, ],
                               pattern = "^(\\d{1}|\\d{2}|\\d{3}|\\d{4}|\\d{5}|\\d{6}|\\d{7}|\\d{8})$")]
  numbers <- as.numeric(numbers)
  df[i, ] <- numbers
}

## First number, label and second number extracted from column 1, followed by
## columns 3 to 7 of the cleaned table; keep 22 selected rows
tab14 <- data.frame(df[ , 1], labs, df[ , 2], tab14[ , 3:7])
tab14 <- tab14[c(3:4, 7, 12:13, 18:22, 29:37, 40:42), ]

## Four department codes covering 3, 2, 5 and 12 rows respectively; their
## label text is taken from rows 1, 8, 14 and 23 of labs. All 22 rows share
## category code 26 and the label in row 2 of labs.
departmentCode <- rep(c(702, 704, 709, 712), times = c(3, 2, 5, 12))
department <- rep(labs[c(1, 8, 14, 23)], times = c(3, 2, 5, 12))
categoryCode <- rep(26, 22)
category <- rep(labs[2], 22)

tab14 <- data.frame(departmentCode, department, categoryCode, category, tab14)

## Title-case the two text identifiers
tab14[c("department", "category")] <- lapply(tab14[c("department", "category")],
                                             str_to_title)

names(tab14) <- c(
  "departmentCode", "department",
  "categoryCode", "category",
  "itemCode", "item",
  "actual_2015_2016",
  "budget_2016_2017", "outturn_2016_2017",
  "budget_2017_2018",
  "projection_2018_2019", "projection_2019_2020"
)

## Column 6 as character, columns 7 to 12 as numeric (via character)
tab14[ , 6] <- as.character(tab14[ , 6])
tab14[7:12] <- lapply(tab14[7:12], function(v) as.numeric(as.character(v)))

################################################################################

## Store the result as a package data set
mohHealthCountyAdd2017 <- tab14
usethis::use_data(mohHealthCountyAdd2017, overwrite = TRUE)

################################################################################

## Rows 24-28 of the 16th extracted table
tab15 <- health2[[16]][24:28, ]

## Spaced hyphens, spaced en dashes and bare hyphens in column 1 become single
## spaces; all remaining punctuation is then stripped from columns 1 to 6
tab15[ , 1] <- str_replace_all(string = tab15[ , 1], pattern = " - ", replacement = " ")
tab15[ , 1] <- str_replace_all(string = tab15[ , 1], pattern = " – ", replacement = " ")
tab15[ , 1] <- str_replace_all(string = tab15[ , 1], pattern = "-", replacement = " ")

for (i in 1:6) {
  tab15[ , i] <- str_replace_all(string = tab15[ , i],
                                 pattern = "[[:punct:]]",
                                 replacement = "")
}

## Rows 1 and 5: move column 3 into column 4 and blank column 3
tab15[c(1, 5), 3:4] <- cbind("", tab15[c(1, 5), 3])

## Rows 2 to 4: move columns 2-3 one column to the right and blank column 2
tab15[2:4, 2:4] <- cbind("", tab15[2:4, 2:3])

## Split column 6 at its first space into two columns
y <- str_split_fixed(string = tab15[ , 6], pattern = " ", n = 2)

tab15 <- cbind(tab15[ , 1:5], y)

## Rows 1 and 5: tokenise column 1 and keep the tokens made only of digits
## (1 to 8 of them). The matches of each row are written into a 3-column
## matrix, so three matches per row are expected; the 2nd and 3rd then fill
## columns 2 and 3 of tab15.
z <- tab15[c(1, 5), ]
x <- str_split_fixed(string = z[ , 1], pattern = " ", n = 7)
df <- matrix(data = NA, nrow = nrow(x), ncol = 3)

for (i in seq_len(nrow(x))) {
  numbers <- x[i, ]
  numbers <- as.numeric(
    numbers[str_detect(string = numbers,
                       pattern = "^(\\d{1}|\\d{2}|\\d{3}|\\d{4}|\\d{5}|\\d{6}|\\d{7}|\\d{8})$")]
  )
  df[i, ] <- numbers
}

tab15[c(1, 5), 2:3] <- df[ , 2:3]

## Rows 2 to 4: same idea with a 2-column matrix; only the 2nd match is copied,
## into column 2 of tab15
z <- tab15[c(2:4), ]
x <- str_split_fixed(string = z[ , 1], pattern = " ", n = 7)
df <- matrix(data = NA, nrow = nrow(x), ncol = 2)

for (i in seq_len(nrow(x))) {
  numbers <- x[i, ]
  numbers <- as.numeric(
    numbers[str_detect(string = numbers,
                       pattern = "^(\\d{1}|\\d{2}|\\d{3}|\\d{4}|\\d{5}|\\d{6}|\\d{7}|\\d{8})$")]
  )
  df[i, ] <- numbers
}

tab15[2:4, 2] <- df[ , 2]

## Tokenise column 1 of every row of tab15
x <- str_split_fixed(string = tab15[ , 1], pattern = " ", n = 7)

## For every row of the token matrix x, build
##   labs[i]  - the purely alphabetic tokens joined by single spaces
##   df[i, 1] - the first token made of exactly 2, 4 or 6 digits (NA if none)
## labs is preallocated and filled with paste(collapse = " "), so a row without
## any alphabetic token yields "" (the previous 1:length(words) loop ran over
## c(1, 0) in that case and produced the bogus label "NA ").
labs <- character(nrow(x))
df <- matrix(data = NA, nrow = nrow(x), ncol = 1)

for (i in seq_len(nrow(x))) {
  words <- x[i, ][str_detect(string = x[i, ], pattern = "^[a-zA-Z]+$")]
  labs[i] <- paste(words, collapse = " ")

  numbers <- x[i, ][str_detect(string = x[i, ],
                               pattern = "^(\\d{2}|\\d{4}|\\d{6})$")]
  numbers <- as.numeric(numbers)
  ## Only the first match is stored, as in the tab9 and tab10 loops; assigning
  ## the whole vector stopped the script whenever a row had zero or several
  ## matches
  df[i, ] <- numbers[1]
}

## Put the extracted codes and labels in front of columns 2 to 7 of the cleaned
## table and keep rows 3 and 5 only
tab15 <- data.frame(df[ , 1], labs, tab15[ , 2:7])[c(3, 5), ]

## Department and category identifiers; the label text is taken from rows 1,
## 2 and 4 of labs
departmentCode <- rep(5500, 2)
department <- rep(labs[1], 2)
categoryCode <- c(22, 26)
category <- labs[c(2, 4)]

tab15 <- data.frame(departmentCode, department, categoryCode, category, tab15)

## Title-case the two text identifiers
tab15[c("department", "category")] <- lapply(tab15[c("department", "category")],
                                             str_to_title)

names(tab15) <- c(
  "departmentCode", "department",
  "categoryCode", "category",
  "itemCode", "item",
  "actual_2015_2016",
  "budget_2016_2017", "outturn_2016_2017",
  "budget_2017_2018",
  "projection_2018_2019", "projection_2019_2020"
)

## Column 6 as character, columns 7 to 12 as numeric (via character)
tab15[ , 6] <- as.character(tab15[ , 6])
tab15[7:12] <- lapply(tab15[7:12], function(v) as.numeric(as.character(v)))

################################################################################

## Store the result as a package data set
mohHealthClaims2017 <- tab15
usethis::use_data(mohHealthClaims2017, overwrite = TRUE)

################################################################################

## Stack tab8 to tab13 and tab15 row-wise into one data set and store it as
## package data.
## NOTE(review): tab14 (saved earlier as mohHealthCountyAdd2017) is not part of
## this rbind although it carries the same 12 columns - confirm that leaving it
## out is intentional.
mohHealthDepartment2017 <- data.frame(
  do.call(rbind, list(tab8, tab9, tab10, tab11, tab12, tab13, tab15))
)
usethis::use_data(mohHealthDepartment2017, overwrite = TRUE)
## validmeasures/liberiaNutriBudget documentation built on June 4, 2019, 5:45 p.m.