R/combine_ons_with_lfs.R

Defines functions combine_ons_with_lfs

Documented in combine_ons_with_lfs

#' A Function that Combines the ONS Demographic Data, with the LFS
#' Demographic Data
#'
#' @description This function takes demographic data summarised by \code{\link[tbinenglanddataclean]{clean_demographics_uk}} and
#' \code{\link[tbinenglanddataclean]{clean_labour_force_survey}} and combines it into a single tidy dataset.
#' Summary statistics and plots can be returned to check both datasets.
#' @inherit clean_demographics_uk
#' @param ons_name Character string of the file name of the ONS demographic data.
#' @param lfs_name Character string of the file name of the LFS demographic data.
#' @param countries  A character string, the countries to include in the
#' dataset. By default only England is included. Note this is reliant on the data being present in the
#' demographic datasets.
#'
#' @return A tidy tibble of demographic data by age between 2000 and 2015 for the specified countries
#' for both ONS and LFS data.
#' @export
#' @importFrom dplyr mutate summarise select group_by mutate filter full_join bind_rows
#' @examples
#'
combine_ons_with_lfs <- function(data_path = "~/data/tb_data/tbinenglanddataclean",
                                 ons_name = "E_demo_2000_2015.rds",
                                 lfs_name = "formatted_LFS_2000_2016.rds",
                                 countries = "England",
                                      return = TRUE,
                                      save = TRUE,
                                      save_name = "E_ons_lfs_2000_2016",
                                      save_path = "~/data/tb_data/tbinenglanddataclean",
                                      save_format = "rds",
                                      verbose = TRUE,
                                      theme_set = NULL) {
  
  if (is.null(theme_set)) {
    theme_set <- theme_minimal()
  }

  demo_path <- file.path(data_path, ons_name)
  if (verbose) {
    message("Loading demographic data from: ", demo_path)
  }
  demo_2000_2015 <- readRDS(demo_path)

  lfs_path <- file.path(data_path, lfs_name)
  if (verbose) {
    message("Loading labour force survey data from: ", lfs_path)
  }
  lfs_data <- readRDS(lfs_path)


  # munge LFS to generate yearly population counts --------------------------

  lfs_data %>%
    filter(Country %in% countries) %>%
    group_by(Year, Age, CoB) %>%
    summarise(Population = sum(Weight)) %>%
    mutate(CoB = as.character(CoB)) -> demo_2000_2016_strat_est



  # Format demographics for consistency -------------------------------------
  demo_2000_2015 %>%
    mutate(CoB = 'Total') %>%
    mutate(CoB = CoB) %>%
    mutate(Year = as.character(Year) %>% as.numeric) -> demo_2000_2015


  # Bind data ---------------------------------------------------------------
  demo_2000_2016_strat_est <- demo_2000_2015 %>%
    full_join(demo_2000_2016_strat_est, by = c('Year', 'Age', 'CoB', 'Population')) %>%
    mutate(CoB = factor(CoB, levels = c('Total', 'UK born', 'Non-UK born')))



  # Add comparison total from LFS -------------------------------------------
  demo_2000_2016_strat_est <- demo_2000_2016_strat_est %>%
    filter(!(CoB %in% 'Total')) %>%
    group_by(Age, Year) %>%
    summarise(Population = sum(Population)) %>%
    mutate(CoB = 'Total (LFS)') %>%
    bind_rows(demo_2000_2016_strat_est %>%
                mutate(CoB = as.character(CoB))) %>%
    mutate(CoB = factor(CoB, levels = c('Total', 'Total (LFS)', 'UK born', 'Non-UK born')))



  # Add 5 year age groups ---------------------------------------------------
  demo_2000_2016_strat_est %>%
    mutate(`Age group` = Age %>%  as.character %>% replace(Age %in% '90+', '90') %>%
             as.numeric %>%
             cut(breaks = seq(0,95,5), right = FALSE,
                 ordered_result = TRUE,
                 labels = c(paste(seq(0,85,5), seq(4,89,5), sep = '-'), '90+'))) -> demo_2000_2016_strat_est

  ## Add 0-15, 16-65, 65+
  demo_2000_2016_strat_est %>%
    mutate(`Age group (condensed)` = Age %>%  as.character %>% replace(Age %in% '90+', '90') %>%
             as.numeric %>%
             cut(breaks = c(0, 15, 65, 91), right = FALSE,
                 ordered_result = TRUE, labels = c('0-14', '15-64', '65+'))) -> demo_2000_2016_strat_est

  ## ungroup
  demo_2000_2016_strat_est <- demo_2000_2016_strat_est %>%
    ungroup

  # Plots to visualise ------------------------------------------------------
 if (verbose) {
   ## Plots of Non-UK born over time
   demo_2000_2016_strat_est %>%
     filter(Year %% 5 == 0, CoB %in% 'Non-UK born') %>%
     ggplot(aes(x = Age, y = Population)) +
     geom_density(stat = "identity", alpha = 0.4) +
     facet_wrap(~Year) +
     theme_set +
     theme(axis.text.x = element_text(angle = 90)) +
     labs(caption = "Non-UK born population by age, every 5 years") -> p

   p

   ## Plots of UK born over time
   demo_2000_2016_strat_est %>%
     filter(Year %% 5 == 0, CoB %in% 'UK born') %>%
     ggplot(aes(x = Age, y = Population)) +
     geom_density(stat = "identity", alpha = 0.4) +
     facet_wrap(~Year) +
     theme_set +
     theme(axis.text.x = element_text(angle = 90)) +
     labs(caption = "UK born population by age, every 5 years") -> p1

   p1


   ## Compare Population strat by year - 2000
   demo_2000_2016_strat_est %>%
     filter(Year %in% 2000, !is.na(CoB)) %>%
     ggplot(aes(x = Age, y = Population)) +
     geom_density(stat = "identity", alpha = 0.4) +
     facet_wrap(~CoB) +
     theme_set +
     theme(axis.text.x = element_text(angle = 90)) +
     labs(caption = "Comparision of ONS, LFS, UK born,
          and non-UK born population estimates for 2000") -> p2

  p2


   ## Compare Population strat by year - 2005
   demo_2000_2016_strat_est %>%
     filter(Year %in% 2005, !is.na(CoB)) %>%
     ggplot(aes(x = Age, y = Population)) +
     geom_density(stat = "identity", alpha = 0.4) +
     facet_wrap(~CoB) +
     theme_set +
     theme(axis.text.x = element_text(angle = 90)) +
     labs(caption = "Comparision of ONS, LFS, UK born,
          and non-UK born population estimates for 2005") -> p3

   p3


   ## Compare Population strat by year - 2010
   demo_2000_2016_strat_est %>%
     filter(Year %in% 2010, !is.na(CoB)) %>%
     ggplot(aes(x = Age, y = Population)) +
     geom_density(stat = "identity", alpha = 0.4) +
     facet_wrap(~CoB) +
     theme_set +
     theme(axis.text.x = element_text(angle = 90)) +
     labs(caption = "Comparision of ONS, LFS, UK born,
          and non-UK born population estimates for 2010") -> p4

   p4


   ## Compare Population strat by year - 2015
   demo_2000_2016_strat_est %>%
     filter(Year %in% 2015, !is.na(CoB)) %>%
     ggplot(aes(x = Age, y = Population)) +
     geom_density(stat = "identity", alpha = 0.4) +
     facet_wrap(~CoB) +
     theme_set +
     theme(axis.text.x = element_text(angle = 90)) +
     labs(caption = "Comparision of ONS, LFS, UK born,
          and non-UK born population estimates for 2015") -> p5

   p5




   # Look at total population over time --------------------------------------
   demo_2000_2016_strat_est %>%
     filter(Year %% 5 == 0) %>%
     group_by(CoB, Year) %>%
     summarise(Population = sum(Population)) %>%
     ggplot(aes(x = Year, y = Population, fill = CoB, colour = CoB)) +
     geom_point() +
     geom_line() +
     theme_set +
     labs(caption = "Population estimates over time for both the ONS abd KFS") -> p6

   p6
 }

  if (verbose) {
    #current bug in plotly for negative values in box plots means this will not present the correct results so use static table
    demo_2000_2016_strat_est %>%
      na.omit %>%
      filter(Year < 2016) %>%
      plot_pop_age_compare_ons_lfs(theme_set = theme_set) -> p7
    p7

    ## plot removing 85+ due to distortion
    demo_2000_2016_strat_est %>%
      filter (Year < 2016) %>%
      na.omit %>%
      filter(!(`Age group` %in% c('85-89', '90+'))) %>%
      plot_pop_age_compare_ons_lfs(theme_set = theme_set) -> p8
    
    p8

  }

  if (save) {
    save_data(demo_2000_2016_strat_est,
              name = save_name,
              path = data_path,
              format = save_format,
              message = "ONS combined with LFS data saved to: ",
              verbose = verbose
    )
  }

  if (return) {
    return(demo_2000_2016_strat_est)
  }
}
seabbs/tbinenglanddataclean documentation built on July 12, 2019, 2:54 p.m.