# R/clean_migration_data.R

# Defines functions clean_migration_data

#' Clean migration data
#'
#' Tidies the data frame that read_migration_data provides, restricted to
#' the municipalities of the provinces Groningen, Drenthe and Friesland.
#' The raw column labels are replaced by readable ones, the period code is
#' reduced to its leading four-digit year, the population figures are spread
#' into one column per year, and a `Balance` column (later year minus earlier
#' year) is added for the plots.
#'
#' @param df a data frame generated by read_migration_data. It is joined to
#'   `codes_df` on its `Regions` column.
#' @param codes_df a data frame generated by read_municipality. Its
#'   `Provincienaam` column selects the provinces and its `GemeentecodeGM`
#'   column is the join key.
#' @return a data frame with the columns `municipality` and `province`, one
#'   column per year found in the selected periods (e.g. `2015` and `2019`)
#'   and `Balance`. The year columns hold the population figures as character
#'   strings; `Balance` is numeric. When `codes_df` contains no municipality
#'   of the three provinces, a zero-row data frame with the columns
#'   `municipality`, `province` and `Balance` is returned.
#' @import stringr
#' @importFrom dplyr left_join
#' @importFrom rlang .data
#' @importFrom tidyr spread
#' @noRd

clean_migration_data <- function(df, codes_df) {
  # `year` is passed unquoted to tidyr::spread() below; binding it here keeps
  # R CMD check from reporting an undefined global variable.
  year <- NULL

  # NOTE(review): the joined data is assumed to hold this many consecutive
  # rows per municipality, of which only the first two are kept -- confirm
  # against the output of read_migration_data.
  rows_per_municipality <- 6L

  northern_provinces <- c("Groningen", "Drenthe", "Friesland")
  codes_df <- dplyr::filter(
    codes_df,
    .data$Provincienaam %in% northern_provinces
  )
  n_municipalities <- nrow(codes_df)

  # Nothing to reshape: return a well-defined empty result instead of failing
  # later on missing year columns.
  if (n_municipalities == 0L) {
    return(data.frame(
      municipality = character(0),
      province = character(0),
      Balance = numeric(0),
      stringsAsFactors = FALSE
    ))
  }

  df <- dplyr::left_join(codes_df, df, by = c("GemeentecodeGM" = "Regions"))
  code_columns <- c(
    "Gemeentecode", "GemeentecodeGM", "Provinciecode", "ProvinciecodePV"
  )
  for (code_column in code_columns) {
    df[[code_column]] <- NULL
  }

  # Rows 1-2, 7-8, 13-14, ...: the first two rows of every municipality.
  first_rows <- (seq_len(n_municipalities) - 1L) * rows_per_municipality + 1L
  keep <- as.vector(rbind(first_rows, first_rows + 1L))

  if (max(keep) > nrow(df)) {
    stop(
      "Expected ", rows_per_municipality, " rows per municipality after ",
      "joining, but the joined data has only ", nrow(df), " rows for ",
      n_municipalities, " municipalities.",
      call. = FALSE
    )
  }
  if (ncol(df) != 4L) {
    stop(
      "Expected exactly four columns (municipality, province, period, ",
      "population) after dropping the code columns, found ", ncol(df), ".",
      call. = FALSE
    )
  }

  # All columns are stored as character, which is what callers have always
  # received for the population columns.
  selected <- df[keep, , drop = FALSE]
  df <- data.frame(lapply(selected, as.character), stringsAsFactors = FALSE)
  names(df) <- c("municipality", "province", "year", "Population on 1 January")

  # The period code starts with the four-digit year.
  df[["year"]] <- as.integer(stringr::str_extract(df[["year"]], "^[0-9]{4}"))

  df <- tidyr::spread(df, year, "Population on 1 January")

  # spread() sorts the year columns, so column 3 is the earlier year and
  # column 4 the later one.
  balance <- as.numeric(df[[4]]) - as.numeric(df[[3]])
  cbind(df, Balance = balance)
}
# ikbentimkramer/cdphmd documentation built on Jan. 28, 2021, 2:51 p.m.