#' Clean migration data
#'
#' The data frame returned by `read_migration_data` needs some work before it
#' can be plotted: its column labels are not descriptive, its period column
#' holds a code rather than a plain year, and it has no column with the
#' change in population between the two years of interest.
#'
#' Only municipalities in the provinces Groningen, Drenthe and Friesland are
#' kept. For each of them the first two period rows of the joined data are
#' used; this relies on the joined data holding exactly six consecutive
#' period rows per municipality, and on the first four remaining columns
#' being municipality name, province name, period code and population.
#'
#' @param df a data frame generated by read_migration_data
#' @param codes_df a data frame generated by read_municipality
#' @return a data frame with five columns: municipality, province, 2015, 2019
#'   and Balance. The two year columns hold the population as character
#'   strings; Balance is numeric (second year column minus the first).
#' @import stringr
#' @importFrom dplyr left_join
#' @importFrom rlang .data
#' @importFrom tidyr spread
#' @noRd
clean_migration_data <- function(df, codes_df) {
  northern_provinces <- c("Groningen", "Drenthe", "Friesland")
  # Number of period rows each municipality is assumed to have after the join.
  rows_per_municipality <- 6L
  code_cols <- c(
    "Gemeentecode", "GemeentecodeGM", "Provinciecode", "ProvinciecodePV"
  )

  # The dplyr verbs are namespace-qualified on purpose: an unqualified
  # `filter` resolves to stats::filter unless dplyr::filter is imported.
  codes_df <- dplyr::filter(
    codes_df,
    .data$Provincienaam %in% northern_provinces
  )
  df <- dplyr::left_join(codes_df, df, by = c("GemeentecodeGM" = "Regions"))
  df <- df[setdiff(names(df), code_cols)]

  # Positions of rows 1 and 2 inside every block of `rows_per_municipality`
  # rows, interleaved so the original row order is kept (1, 2, 7, 8, ...).
  block_start <- (seq_len(nrow(codes_df)) - 1L) * rows_per_municipality
  keep <- as.vector(rbind(block_start + 1L, block_start + 2L))
  df <- dplyr::slice(df, keep)

  # Columns are picked by position because only their order, not their
  # names, is known here. The population is kept as character so the year
  # columns have the same type as they always had.
  df <- data.frame(
    municipality = as.character(df[[1L]]),
    province = as.character(df[[2L]]),
    year = as.integer(
      stringr::str_extract(as.character(df[[3L]]), "^[0-9]{4}")
    ),
    population = as.character(df[[4L]]),
    stringsAsFactors = FALSE
  )

  # One column per year; the year columns come after the two id columns.
  df <- tidyr::spread(df, key = "year", value = "population")

  # Population change: second year column minus first year column.
  balance <- as.numeric(df[[4L]]) - as.numeric(df[[3L]])
  cbind(df, Balance = balance)
}
# Add the following code to your website.
# For more information on customizing the embed code, read Embedding Snippets.