#' Clean housing price data
#'
#' The data frame that read_housing_price_data provides has several issues:
#' the column labels are not descriptive, the periods column contains a code
#' instead of a year, and the regions column contains all the municipality
#' codes from the Netherlands. This function keeps only the municipalities
#' located in the provinces Groningen, Drenthe and Friesland, joins the
#' housing prices onto them, renames the columns to English labels and
#' converts the period code to an integer year.
#'
#' @param df a data frame generated by read_housing_price_data. The columns
#'   `RegioS`, `Perioden` and `GemiddeldeVerkoopprijs_1` are used.
#' @param codes_df a data frame generated by read_municipality. The columns
#'   `GemeentecodeGM`, `Gemeentenaam` and `Provincienaam` are used;
#'   `Provinciecode` and `ProvinciecodePV` are dropped when present.
#' @return a data frame holding the filtered rows of `codes_df` left-joined
#'   with `df`, with the columns `municip_code`, `municipality`, `province`,
#'   `year` (integer, taken from the first four digits of the period code)
#'   and `Average selling price`. Any other column of the inputs is passed
#'   through unchanged, except `Provinciecode` and `ProvinciecodePV`, which
#'   are removed.
#' @import stringr
#' @importFrom dplyr filter left_join
#' @importFrom rlang .data
#' @noRd
clean_housing_price_data <- function(df, codes_df) {
  northern_provinces <- c("Groningen", "Drenthe", "Friesland")

  # Qualify filter(): an unqualified call resolves to stats::filter(), which
  # does not subset data frames.
  codes_df <- dplyr::filter(
    codes_df,
    .data$Provincienaam %in% northern_provinces
  )

  # Left join from the municipality table so that every selected municipality
  # is kept, even when no price is available for it.
  df <- dplyr::left_join(codes_df, df, by = c("GemeentecodeGM" = "RegioS"))

  # Old name -> new name. A column that is absent is silently skipped.
  new_names <- c(
    Perioden = "year",
    GemiddeldeVerkoopprijs_1 = "Average selling price",
    Gemeentenaam = "municipality",
    Provincienaam = "province",
    GemeentecodeGM = "municip_code"
  )
  for (old_name in names(new_names)) {
    colnames(df)[colnames(df) == old_name] <- new_names[[old_name]]
  }

  # The period code starts with the four-digit year; keep only that part.
  df[["year"]] <- as.integer(stringr::str_extract(df[["year"]], "^[0-9]{4}"))

  df[["Provinciecode"]] <- NULL
  df[["ProvinciecodePV"]] <- NULL
  df
}
# Add the following code to your website.
# For more information on customizing the embed code, read Embedding Snippets.