knitr::opts_chunk$set(
  collapse = TRUE,
  comment = "#>"
)
suppressWarnings(library(SfSpHelpers))
suppressWarnings(library(ggplot2))
suppressWarnings(library(dplyr))
suppressWarnings(library(magrittr))

Data

Caches results to defaul cache + downloads a large zip

  shpBuildings <- cache_csv_sf_wrapper('eval_fonciere_mtl', 
                                       fun = get_zipped_remote_shapefile,
                                       url = 'https://data.montreal.ca/dataset/4ad6baea-4d2c-460f-a8bf-5d000db498f7/resource/43c2cccf-a439-429b-a3c8-5d4ebce53e1b/download/uniteevaluationfonciere.zip',
                                       id_name='ID_UEV')

#Clean up 
shpBuildings %<>% mutate( ANNEE_CONS = plyr::mapvalues(ANNEE_CONS, from=9999, to=NA) )

#Sample
shpBuildings %<>% sample_frac(size=0.001) 

Spatial k mean to thin out large number of points

Year built

  #No need to get the centroids, spatialKMeans does it automatically
  shpBuildingsAgg <- spatialKMeans (shpBuildings,
                            numCentroids = as.integer(nrow(shpBuildings)/10),
                            numClosestPoints = 10,
                            var="ANNEE_CONS",
                            aggFct=function(x) {median(x,na.rm = T)} #remove NAs since we introduced some in the chunk above
  )

  #Check same crs
  assertthat::are_equal(st_crs(shpBuildingsAgg), st_crs(shpBuildings))

  #Number of centroids
  assertthat::are_equal(nrow(shpBuildingsAgg) , 10**3)

  #Plot to see the difference
  shpBuildingsAgg$id <- 'aggregated'
  shpBuildings$id <- 'raw'


  shpBoth <- rbind(shpBuildingsAgg,
                   shpBuildings %>% dplyr::select( all_of(colnames(shpBuildingsAgg))))



  ggplot() +
    geom_sf(data=shpBoth, aes(col=ANNEE_CONS) ) +
    facet_wrap(~id, ncol=1) + 
    ggplot2::theme_minimal(base_family="Roboto Condensed", base_size=11.5) +
    scale_color_viridis_c()

Area grouped by type

#Keep the most prevalent groups and make sure to have at least 10 observations so that propToKeep = 0.1 does not fail after
top_cat <- shpBuildings %>% st_drop_geometry() %>% count(LIBELLE_UT) %>% arrange(desc(n)) %>% top_n(n=3) %>% filter(n > 10)

shpBuildingsAgg_by_build_type <- map_dfr(
  top_cat$LIBELLE_UT,
  function(x) {
    spatialKMeans (shp = shpBuildings %>%  filter(LIBELLE_UT == x ) ,
                 propToKeep = 0.1,
                 numClosestPoints = 10,
                 var="SUPERFICIE") %>% 
    mutate(LIBELLE_UT = x)
  }
)

ggplot() +
    geom_sf(data=shpBuildingsAgg_by_build_type, aes(col=SUPERFICIE) ) +
    facet_wrap(~LIBELLE_UT, ncol=1) +
    ggplot2::theme_minimal(base_family="Roboto Condensed", base_size=11.5) +
    scale_color_viridis_c()


cgauvi/sfSpHelpers documentation built on June 30, 2023, 10:48 p.m.