R/rasterise_buildings.R

Defines functions rasterise_buildings

Documented in rasterise_buildings

#'Rasterise building polygons
#'
#'Rasterise building polygons (`sf_buildings`) with reference to population (`sf_pop`) and/or
#'land use data (`sf_landuse`), if supplied. Multiple output (a list of) rasters will be generated if
#'building data for multiple years are present.
#'
#'If population (`sf_pop`) and/or land use data (`sf_landuse`) are supplied,
#'their raster(s) must have been previously generated in the `tempdir()` using
#'the functions `rasterise_pop()` and `rasterise_landuse()`, respectively.
#'Rasterised buildings will be masked away (i.e., convert pixels to `NA`)
#'at areas with no (zero) population and/or land use data, according to the respective matching year(s).
#'Custom ways to match the years between datasets can be set via the helper function `matchyear()`.
#'The argument `match_buildings_pop` provides ways to match each year in the `sf_pop` (if provided)
#'to a specific year in `sf_buildings`. Building raster(s) will then be associated
#'with a specific population census year, and removed if there are no matching population census years.
#'
#'If necessary, the argument `match_buildings_landuse` provides ways to match each year in `sf_buildings`
#'with a specific year in `sf_landuse`, but only if `sf_pop` is not provided. This is because the main
#'reference point for all the datasets is `sf_pop` (primary focus is the analysis of population data).
#'
#'
#'@param sf_buildings `sf `polygons of the buildings.
#'Data should be in a projected coordinate reference system.
#'@param proxy_pop_density character. Specify column name of the building attribute that is used
#'as an indicator of population density (e.g. building height, no. of levels).
#'This column will be used to assign values to the output raster. If not provided, population density
#'across all building pixels are assumed to be similar.
#'@param year character. Specify column name for the year within `sf_buildings`, `sf_landuse`
#'and `sf_pop` (if provided). Defaults to `NULL`, assuming that there is no 'year' column. Column data should be numeric.
#'@param sf_pop (optional) `sf` polygons containing the population census data with column containing the census year.
#'If absent, the output (building rasters) will not be associated with specific population census year(s).
#'@param sf_landuse (optional) `sf` polygons of the land use zones.
#'@param match_buildings_pop character. Type of matching between `sf_buildings` and `sf_pop` (if provided), passed on to `match` argument in function `matchyear()`.
#'Either `'exact'`, `'closest'`, `'recent'` or `'soonest'`. Defaults to `'closest'`.
#'@param match_buildings_landuse character. Type of matching between `sf_buildings` and `sf_landuse` (if provided), passed on to `match` argument in function `matchyear()`.
#'Either `'exact'`, `'closest'`, `'recent'` or `'soonest'`. Defaults to `'closest'`.
#'This argument is used only if `sf_pop` is not provided.
#'@param raster_template Either a `terra::rast` object, or a filepath to the raster used to define the pixel resolution, extent, nrow, ncol of
#'the output raster. Defaults to raster template in `tempdir()` processed in previous steps.
#'Object is passed to the '`y`' argument in `terra::rasterize()`.
#'Defaults to the template raster generated by the function `rasterise_pop()` within `dir_processing` (see next argument).
#'@param dir_processing character. Directory to get intermediate files generated in previous steps (e.g. rasterised population/land use data).
#'Defaults to `tempdir()`.
#'@param dir_export character (optional). File path to directory to export output raster(s) to.
#'@param overwrite logical. Argument passed to `terra::writeRaster()`. Defaults to `TRUE`.
#'@param wopt list. Argument passed to `terra::writeRaster()`.
#'@param ... Other arguments passed to `terra::writeRaster()`.
#'
#'@return List of raster files. Zero values are converted to `NA`.
#'
#'@import sf
#'@import checkmate
#'@importFrom glue glue
#'@importFrom dplyr filter mutate
#'@importFrom terra vect rast rasterize
#'@importFrom stringr str_extract
#'@importFrom stats na.omit
#'@importFrom rlang .data
#'
#'@examples
#' \dontrun{
#' # load data
#' data(pop_sgp) # population census block polygons
#' data(landuse_sgp) # land use polygons
#'
#'
#' # transform to projected crs
#' pop_sgp <- sf::st_transform(pop_sgp, sf::st_crs(32648))
#' landuse_sgp <- sf::st_transform(landuse_sgp, sf::st_crs(32648))
#'
#'
#' # get osm buildings based on census block polygons (year 2020)
#' city_boundaries <- pop_sgp %>%
#'    dplyr::filter(year == 2020) %>%
#'    sf::st_union() %>%
#'    sf::st_as_sf() %>%
#'    smoothr::fill_holes(threshold = units::set_units(1, 'km^2'))  %>% # clean up
#'    smoothr::drop_crumbs(threshold = units::set_units(1, 'km^2'))  %>%
#'    sf::st_make_valid()
#'
#' buildings <- get_buildings_osm(place = city_boundaries,
#'                                date = as.Date('2021-01-01')) %>%
#'    mutate(year = 2020)
#'
#'
#' # run function
#' buildings_rasters <- rasterise_buildings(buildings,
#'                                          proxy_pop_density = 'levels',
#'                                          year = 'year',
#'                                          sf_pop = pop_sgp,
#'                                          sf_landuse = landuse_sgp,
#'                                          match_buildings_pop = 'closest')
#' }
#'
#'@export
rasterise_buildings <- function(sf_buildings, proxy_pop_density = NULL, year = NULL, sf_pop = NULL,
    sf_landuse = NULL, match_buildings_pop = "closest", match_buildings_landuse = "closest",
    raster_template = NULL, dir_processing = tempdir(), dir_export = NULL, overwrite = TRUE,
    wopt = list(gdal = c("COMPRESS=LZW")), ...) {

    # Error checking ------------------

    coll <- checkmate::makeAssertCollection()

    # file formats
    checkmate::assertTRUE(!is.null(st_crs(sf_buildings)), add = coll)  # must have crs
    # checkmate::assertTRUE(all(st_is_valid(sf_buildings)), add = coll) # all features must be
    # valid (too time consuming)


    # colnames
    checkmate::assert_subset(proxy_pop_density, choices = colnames(sf_buildings), empty.ok = FALSE,
        add = coll)
    checkmate::assert_subset(year, choices = colnames(sf_buildings), empty.ok = TRUE, add = coll)

    # proxy_pop_density
    checkmate::assert_character(proxy_pop_density, min.len = 1, any.missing = FALSE, all.missing = FALSE,
        null.ok = TRUE, add = coll)
    checkmate::assert_subset(proxy_pop_density, choices = colnames(sf_buildings), empty.ok = TRUE,
        add = coll)  # as vector

    # file paths
    checkmate::assert_character(dir_processing, min.len = 1, any.missing = FALSE, all.missing = FALSE,
        null.ok = TRUE, add = coll)
    checkmate::assert_character(dir_export, min.len = 1, any.missing = FALSE, all.missing = FALSE,
        null.ok = TRUE, add = coll)

    checkmate::reportAssertions(coll)


    # Calculations ------------------

    # IF POP DATA IS PROVIDED
    if (!is.null(sf_pop)) {

        checkmate::assertTRUE(st_crs(sf_buildings) == st_crs(sf_pop))  # must have same crs

        # match pop data to most recent building data
        sf_pop <- matchyear(data = sf_pop, data_tomatch = sf_buildings, year = year, match = match_buildings_pop)

        # remove building years not required (no matching pop years)
        sf_buildings <- sf_buildings %>%
            dplyr::filter(.data[[year]] %in% unique(sf_pop$year_match))

        if (nrow(sf_buildings) == 0) {
            # none fulfill criteria!
            stop("Error: No building data fulfills the criteria specified in 'match_buildings_pop'!")
        }

    }


    # GET RASTER TEMPLATE (depending on object supplied)
    if(is.null(raster_template)) {
        raster_template <- rast(glue::glue("{dir_processing}/popdensity_raster-template.tif"))
    } else if(!is.null(raster_template) & is.character(raster_template)) { # user supplied file path
        raster_template <- rast(raster_template)
    }
    checkmate::assertTRUE(st_crs(sf_buildings) == st_crs(raster_template))  # must have same crs



    # RASTERIZE
    results <- list()  # create empty list for output

    # If there is a year column in sf_buildings
    if (!is.null(year)) {

        years_buildings <- unique(sf_buildings[[year]])

        # each buildings year
        for (i in seq_along(years_buildings)) {

            sf_buildings_subset <- sf_buildings %>%
              dplyr::filter(.data[[year]] == years_buildings[i])

            # rasterize & export to temp directory
            buildings_raster <- terra::rasterize(terra::vect(sf_buildings_subset), raster_template,
              field = proxy_pop_density)

            buildings_raster[buildings_raster == 0] <- NA  # convert 0s to NAs


            # IF POP DATA IS PROVIDED, remove areas not covered by pop census blocks (for matching
            # census yr)
            if (!is.null(sf_pop)) {

              years_popmatch <- unique(sf_pop$year[sf_pop$year_match == years_buildings[i]])
              years_popmatch <- stats::na.omit(years_popmatch)

              results[[i]] <- list()

              for (j in seq_along(years_popmatch)) {

                pop_raster_matching <- rast(glue::glue("{dir_processing}/popdensity-by-census-block_{years_popmatch[j]}.tif"))

                # # mask away building pixels not within relevant pop zones
                results[[i]][[j]] <- terra::mask(buildings_raster, pop_raster_matching)
                names(results[[i]][[j]]) <- glue::glue("buildings-{years_buildings[i]}_for-pop-{years_popmatch[j]}")


                # IF BOTH POP & LANDUSE PROVIDED (landuse file will have pop year as suffix)
                if (!is.null(sf_landuse)) {

                  file <- dir(dir_processing, pattern = glue::glue("^landuse.*{years_popmatch[j]}.tif$"),
                    full.names = TRUE)
                  landuse_raster_matching <- rast(file)

                  # mask away building pixels not within relevant landuse zones
                  results[[i]][[j]] <- terra::mask(buildings_raster, landuse_raster_matching)  # overwrite!
                  landuse_name <- sub("\\..*$", "", basename(file))
                  names(results[[i]][[j]]) <- glue::glue("buildings-{years_buildings[i]}_{landuse_name}")

                }

                # export
                if (!is.null(dir_export)) {
                  terra::writeRaster(results[[i]][[j]], filename = file.path(glue::glue("{dir_export}/{names(results[[i]][[j]])}.tif")),
                    overwrite = overwrite, wopt = wopt, ...)
                }

                rm(j)
              }


            # POP DATA NOT PROVIDED
            } else {

              results[[i]] <- buildings_raster
              names(results[[i]]) <- glue::glue("buildings-{years_buildings[i]}")


              # POP NOT PROVIDED, BUT LANDUSE DATA IS, remove areas not covered by relevant landuse
              if (!is.null(sf_landuse)) {

                file <- dir(dir_processing, pattern = glue::glue("^landuse.*"), full.names = TRUE)

                if (length(file) > 1) {
                  # multiple files: choose closest year to building data

                  # years in file
                  years_landuse <- str_extract(basename(file), "landuse-residential-[0-9]{4}") %>%
                    str_extract("[0-9]{4}") %>%
                    as.numeric()

                  # match building to landuse (rmbr not to use the full sf)
                  buildings_temp <- matchyear(data = sf_buildings_subset, data_tomatch = sf_landuse,
                    year = year, match = match_buildings_landuse)

                  which_file <- which(years_landuse == unique(buildings_temp$year_match))
                  file <- file[which_file]
                  rm(which_file)
                }

                landuse_raster_matching <- rast(file)

                # mask away building pixels not within relevant landuse zones
                results[[i]] <- terra::mask(buildings_raster, landuse_raster_matching)  # overwrite!
                landuse_name <- sub("\\..*$", "", basename(file))
                names(results[[i]]) <- glue::glue("buildings-{years_buildings[i]}_{landuse_name}")

              }

              # export
              if (!is.null(dir_export)) {
                terra::writeRaster(results[[i]], filename = file.path(glue::glue("{dir_export}/{names(results[[i]])}.tif")),
                  overwrite = overwrite, wopt = wopt, ...)
              }
            }

            rm(buildings_raster, i)
        }


        # IF NO YEAR COLUMN IN sf_buildings
        }  else {

          buildings_raster <- terra::rasterize(terra::vect(sf_buildings),
                                               raster_template,
                                               field = proxy_pop_density)

          buildings_raster[buildings_raster == 0] <- NA  # convert 0s to NAs
          names(buildings_raster) <- 'buildings'


          # IF POP DATA IS PROVIDED
          if (!is.null(sf_pop)) {

              file <- dir(dir_processing, pattern = glue::glue("^popdensity-by-census-block.tif$"),
                           full.names = TRUE)
              pop_raster_matching <- rast(file[1]) # if multiple files, pick the top one

              # # mask away building pixels not within relevant pop zones
              buildings_raster <- terra::mask(buildings_raster, pop_raster_matching)
              names(buildings_raster) <- glue::glue("buildings_for-pop")


              # IF BOTH POP & LANDUSE PROVIDED
              if (!is.null(sf_landuse)) {

                file <- dir(dir_processing, pattern = glue::glue("^landuse.*.tif$"),
                            full.names = TRUE)
                landuse_raster_matching <- rast(file[1]) # if multiple files, pick the top one

                # mask away building pixels not within relevant landuse zones
                buildings_raster <- terra::mask(buildings_raster, landuse_raster_matching)  # overwrite!
                landuse_name <- sub("\\..*$", "", basename(file))
                names(buildings_raster) <- glue::glue("buildings_{landuse_name}")

              }

              # export
              if (!is.null(dir_export)) {
                terra::writeRaster(buildings_raster, filename = file.path(glue::glue("{dir_export}/{names(buildings_raster)}.tif")),
                                   overwrite = overwrite, wopt = wopt, ...)
              }


            # POP DATA NOT PROVIDED
          } else {

            # POP NOT PROVIDED, BUT LANDUSE DATA IS
            if (!is.null(sf_landuse)) {

              file <- dir(dir_processing, pattern = glue::glue("^landuse.*"), full.names = TRUE)
              landuse_raster_matching <- rast(file[1]) # if multiple files, pick the top one

              # mask away building pixels not within relevant landuse zones
              buildings_raster <- terra::mask(buildings_raster, landuse_raster_matching)  # overwrite!
              landuse_name <- sub("\\..*$", "", basename(file))
              names(buildings_raster) <- glue::glue("buildings_{landuse_name}")

            }

            # export
            if (!is.null(dir_export)) {
              terra::writeRaster(buildings_raster, filename = file.path(glue::glue("{dir_export}/{names(buildings_raster)}.tif")),
                                 overwrite = overwrite, wopt = wopt, ...)
            }

            results[[1]] <- buildings_raster

          }

          rm(buildings_raster)
        }


    return(unlist(results))
}
ecological-cities/home2park documentation built on Dec. 6, 2023, 1:05 a.m.