#######################################
### Workflow functions ###
### for EcoservR ###
### Sandra Angers-Blondin ###
### 29 March 2021 ###
#######################################
#' Prepare MasterMap
#'
#' This function imports the OS MasterMap from its specified folder. It reads in only the polygons that intersect the study area, but must query every file in the folder to do so. For optimal performance, keep only the OS tiles needed for a given project in your MasterMap folder.
#' @param projectLog The RDS project log file generated by the wizard app and containing all file paths to data inputs and model parameters
#' @return Saves a project_title_MM_01.RDS file to the project folder, and returns the buffered study area and the mm object to the environment.
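#' @examples
#' \dontrun{
#' # Minimal usage sketch (hypothetical path): the project log is the RDS file
#' # created by the wizard app.
#' projectLog <- readRDS("path/to/projectLog.RDS")
#' prepare_basemap(projectLog)
#' }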
#' @export
#'
prepare_basemap <- function(projectLog = parent.frame()$projectLog){
# the parent frame bit makes sure the function knows that the argument comes from the user's environment
timeA <- Sys.time() # start time
## Extract the file paths and other info from project log -----
mmpath <- projectLog$df[projectLog$df$dataset == "mm", ][["path"]]
mmlayer <- projectLog$df[projectLog$df$dataset == "mm", ][["layer"]]
mm_cols <- projectLog$df[projectLog$df$dataset == "mm", ][["cols"]][[1]] # coerce to named character (remove list nesting)
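# NB: mm_cols maps our standard names to the user's column names. For example it might
# look like this (illustrative values only; the actual mapping comes from the wizard):
# c(TOID = "toid", Theme = "theme", Group = "descriptivegroup",
#   Make = "make", PhysicalLevel = "physicallevel")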
mmtype <- guessFiletype(mmpath) # extract filetype
studypath <- projectLog$df[projectLog$df$dataset == "studyArea", ][["path"]]
studybuffer <- projectLog$parameters$SAbuffer
output_temp <- projectLog$output_temp
title <- projectLog$title
### First, check that column names are valid:
checkNames(mmpath, mmlayer, mm_cols) # will stop execution if col names specified by user don't match names in layer
### Create the output folder if it doesn't exist
if (!dir.exists(output_temp)){
dir.create(output_temp)
}
### Step 1. Import and buffer the study area ------------------------------------------
### Import the study area outline (specifying OSGB as crs)
studyArea <- try(loadSpatial(studypath,
layer = NULL,
filetype = guessFiletype(studypath)) %>%
do.call(rbind, .))
if (inherits(studyArea, "try-error")) stop("Could not import study area. Check file path and format.") else message("Study area imported.")
studyArea <- suppressWarnings({
checkcrs(studyArea, 27700) %>% # check that CRS is Brit National Grid, and transform if not
sf::st_set_crs(., 27700) # set the crs manually to get rid of GDAL errors with init string format
})
## Drop Z dimension if it has one
studyArea <- sf::st_zm(studyArea, drop = TRUE)
### Buffer and union
studyAreaBuffer <- sf::st_buffer(studyArea, studybuffer) %>% # create a buffer around the study area shape
sf::st_union() %>% # dissolve so that overlapping parts do not generate overlapping polygons in other datasets
sf::st_make_valid() %>% # make sure outline is valid
sf::st_geometry() %>% sf::st_as_sf() # retain only geometry
rm(studyArea) # remove original boundary
# Save the buffer to disk for next scripts
sf::st_write(studyAreaBuffer,
dsn = file.path(output_temp,
paste(title, "_studyAreaBuffer_",studybuffer,"m.gpkg", sep="")),
append = FALSE # will overwrite existing object if present
)
# and as R object for easy handling - recalling in other modules
saveRDS(studyAreaBuffer, file.path(output_temp, paste0(title,"_studyAreaBuffer.RDS")))
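# e.g. downstream modules can recall it with:
# studyAreaBuffer <- readRDS(file.path(output_temp, paste0(title, "_studyAreaBuffer.RDS")))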
if(sf::st_is_valid(studyAreaBuffer)) {message("Study area buffered and saved to your output folder.")}
### Step 2. Import and tidy mastermap data ----------------------------------------------
# NOTE: Most users will have done a data download that has many files, one for each 10km tile.
# All the files should be in the same folder for the function to work (the folder path is read from the project log).
# List available MasterMap files ------------------------------------------
## List all acceptable spatial files in folder
fileList <- list.files(mmpath, pattern = paste0(mmtype, collapse="|"),
ignore.case = TRUE,
recursive = TRUE, full.names = TRUE)
if (length(fileList) == 0){stop("Cannot find MasterMap data. Please check file path and format.")}
## Amend the file list and create a layer object based on the file type
if (any(grepl("shp", mmtype))){
## If we're dealing with shapefiles, we want the file list to be the directories,
## and create a layer list with the actual layer names.
# first we subset the files to just those with the right string
# UPDATE 27 July 2021: mmlayer can be a list of layer names rather than a common string, so paste0 is required to test across all names
fileList <- fileList[grepl(paste0(mmlayer, collapse = "|"), fileList)]
if (length(fileList) == 0){stop("Could not find layers corresponding to ", mmlayer)}
# Then we extract the layer names from the file name
layer <- lapply(fileList, function(x)
tools::file_path_sans_ext(basename(x))) # remove the extension from the file name
# And remove it from the dsn list
fileList <- dirname(fileList)
} else {
## If we are dealing with any other format, the fileList with the full extension is the dsn,
## and the layer name should always be what was detected by the wizard.
layer <- mmlayer
}
## Now we have two key objects to work with: fileList which is always the dsn argument of the read functions,
## and layer which is either a list of the same length as fileList (for shapefiles) or one
## character object, to be used in the layer argument of the read functions.
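## For example (illustrative paths and names):
## shapefiles: fileList = c("data/mm", "data/mm"); layer = list("mastermap_SJ38", "mastermap_SJ48")
## geopackage: fileList = "data/mm/mastermap_SJ38.gpkg"; layer = "TopographicArea"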
# Prepare selection of relevant data from MasterMap files ------------------------
## This section only accesses the metadata of the MasterMap data,
## to identify which file(s) belong to each tile of the study area.
## These functions are custom wrapper functions to keep this script short.
## See fun_mastermap if there are problems with any of them.
## Prepare the study area grids (10x10km OS squares intersected with the SA)
SAgrid <- ecoservR::grid_study(studyAreaBuffer)
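# SAgrid is a named list of square polygons, one per 10x10km grid reference (e.g. "SJ38"),
# so the rest of the workflow can process the study area tile by tile.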
## Extract file extents without reading files in memory
ex <- ecoservR::getFileExtent(fileList, layer)
# The ex object contains a polygon for the extent of each file,
# plus file and layername variables that can be used to read the files in.
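# e.g. (illustrative): one row per file, along the lines of
# file = "data/mm/mastermap_SJ38.gpkg", layername = "TopographicArea", geometry = <extent polygon>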
## Now we can figure out which files belong to which study area tile
## (only those that intersect the square)
## Since we have all the spatial and file name info we need in ex, this section is not
## dependent on file format
explo <- lapply(SAgrid, function(x){
belongs_tile <- unlist(sf::st_intersects(x, ex, sparse=TRUE))
sf::st_drop_geometry(ex[belongs_tile,])
})
## This reduces the number of files against which to perform the spatial query for a given SA tile.
## It returns, for each study area tile, a dataframe of dsn and layer names to read in.
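## e.g. explo[["SJ38"]] might look like this (illustrative):
##   file                          layername
##   data/mm/mastermap_SJ38.gpkg   TopographicArea
##   data/mm/mastermap_SJ48.gpkg   TopographicArea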
# Import mastermap data ---------------------------------------------------
mm <- vector(mode = "list", length = length(SAgrid)) # initialise empty mm object
names(mm) <- names(SAgrid)
for (i in seq_along(SAgrid)){ # loop through each part of the study area to import features,
# tidy them and remove duplicates
gridref <- names(SAgrid)[[i]] # the grid reference we are working on for this iteration
if (nrow(explo[[gridref]]) == 0){
message("Warning: no MasterMap data for grid reference ", gridref)
next
} # skip if no matching files
message("Importing MasterMap data for grid reference ", gridref) # show progress
# cycle through all files that intersect this grid reference, and import polygons overlapping SA
mm[[i]] <- lapply(seq_len(nrow(explo[[gridref]])), function(x){
poly_in_boundary(explo[[gridref]][["file"]][x], # the dsn argument
SAgrid[[i]], # the query layer
layer = explo[[gridref]][["layername"]][x] # the layer name
) # read in features
})
# convert all names to lowercase for easier matching with mm_cols
mm[[i]] <- lapply(mm[[i]], function(x){
names(x) <- tolower(names(x))
return(x)
})
# Each mm item is now NULL (no features) or a list of one or more items.
# They might differ in the number and names of columns, so we process them right away before binding.
mm[[i]] <- lapply(mm[[i]], function(x){
dplyr::select(x, tidyselect::all_of(tolower(mm_cols))) %>% # keep only cols we need,
## using names specified by user and renaming on the fly with our standard names
dplyr::mutate(dplyr::across(tidyselect::where(is.list) & !attr(., "sf_column"), # convert weird list-columns to character
ecoservR::list_to_char)) %>%
dplyr::filter(PhysicalLevel != "51", # remove things above ground level
Group != "Landform" ) %>%
dplyr::mutate(TOID = as.character(gsub("[a-zA-Z ]", "", TOID))) %>% # strip the "osgb" prefix, which appears in some datasets but not others
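# e.g. "osgb1000000000000001" becomes "1000000000000001"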
dplyr::select(-PhysicalLevel) # remove useless column
})
## Harmonise geometry column before binding
mm[[i]] <- lapply(mm[[i]], function(x) ecoservR::rename_geometry(x, "geometry"))
# Now all list items should be consistent, so they can be bound together.
# There will be many duplicated polygons at this stage (neighbouring files contain the same straddling polygons), so we remove them.
mm[[i]] <- do.call(rbind, mm[[i]])
mm[[i]] <- mm[[i]][!duplicated(mm[[i]]$TOID),]
}
### Once the loop is over we have a list as long as there are 10x10km grid references in the study area.
### We make sure all empty items are removed and do another duplicate check (there should not be duplicates within tiles, but there will be some across tiles)
## Remove empty tiles
mm <- mm[!sapply(mm, is.null)]
mm <- mm[sapply(mm, function(x) nrow(x) > 0)]
## Remove duplicated polygons (same polygon can occur in multiple tiles)
mm <- removeDuplicPoly(mm, "TOID")
## the duplicate check may reduce a tile to nothing (this fixes a bug),
## so we check and remove empty tiles again
mm <- mm[sapply(mm, function(x) nrow(x) > 0)]
## Clip to study area
message("Clipping to study area...")
## identify core vs edge tiles
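## NB: grid here is the full set of 10x10km OS grid squares (assumed to be bundled
## with the package as internal data; SAgrid becomes the subset touching the buffer)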
SAgrid <- suppressWarnings({
grid[lengths(sf::st_intersects(sf::st_set_crs(grid, 27700), studyAreaBuffer)) > 0, ] %>%
sf::st_set_crs(27700)
})
is_core <- unlist(sf::st_contains(studyAreaBuffer,SAgrid))
message("Identified tiles to clip")
## Clip where necessary
# (if a whole 10k tile is included within study area, no need)
for (i in seq_along(mm)){
# we only need to perform clipping on edge tiles
if (names(mm)[[i]] %in% names(SAgrid)[is_core]){next}
## if we have an edge tile, we apply the faster_intersect custom function
## which deletes polygons outside the boundary, protects those inside
## and clips those on the edge
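## A rough sketch of the idea (assumed and simplified; see the helper's source for the real logic):
## keep   <- x[lengths(sf::st_intersects(x, boundary)) > 0, ]   # drop features entirely outside
## inside <- lengths(sf::st_within(keep, boundary)) > 0         # fully inside: keep untouched
## rbind(keep[inside, ], sf::st_intersection(keep[!inside, ], boundary)) # clip edge features only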
mm[[i]] <- faster_intersect(mm[[i]], studyAreaBuffer)
message("Clipped tile ", names(mm)[[i]])
}
# # (we only imported the features intersecting the SA but sometimes they are very long, e.g. roads, and stick out of the map)
# mm <- lapply(mm, function(x) suppressWarnings(
# sf::st_intersection(x, sf::st_geometry(studyAreaBuffer))) %>% checkgeometry(., "POLYGON"))
### Step 6. Add a buffer for the sea -----
## Import coastline boundaries
## anything outside the UK boundary is labelled as sea and a buffer added...
# Low-priority step --- is it really useful?
# SAVE UPDATED MAP ----------------------------------------------------------------------------
# Save the list of spatial objects as an RDS object (quicker to reload in R)
saveRDS(mm, file.path(output_temp, paste0(title,"_MM_01.RDS")))
# Update the project log with the information that map was updated
projectLog$SAbuffer <- file.path(output_temp,
paste(title, "_studyAreaBuffer_",studybuffer,"m.gpkg", sep=""))
projectLog$last_success <- "MM_01.RDS"
### Create a "performance" list in the project log which saves the time taken
## for each module
projectLog$performance <- vector(mode = "list", length = 17)
names(projectLog$performance) <- c(
"prep_mastermap",
"add_greenspace",
"add_corine",
"add_NFI",
"add_PHI",
"add_CROME",
"add_DTM",
"add_hedges",
"classify_map",
"add_socioeco",
"cap_carbon",
"cap_air",
"cap_flood",
"cap_pollin",
"cap_noise",
"cap_clim",
"cap_access"
)
timeB <- Sys.time() # stop time
projectLog$performance[["prep_mastermap"]] <- as.numeric(difftime(
timeB, timeA, units="mins"
))
updateProjectLog(projectLog) # save revised log
message(paste0("MasterMap preparation finished. Process took ",
round(difftime(timeB, timeA, units = "mins"), digits = 1), " minutes. Ready for
processing."))
on.exit(invisible(gc())) # garbage collection - return some memory to computer
return({
## assign the objects to the global environment so downstream modules can use them
invisible({
mm <<- mm
studyAreaBuffer <<- studyAreaBuffer
projectLog <<- projectLog
})
})
}