knitr::opts_chunk$set( collapse = TRUE, comment = "#>", warning = FALSE, message = FALSE, dev = "ragg_png", dpi = 300, tidy = "styler", out.width = "100%" )
climaemet can retrieve data from the stations included on AEMET Open Data. However, in terms of spatial analysis and visualization, it can be useful to extend the data from points (stations) to the whole extent of Spain. On this article we would explain a method to interpolate the climatic data trough Spatial Interpolation, that is the process of using points with known values to estimate values at other unknown points.
For this analysis, we would need the following libraries:
library(climaemet) library(mapSpain) # Base maps of Spain library(sf) # spatial shape handling library(terra) # Spatial raster handling library(gstat) # for spatial interpolation library(tidyverse) # data handling library(ggplot2) # for plots library(tidyterra) # Plotting SpatRasters with tidyterra library(gifski) # we would create an animation
We choose here the daily climatic data from Winter 2020-2021 in Spain. Note that on the first half of January, Storm Filomena brought unusual heavy snowfall to parts of Spain, with Madrid recording its heaviest snowfall since 1971. We should be able to spot that.
clim_data <- aemet_daily_clim( start = "2020-12-21", end = "2021-03-20", return_sf = TRUE )
# sf::st_write(clim_data, "clim_data.gpkg") # Load a cached version of the file to avoid unneeded api calls clim_data <- sf::read_sf("clim_data.gpkg")
Let's keep only the stations on mainland Spain:
clim_data_clean <- clim_data %>% # Exclude Canary Islands from analysis filter(str_detect(provincia, "PALMAS|TENERIFE", negate = TRUE)) %>% dplyr::select(fecha, tmed) %>% # Exclude NAs filter(!is.na(tmed)) summary(clim_data_clean$tmed) ggplot(clim_data_clean) + geom_sf()
We load also a basic shape file of Spain using mapSpain:
ccaa_esp <- esp_get_ccaa(epsg = 4326) %>% # Exclude Canary Islands from analysis filter(ine.ccaa.name != "Canarias") ggplot(ccaa_esp) + geom_sf() + geom_sf(data = clim_data_clean)
As it can be seed, the climatic data we have available so far is restricted to the stations (points), but we want to extend these values to the whole territory.
As we need to predict values at locations where no measurements have been made, we would need to interpolate the data. On this example we would use the terra package and we would apply the Inverse Distance Weighted method, that is one of several approaches to perform spatial interpolation. We recommend consulting @hijmans2023 on how to perform these analysis on R.
The process would be as follow:
For this analysis, we need a destination object with the locations to be predicted. A common technique is to create a spatial grid (a "raster") covering the targeted locations.
On this example, we would use terra to create a regular grid that we would use for interpolation.
An important thing to consider in any spatial analysis or visualization is the coordinate reference system (CRS). We won't cover this in detail on this article, but we should mention a few key considerations:
On this exercise, we choose to project our objects to ETRS89 / UTM zone 30N EPSG:25830, that provides x and y values on meters and maximizes the accuracy for Spain.
clim_data_utm <- st_transform(clim_data_clean, 25830) ccaa_utm <- st_transform(ccaa_esp, 25830) # Note the original projection st_crs(ccaa_esp)$proj4string # vs the utm projection st_crs(ccaa_utm)$proj4string
Now, we create a regular grid using terra. This grid is composed to equally spaced points over the whole extent (bounding box) of Spain.
We use here a density of 5,000 (m), so the grid density is 5 x 5 kms (25 km2):
# Create grid 5*5 km (25 km2) grd <- rast(ccaa_utm, res = c(5000, 5000)) # Number of cells ncell(grd)
Now we just need to populate the (empty) grid with the predicted values based on the observations:
# Test with a single day test_day <- clim_data_utm %>% filter(fecha == "2021-01-08") # Interpolate temp init_p <- test_day %>% vect() %>% as_tibble(geom = "XY") gs <- gstat( formula = tmed ~ 1, locations = ~ x + y, data = init_p, set = list(idp = 2) ) interp_temp <- interpolate(grd, gs) # Plot with tidyterra ggplot() + geom_spatraster(data = interp_temp %>% select(var1.pred)) + scale_fill_whitebox_c( palette = "bl_yl_rd", labels = scales::label_number(suffix = "ºC") ) + labs( title = "(interpolated) temperature", subtitle = "2021-01-08" )
Let's create a nice ggplot2 plot! See also @roye2020 for more on this.
# Making a nice plot on ggplot2 temp_values <- interp_temp %>% pull(var1.pred) %>% range(na.rm = TRUE) # Get min and max from interpolated values min_temp <- floor(min(temp_values)) max_temp <- ceiling(max(temp_values)) ggplot() + geom_sf(data = ccaa_utm, fill = "grey95") + geom_spatraster(data = interp_temp %>% select(var1.pred)) + scale_fill_gradientn( colours = hcl.colors(11, "Spectral", rev = TRUE, alpha = 0.7), limits = c(min_temp, max_temp) ) + theme_minimal() + labs( title = "Avg. Temperature in Spain", subtitle = "2021-01-08", caption = "Data: AEMET, IGN", fill = "C" )
On this section, we would loop over the dates to create a single SpatRaster with several layers, each one holding the interpolation for a specific date. After that, we would create an animation to observe the evolution of temperature through the winter of 2020/21.
# We would create a SpatRaster with a layer for each date dates <- sort(unique(clim_data_clean$fecha)) # Loop through days and create interpolation interp_list <- lapply(dates, function(x) { thisdate <- x tmp_day <- clim_data_utm %>% filter(fecha == thisdate) %>% vect() %>% as_tibble(geom = "XY") gs_day <- gstat(formula = tmed ~ 1, locations = ~ x + y, data = tmp_day) interp_day <- interpolate(grd, gs_day, idp = 2.0) %>% select(interpolated = var1.pred) names(interp_day) <- format(thisdate) interp_day }) # Concatenate to a single SpatRaster interp_rast <- do.call(c, interp_list) %>% mask(vect(ccaa_utm)) time(interp_rast) <- dates
Now we can check the results:
interp_rast nlyr(interp_rast) head(names(interp_rast)) # Facet map with some data ggplot() + geom_spatraster(data = interp_rast %>% select(1:16)) + facet_wrap(~lyr) + scale_fill_whitebox_c(palette = "pi_y_g", alpha = 1) + theme_minimal() + theme(axis.text = element_blank()) + labs(title = "Temperatures (selected)")
And finally we loop through each layer to produce a plot (png file) for each date. The last step is to concatenate each png file into a gif file with gifski.
# Extending and animating # Create gif # We need a common scale using all the observed values on each layer allvalues <- values(interp_rast, mat = FALSE, na.rm = TRUE) min_temp2 <- floor(min(allvalues)) max_temp2 <- ceiling(max(allvalues)) # Loop through all the layers all_layers <- names(interp_rast) for (i in seq_len(length(all_layers))) { # Create a gif for each date this <- all_layers[i] interp_rast_day <- interp_rast %>% select(all_of(this)) this_date <- as.Date(gsub("interp_", "", this)) g <- ggplot() + geom_spatraster(data = interp_rast_day) + geom_sf(data = ccaa_utm, fill = NA) + coord_sf(expand = FALSE) + scale_fill_gradientn( colours = hcl.colors(20, "Spectral", rev = TRUE, alpha = 0.8), limits = c(min_temp2, max_temp2), na.value = NA, labels = scales::label_number(suffix = "º") ) + theme_minimal() + labs( title = "Avg. Temperature in Spain", subtitle = this_date, caption = "Data: AEMET, IGN", fill = "" ) tmp <- file.path(tempdir(), paste0(this, ".png")) ggsave(tmp, g, width = 1600, height = 1200, units = "px", bg = "white") }
And finally we use gifski to create the animation:
# Create gif from temporary pngs allfiles <- file.path(tempdir(), paste0(all_layers, ".png")) gifski::gifski(allfiles, loop = TRUE, delay = 1 / 6, gif_file = "winter_2021.gif", width = 1600, height = 1200 )
# From cache knitr::include_graphics("winter_2021.gif")
Let's plot an histogram for each Autonomous Community using the geofacet
package:
library(geofacet) clim_data_geofacet <- clim_data %>% st_drop_geometry() %>% select(fecha, tmed, provincia) %>% filter(!is.na(tmed)) # Paste Province info and codes clim_data_mean <- clim_data_geofacet %>% mutate(name_norm = ifelse( provincia == "STA. CRUZ DE TENERIFE", "Santa Cruz de Tenerife", provincia )) %>% mutate(name_norm = esp_dict_translate(name_norm, "es")) %>% mutate(cpro = esp_dict_region_code(name_norm, destination = "cpro")) # Paste CCAA codes and names by provincia ccaas <- esp_codelist %>% select(cpro, codauto, ine.ccaa.name) %>% distinct() clim_data_mean <- clim_data_mean %>% left_join(ccaas, by = "cpro") %>% group_by(fecha, ine.ccaa.name, codauto) %>% summarize( mean_tmed = mean(tmed, na.rm = TRUE), obs = n(), .groups = "keep" ) %>% mutate(code = codauto) # Label the grid with shortnames ccaagrid <- merge(geofacet::spain_ccaa_grid1, esp_codelist[c("codauto", "ccaa.shortname.es")], by.x = "code", by.y = "codauto" ) %>% mutate(name = ccaa.shortname.es) %>% select(-ccaa.shortname.es) %>% distinct() # Abbrev. ccaagrid$name <- gsub("Comunidad", "C.", ccaagrid$name) # Plot ggplot(clim_data_mean, aes(fecha, mean_tmed)) + geom_line(color = "steelblue") + # Line on Filomena peak geom_vline( xintercept = as.Date("2021-01-08"), colour = "red" ) + facet_geo(~code, grid = ccaagrid, scales = "fixed", label = "name") + ylab("Mean Temperature") + xlab("") + theme_minimal() + theme( strip.text.x = element_text(size = 7), axis.text.x = element_text( color = "grey20", size = 5, angle = 90, hjust = .5, vjust = .5, face = "plain" ), axis.text.y = element_text( color = "grey20", size = 5, angle = 0, hjust = 1, vjust = 0, face = "plain" ) )
if (!require("sessioninfo")) { install.packages("sessioninfo") } sessioninfo::session_info()
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.