knitr::opts_chunk$set(
  collapse = FALSE,
  warning = FALSE,
  message = FALSE,
  echo = FALSE,
  cache = FALSE,
  comment = "#>",
  # fig.path = "../figures/",
  dpi = 300
)
library(sf)
library(tidyverse)
library(spatstat)
library(patchwork)
devtools::load_all(".")

Introduction

The analysis is divided into three scales:

This document describes results for Pottery groups.

The study area is broadly defined as Easter part of Bohemia (abbreviated B) and Morava river catchment (M). It spans across the area of the Czech Republic and bordering regions of Austria and Slovakia.

Data

Data is published in the Journal of Open Archaeology Data:

Pajdla, P. and Trampota, F., 2021. Neolithic Settlements in Central Europe: Data from the Project ‘Lifestyle as an Unintentional Identity in the Neolithic’. Journal of Open Archaeology Data, 9, p.13. DOI: http://doi.org/10.5334/joad.88

source(here::here("analysis/code/data_load.R"))
set_base$period2 %>%
  table_nr_set()
settlements2 %>%
  plot_nr_set()

Analysis

Settlement density

Settlement density is estimated using KDE. Kernel size of 4 km is used. This value is completely arbitrary but is selected because settlement clusters are nicely highlighted at this scale.

settlements2 %>% 
  plot_kde()
kde <- settlements2 %>% 
  est_kde() %>% 
  mutate(reg = str_extract(id, "^."))
kde %>% 
  add_period_label() %>% 
  mutate(reg = if_else(reg == "B", "East Bohemia", "Morava river catchment")) %>% 
  add_period_label() %>% 
  ggplot(aes(period_label, value)) +
  geom_violin() +
  geom_boxplot(alpha = 0.4, width = 0.2) +
  geom_jitter(alpha = 0.05, width = 0.4) +
  facet_wrap(vars(reg)) +
  theme_bw() +
  theme(axis.text.x = element_text(angle = -45, hjust = 0)) +
  labs(y = "KDE (Density of settlements per square km)", x = "Period")

# ggsave(here::here("groups_kde.svg"), width = 10, height = 5)

Terrain surrounding the settlements

Altitude

alt <- settlements2 %>% 
  select(id, altitude) %>% 
  st_drop_geometry() %>% 
  distinct(id, altitude) %>% 
  transmute(id, value = altitude,
            variable = "altitude")
settlements2 %>% 
  mutate(reg = str_extract(id, "^."),
         reg = if_else(reg == "B", "East Bohemia", "Morava river catchment")) %>% 
  add_period_label() %>% 
  ggplot(aes(period_label, altitude)) +
  geom_violin() +
  geom_boxplot(alpha = 0.4, width = 0.2) +
  geom_jitter(alpha = 0.05, width = 0.4) +
  facet_wrap(vars(reg)) +
  theme_bw() +
  theme(axis.text.x = element_text(angle = -45, hjust = 0)) +
  labs(y = "Altitude (m)", x = "Period")

# ggsave(here::here("groups_altitude.svg"), width = 10, height = 5)

Slope

slope <- read_csv(here::here("analysis/data/derived_data", 
                             "slope_at_points.csv")) %>% 
  filter(id %in% settlements2$id) %>% 
  mutate(variable = "slope")
slope %>% 
  left_join(settlements2, by = "id") %>% 
  mutate(reg = str_extract(id, "^."),
         reg = if_else(reg == "B", "East Bohemia", "Morava river catchment")) %>% 
  add_period_label() %>% 
  ggplot(aes(period_label, value)) +
  geom_violin() +
  geom_boxplot(alpha = 0.4, width = 0.2) +
  geom_jitter(alpha = 0.05, width = 0.4) +
  facet_wrap(vars(reg)) +
  theme_bw() +
  theme(axis.text.x = element_text(angle = -45, hjust = 0)) +
  labs(y = "Slope (°)", x = "Period")

# ggsave(here::here("groups_slope.svg"), width = 10, height = 5)

TPI

Terrain / Topographic position index

tpi <- read_csv(here::here("analysis/data/derived_data", 
                           "tpi_at_points.csv")) %>% 
  filter(id %in% settlements2$id) %>% 
  mutate(variable = "tpi")
tpi %>% 
  left_join(settlements2, by = "id") %>% 
  mutate(reg = str_extract(id, "^."),
         reg = if_else(reg == "B", "East Bohemia", "Morava river catchment")) %>% 
  add_period_label() %>% 
  ggplot(aes(period_label, value)) +
  geom_violin() +
  geom_boxplot(alpha = 0.4, width = 0.2) +
  geom_jitter(alpha = 0.05, width = 0.4) +
  facet_wrap(vars(reg)) +
  theme_bw() +
  theme(axis.text.x = element_text(angle = -45, hjust = 0)) +
  labs(y = "TPI", x = "Period")
# ggsave(here::here("groups_tpi.svg"), width = 10, height = 5)

Density of water courses

hydro <- read_csv(here::here("analysis/data/derived_data", 
                             "hydro_at_points.csv")) %>% 
  filter(id %in% settlements2$id) %>% 
  mutate(variable = "hydro") %>% 
  rename(value = hydro_kde)
hydro %>% 
  left_join(settlements2, by = "id") %>% 
  mutate(reg = str_extract(id, "^."),
         reg = if_else(reg == "B", "East Bohemia", "Morava river catchment")) %>% 
  add_period_label() %>% 
  ggplot(aes(period_label, value)) +
  geom_violin() +
  geom_boxplot(alpha = 0.4, width = 0.2) +
  geom_jitter(alpha = 0.05, width = 0.4) +
  facet_wrap(vars(reg)) +
  theme_bw() +
  theme(axis.text.x = element_text(angle = -45, hjust = 0)) +
  labs(y = "Hydrology KDE", x = "Period")
# ggsave(here::here("groups_hydro.svg"), width = 10, height = 5)

Arrangement along natural lines

along_lines <- read_rds(here::here(derived_data, "lines/linear_arrangement.RDS"))

along_lines <- along_lines$groups %>% 
  mutate(chrono = str_remove(chrono, ".$")) %>% 
  group_by(id, chrono, period, variable) %>% 
  summarise(value = max(value)) %>% 
  ungroup(id, chrono, variable) %>% 
  mutate(value = value / n()) %>% 
  select(-chrono) %>% 
  group_by(variable, period, id) %>% 
  summarise(value = mean(value), .groups = "drop")

Distance to raw material sources

# calculated separately in a script `rm_distance.R`
# dist_pts <- dist_to_rm(set_spat, rm_pts, "rm")
# dist_lns <- dist_to_rm(set_spat, rm_lns, "rm")
rm_dist <- read_csv(here::here("analysis/data/input_data", "rm_dist.csv")) %>% 
  mutate(rm = str_to_lower(rm)) %>% 
  filter(id %in% settlements2$id)

# 0 = not used
# 0.5 = somewhat used
# 1 = used
rm_b <- readxl::read_xlsx(here::here("analysis/data/raw_data",
                                     "sidliste_vs_zdroje.xlsx"), sheet = "B")
rm_m <- readxl::read_xlsx(here::here("analysis/data/raw_data",
                                     "sidliste_vs_zdroje.xlsx"), sheet = "M")

rm_relevance <- bind_rows(b = rm_b, m = rm_m, .id = "reg") %>% 
  mutate(across(starts_with("p"), function(x) x * 0.5)) %>% 
  pivot_longer(starts_with("p"), 
               names_to = "period", values_to = "relevance") %>% 
  rename(rm = surovina)

rm_dist_rel <- set_base$period2 %>% 
  filter_rel_dist_rm()

# ---
rm_types <- rm_dist %>% 
  select(rm, type) %>% 
  distinct()
rm_types_tab <- rm_types %>% 
  pull(type)
names(rm_types_tab) <- rm_types %>% 
  pull(rm)
# ---

rm_dist_fin <- rm_dist_rel %>% 
  prep_dist_rel_rm()
rm_dist_fin %>% 
  filter(variable == "rm_dist_chipped") %>% 
  add_period_label() %>% 
  mutate(reg = str_extract(id, "^."),
         reg = if_else(reg == "B", "East Bohemia", "Morava river catchment")) %>% 
  ggplot(aes(period_label, value)) +
  geom_violin() +
  geom_boxplot(alpha = 0.4, width = 0.2) +
  geom_jitter(alpha = 0.05, width = 0.4) +
  facet_wrap(vars(reg)) +
  theme_bw() +
  theme(axis.text.x = element_text(angle = -45, hjust = 0)) +
  labs(y = "Distance to lithic raw. mat. (km)", x = "Period")

# ggsave(here::here("groups_distSI.svg"), width = 10, height = 5)

Continuity

cont <- set_base$period2 %>%
  select(-period_label, -chrono, - facet) %>%
  distinct(id, period) %>%
  mutate(pres = 1L) %>%
  pivot_wider(names_from = period, values_from = pres, values_fill = 0L) %>%
  pivot_longer(starts_with("p"), names_to = "period") %>%
  mutate(period = factor(period, levels = levels(set_base$period2$period))) %>%
  arrange(id, period) %>%
  group_by(id) %>%
  mutate(prev = if_else(value == 1, lag(value), 0L),
         prev = if_else(is.na(prev), 0L, prev)) %>%
  ungroup(id) %>%
  select(-value) %>%
  mutate(variable = "cont") %>%
  rename(value = prev)

n_set <- set_base$period2 %>% 
  add_region() %>% 
  group_by(period_label, reg) %>% 
  summarise(sum = n())

cont %>%
  filter(variable == "cont") %>%
  add_period_label() %>%
  add_region() %>%
  group_by(period_label, reg) %>%
  summarise(n = sum(value, na.rm = TRUE)) %>%
  full_join(n_set) %>%
  mutate(n = if_else(is.na(n), 0L, n),
         perc = (n / sum) * 100,
         reg = if_else(reg == "B", "East Bohemia", "Morava river catchment")) %>%
  ggplot(aes(period_label, perc)) +
  geom_col(color = "black", fill = "white", 
           position = "dodge", show.legend = FALSE) +
  facet_wrap(vars(reg), ncol = 1) +
  labs(x = "Period", y = "Continuity of settlements (%)") +
  theme_bw()

# ggsave(here::here("groups_continuity.svg"), width = 10, height = 5)

Synthesis

labs_variables <- c(
  "settlements_kde" = "Settlement density",
  # "dist_fenced" = "Min. dist. to fenced set. (km)",
  "line_terrain" = "Along terrain line",
  "line_water" = "Along water line",
  # "cont" = "Settlement continuity",
  "hydro" = "Watercourse density",
  # "dem" = "Altitude (DEM)", 
  "altitude" = "Altitude (map)",
  "slope" = "Slope (°)",
  "tpi" = "TPI",
  "rm_dist_chipped" = "Distance to chipped RM",
  "rm_dist_polished" = "Distance to polished RM"
  # "rm_pc1" = "Distance to RM sources 1",
  # "rm_pc2" = "Distance to RM sources 2",
  # "rm_pc3" = "Distance to RM sources 3"
) %>% 
  map_chr(str_wrap, 12)
res_long <- alt %>% 
  bind_rows(slope) %>% 
  bind_rows(tpi) %>% 
  bind_rows(hydro) %>% 
  left_join(set_base$period2) %>% 
  select(id, variable, value, period) %>% 
  bind_rows(along_lines) %>% 
  bind_rows(rm_dist_fin) %>% 
  bind_rows(select(kde, -reg)) %>% 
  filter(id %in% set_base$period2$id) %>% 
  group_by(variable, id, period) %>% 
  summarise(value = mean(value)) %>% 
  ungroup()

res_wide <- res_long %>% 
  pivot_wider(id_cols = c(id, period), 
              names_from = variable, 
              values_from = value, values_fill = 0)

Principal components analysis

pca_per <- res_wide %>%
  add_region() %>% 
  group_by(reg, period) %>% 
  nest() %>% 
  nested_pca()
pca_per %>% plot_nested_pca(var = "period")
# ggsave(here::here("analysis/groups/pca.pdf"), width = 21, height = 35)
pca_per %>% plot_nested_pca(var = "period", pc = c("PC3", "PC4"))
# ggsave(here::here("analysis/groups/pca2.pdf"), width = 21, height = 35)
loadings_sum <- pca_per %>% 
  summarise_variables()
# loadings_sum_m <- as.matrix(loadings_sum[, paste0("PC", 1:(ncol(loadings_sum) - 1))])
# rownames(loadings_sum_m) <- loadings_sum$column
# heatmap(loadings_sum_m)
loadings_sum %>% 
  mutate(r = (PC1 + PC2 + PC3 + PC4 + PC5)/(ncol(loadings_sum) - 1)) %>% 
  select(column, r) %>% 
  arrange(reg, desc(r)) %>% 
  knitr::kable()

Groups in the data

library(mclust)
mb_clust <- pca_per %>% mbc_cluster()

classif <- mb_clust %>%
  dplyr::select(reg, period, classif) %>% 
  tidyr::unnest(c(classif)) %>% 
  dplyr::ungroup(c(reg, period)) %>% 
  dplyr::mutate(g = str_c(reg, g))

set_spat %>% 
  select(id) %>% 
  inner_join(classif, by = "id") %>% 
  ggplot() +
  geom_sf(aes(color = g)) +
  facet_grid(vars(period), vars(g)) +
  theme_void()
# set_spat %>% 
#   select(id) %>% 
#   inner_join(classif, by = "id") %>% 
#   write_sf(here::here("analysis/groups/groups.geojson"), delete_layer = TRUE)
plot_g_distribution <- function(var) {
  res_long %>% 
    filter(variable == var) %>% 
    inner_join(classif, by = c("id", "period")) %>% 
    add_period_label() %>% 
    ggplot(aes(g, value)) +
    geom_boxplot() +
    facet_wrap(vars(period_label), scales = "free_x") +
    theme_bw() +
    labs(title = var)
}

plot_g_distribution("altitude")
plot_g_distribution("slope")
plot_g_distribution("tpi")

plot_g_distribution("hydro")

plot_g_distribution("line_terrain")
plot_g_distribution("line_water")

plot_g_distribution("rm_dist_chipped")
plot_g_distribution("rm_dist_polished")

plot_g_distribution("settlements_kde")

\newpage

References

::: {#refs} :::

\newpage

Colophon

This report was generated on r Sys.time() using the following computational environment and dependencies:

# which R packages and versions?
if ("devtools" %in% installed.packages()) devtools::session_info()


petrpajdla/settlements documentation built on June 27, 2022, 10:21 p.m.