In petrpajdla/settlements: Compendium accompanying article on settlement patters in the Neolithic period.

knitr::opts_chunk$set(
  collapse = FALSE,
  warning = FALSE,
  message = FALSE,
  echo = FALSE,
  cache = FALSE,
  comment = "#>",
  # fig.path = "../figures/",
  dpi = 300
)

library(sf)
library(tidyverse)
library(spatstat)
library(patchwork)
devtools::load_all(".")

Introduction

The analysis is divided into three scales:

Neolithic B/C
Pottery traditions
Pottery groups

This document describes results for Pottery groups.

The study area is broadly defined as Easter part of Bohemia (abbreviated B) and Morava river catchment (M). It spans across the area of the Czech Republic and bordering regions of Austria and Slovakia.

Data

Data is published in the Journal of Open Archaeology Data:

Pajdla, P. and Trampota, F., 2021. Neolithic Settlements in Central Europe: Data from the Project ‘Lifestyle as an Unintentional Identity in the Neolithic’. Journal of Open Archaeology Data, 9, p.13. DOI: http://doi.org/10.5334/joad.88

source(here::here("analysis/code/data_load.R"))

set_base$period2 %>%
  table_nr_set()

settlements2 %>%
  plot_nr_set()

Analysis

Settlement density

Settlement density is estimated using KDE. Kernel size of 4 km is used. This value is completely arbitrary but is selected because settlement clusters are nicely highlighted at this scale.

settlements2 %>% 
  plot_kde()

kde <- settlements2 %>% 
  est_kde() %>% 
  mutate(reg = str_extract(id, "^."))

kde %>% 
  add_period_label() %>% 
  mutate(reg = if_else(reg == "B", "East Bohemia", "Morava river catchment")) %>% 
  add_period_label() %>% 
  ggplot(aes(period_label, value)) +
  geom_violin() +
  geom_boxplot(alpha = 0.4, width = 0.2) +
  geom_jitter(alpha = 0.05, width = 0.4) +
  facet_wrap(vars(reg)) +
  theme_bw() +
  theme(axis.text.x = element_text(angle = -45, hjust = 0)) +
  labs(y = "KDE (Density of settlements per square km)", x = "Period")

# ggsave(here::here("groups_kde.svg"), width = 10, height = 5)

Terrain surrounding the settlements

Altitude

alt <- settlements2 %>% 
  select(id, altitude) %>% 
  st_drop_geometry() %>% 
  distinct(id, altitude) %>% 
  transmute(id, value = altitude,
            variable = "altitude")

settlements2 %>% 
  mutate(reg = str_extract(id, "^."),
         reg = if_else(reg == "B", "East Bohemia", "Morava river catchment")) %>% 
  add_period_label() %>% 
  ggplot(aes(period_label, altitude)) +
  geom_violin() +
  geom_boxplot(alpha = 0.4, width = 0.2) +
  geom_jitter(alpha = 0.05, width = 0.4) +
  facet_wrap(vars(reg)) +
  theme_bw() +
  theme(axis.text.x = element_text(angle = -45, hjust = 0)) +
  labs(y = "Altitude (m)", x = "Period")

# ggsave(here::here("groups_altitude.svg"), width = 10, height = 5)

Slope

slope <- read_csv(here::here("analysis/data/derived_data", 
                             "slope_at_points.csv")) %>% 
  filter(id %in% settlements2$id) %>% 
  mutate(variable = "slope")

slope %>% 
  left_join(settlements2, by = "id") %>% 
  mutate(reg = str_extract(id, "^."),
         reg = if_else(reg == "B", "East Bohemia", "Morava river catchment")) %>% 
  add_period_label() %>% 
  ggplot(aes(period_label, value)) +
  geom_violin() +
  geom_boxplot(alpha = 0.4, width = 0.2) +
  geom_jitter(alpha = 0.05, width = 0.4) +
  facet_wrap(vars(reg)) +
  theme_bw() +
  theme(axis.text.x = element_text(angle = -45, hjust = 0)) +
  labs(y = "Slope (°)", x = "Period")

# ggsave(here::here("groups_slope.svg"), width = 10, height = 5)

TPI

Terrain / Topographic position index

tpi <- read_csv(here::here("analysis/data/derived_data", 
                           "tpi_at_points.csv")) %>% 
  filter(id %in% settlements2$id) %>% 
  mutate(variable = "tpi")

tpi %>% 
  left_join(settlements2, by = "id") %>% 
  mutate(reg = str_extract(id, "^."),
         reg = if_else(reg == "B", "East Bohemia", "Morava river catchment")) %>% 
  add_period_label() %>% 
  ggplot(aes(period_label, value)) +
  geom_violin() +
  geom_boxplot(alpha = 0.4, width = 0.2) +
  geom_jitter(alpha = 0.05, width = 0.4) +
  facet_wrap(vars(reg)) +
  theme_bw() +
  theme(axis.text.x = element_text(angle = -45, hjust = 0)) +
  labs(y = "TPI", x = "Period")

# ggsave(here::here("groups_tpi.svg"), width = 10, height = 5)

Density of water courses

hydro <- read_csv(here::here("analysis/data/derived_data", 
                             "hydro_at_points.csv")) %>% 
  filter(id %in% settlements2$id) %>% 
  mutate(variable = "hydro") %>% 
  rename(value = hydro_kde)

hydro %>% 
  left_join(settlements2, by = "id") %>% 
  mutate(reg = str_extract(id, "^."),
         reg = if_else(reg == "B", "East Bohemia", "Morava river catchment")) %>% 
  add_period_label() %>% 
  ggplot(aes(period_label, value)) +
  geom_violin() +
  geom_boxplot(alpha = 0.4, width = 0.2) +
  geom_jitter(alpha = 0.05, width = 0.4) +
  facet_wrap(vars(reg)) +
  theme_bw() +
  theme(axis.text.x = element_text(angle = -45, hjust = 0)) +
  labs(y = "Hydrology KDE", x = "Period")

# ggsave(here::here("groups_hydro.svg"), width = 10, height = 5)

Arrangement along natural lines

along_lines <- read_rds(here::here(derived_data, "lines/linear_arrangement.RDS"))

along_lines <- along_lines$groups %>% 
  mutate(chrono = str_remove(chrono, ".$")) %>% 
  group_by(id, chrono, period, variable) %>% 
  summarise(value = max(value)) %>% 
  ungroup(id, chrono, variable) %>% 
  mutate(value = value / n()) %>% 
  select(-chrono) %>% 
  group_by(variable, period, id) %>% 
  summarise(value = mean(value), .groups = "drop")

Distance to raw material sources

# calculated separately in a script `rm_distance.R`
# dist_pts <- dist_to_rm(set_spat, rm_pts, "rm")
# dist_lns <- dist_to_rm(set_spat, rm_lns, "rm")
rm_dist <- read_csv(here::here("analysis/data/input_data", "rm_dist.csv")) %>% 
  mutate(rm = str_to_lower(rm)) %>% 
  filter(id %in% settlements2$id)

# 0 = not used
# 0.5 = somewhat used
# 1 = used
rm_b <- readxl::read_xlsx(here::here("analysis/data/raw_data",
                                     "sidliste_vs_zdroje.xlsx"), sheet = "B")
rm_m <- readxl::read_xlsx(here::here("analysis/data/raw_data",
                                     "sidliste_vs_zdroje.xlsx"), sheet = "M")

rm_relevance <- bind_rows(b = rm_b, m = rm_m, .id = "reg") %>% 
  mutate(across(starts_with("p"), function(x) x * 0.5)) %>% 
  pivot_longer(starts_with("p"), 
               names_to = "period", values_to = "relevance") %>% 
  rename(rm = surovina)

rm_dist_rel <- set_base$period2 %>% 
  filter_rel_dist_rm()

# ---
rm_types <- rm_dist %>% 
  select(rm, type) %>% 
  distinct()
rm_types_tab <- rm_types %>% 
  pull(type)
names(rm_types_tab) <- rm_types %>% 
  pull(rm)
# ---

rm_dist_fin <- rm_dist_rel %>% 
  prep_dist_rel_rm()

rm_dist_fin %>% 
  filter(variable == "rm_dist_chipped") %>% 
  add_period_label() %>% 
  mutate(reg = str_extract(id, "^."),
         reg = if_else(reg == "B", "East Bohemia", "Morava river catchment")) %>% 
  ggplot(aes(period_label, value)) +
  geom_violin() +
  geom_boxplot(alpha = 0.4, width = 0.2) +
  geom_jitter(alpha = 0.05, width = 0.4) +
  facet_wrap(vars(reg)) +
  theme_bw() +
  theme(axis.text.x = element_text(angle = -45, hjust = 0)) +
  labs(y = "Distance to lithic raw. mat. (km)", x = "Period")

# ggsave(here::here("groups_distSI.svg"), width = 10, height = 5)

Continuity

cont <- set_base$period2 %>%
  select(-period_label, -chrono, - facet) %>%
  distinct(id, period) %>%
  mutate(pres = 1L) %>%
  pivot_wider(names_from = period, values_from = pres, values_fill = 0L) %>%
  pivot_longer(starts_with("p"), names_to = "period") %>%
  mutate(period = factor(period, levels = levels(set_base$period2$period))) %>%
  arrange(id, period) %>%
  group_by(id) %>%
  mutate(prev = if_else(value == 1, lag(value), 0L),
         prev = if_else(is.na(prev), 0L, prev)) %>%
  ungroup(id) %>%
  select(-value) %>%
  mutate(variable = "cont") %>%
  rename(value = prev)

n_set <- set_base$period2 %>% 
  add_region() %>% 
  group_by(period_label, reg) %>% 
  summarise(sum = n())

cont %>%
  filter(variable == "cont") %>%
  add_period_label() %>%
  add_region() %>%
  group_by(period_label, reg) %>%
  summarise(n = sum(value, na.rm = TRUE)) %>%
  full_join(n_set) %>%
  mutate(n = if_else(is.na(n), 0L, n),
         perc = (n / sum) * 100,
         reg = if_else(reg == "B", "East Bohemia", "Morava river catchment")) %>%
  ggplot(aes(period_label, perc)) +
  geom_col(color = "black", fill = "white", 
           position = "dodge", show.legend = FALSE) +
  facet_wrap(vars(reg), ncol = 1) +
  labs(x = "Period", y = "Continuity of settlements (%)") +
  theme_bw()

# ggsave(here::here("groups_continuity.svg"), width = 10, height = 5)

Synthesis

labs_variables <- c(
  "settlements_kde" = "Settlement density",
  # "dist_fenced" = "Min. dist. to fenced set. (km)",
  "line_terrain" = "Along terrain line",
  "line_water" = "Along water line",
  # "cont" = "Settlement continuity",
  "hydro" = "Watercourse density",
  # "dem" = "Altitude (DEM)", 
  "altitude" = "Altitude (map)",
  "slope" = "Slope (°)",
  "tpi" = "TPI",
  "rm_dist_chipped" = "Distance to chipped RM",
  "rm_dist_polished" = "Distance to polished RM"
  # "rm_pc1" = "Distance to RM sources 1",
  # "rm_pc2" = "Distance to RM sources 2",
  # "rm_pc3" = "Distance to RM sources 3"
) %>% 
  map_chr(str_wrap, 12)

res_long <- alt %>% 
  bind_rows(slope) %>% 
  bind_rows(tpi) %>% 
  bind_rows(hydro) %>% 
  left_join(set_base$period2) %>% 
  select(id, variable, value, period) %>% 
  bind_rows(along_lines) %>% 
  bind_rows(rm_dist_fin) %>% 
  bind_rows(select(kde, -reg)) %>% 
  filter(id %in% set_base$period2$id) %>% 
  group_by(variable, id, period) %>% 
  summarise(value = mean(value)) %>% 
  ungroup()

res_wide <- res_long %>% 
  pivot_wider(id_cols = c(id, period), 
              names_from = variable, 
              values_from = value, values_fill = 0)

Principal components analysis

pca_per <- res_wide %>%
  add_region() %>% 
  group_by(reg, period) %>% 
  nest() %>% 
  nested_pca()

pca_per %>% plot_nested_pca(var = "period")
# ggsave(here::here("analysis/groups/pca.pdf"), width = 21, height = 35)

pca_per %>% plot_nested_pca(var = "period", pc = c("PC3", "PC4"))
# ggsave(here::here("analysis/groups/pca2.pdf"), width = 21, height = 35)

loadings_sum <- pca_per %>% 
  summarise_variables()

# loadings_sum_m <- as.matrix(loadings_sum[, paste0("PC", 1:(ncol(loadings_sum) - 1))])
# rownames(loadings_sum_m) <- loadings_sum$column
# heatmap(loadings_sum_m)

loadings_sum %>% 
  mutate(r = (PC1 + PC2 + PC3 + PC4 + PC5)/(ncol(loadings_sum) - 1)) %>% 
  select(column, r) %>% 
  arrange(reg, desc(r)) %>% 
  knitr::kable()

Groups in the data

library(mclust)

mb_clust <- pca_per %>% mbc_cluster()

classif <- mb_clust %>%
  dplyr::select(reg, period, classif) %>% 
  tidyr::unnest(c(classif)) %>% 
  dplyr::ungroup(c(reg, period)) %>% 
  dplyr::mutate(g = str_c(reg, g))

set_spat %>% 
  select(id) %>% 
  inner_join(classif, by = "id") %>% 
  ggplot() +
  geom_sf(aes(color = g)) +
  facet_grid(vars(period), vars(g)) +
  theme_void()

# set_spat %>% 
#   select(id) %>% 
#   inner_join(classif, by = "id") %>% 
#   write_sf(here::here("analysis/groups/groups.geojson"), delete_layer = TRUE)

plot_g_distribution <- function(var) {
  res_long %>% 
    filter(variable == var) %>% 
    inner_join(classif, by = c("id", "period")) %>% 
    add_period_label() %>% 
    ggplot(aes(g, value)) +
    geom_boxplot() +
    facet_wrap(vars(period_label), scales = "free_x") +
    theme_bw() +
    labs(title = var)
}

plot_g_distribution("altitude")
plot_g_distribution("slope")
plot_g_distribution("tpi")

plot_g_distribution("hydro")

plot_g_distribution("line_terrain")
plot_g_distribution("line_water")

plot_g_distribution("rm_dist_chipped")
plot_g_distribution("rm_dist_polished")

plot_g_distribution("settlements_kde")

\newpage

References

::: {#refs} :::

\newpage

Colophon

This report was generated on r Sys.time() using the following computational environment and dependencies:

# which R packages and versions?
if ("devtools" %in% installed.packages()) devtools::session_info()

petrpajdla/settlements documentation built on June 27, 2022, 10:21 p.m.

rdrr.io home R language documentation Run R code online

CRAN packages Bioconductor packages R-Forge packages GitHub packages

Note that we can't provide technical support on individual packages. You should contact the package authors for that.

petrpajdla/settlements
Compendium accompanying article on settlement patters in the Neolithic period.

In petrpajdla/settlements: Compendium accompanying article on settlement patters in the Neolithic period.

Introduction

Data

Analysis

Settlement density

Terrain surrounding the settlements

Altitude

Slope

TPI

Density of water courses

Arrangement along natural lines

Distance to raw material sources

Continuity

Synthesis

Principal components analysis

Groups in the data

References

Colophon

R Package Documentation

Browse R Packages

We want your feedback!

petrpajdla/settlements Compendium accompanying article on settlement patters in the Neolithic period.

In petrpajdla/settlements: Compendium accompanying article on settlement patters in the Neolithic period.

Introduction

Data

Analysis

Settlement density

Terrain surrounding the settlements

Altitude

Slope

TPI

Density of water courses

Arrangement along natural lines

Distance to raw material sources

Continuity

Synthesis

Principal components analysis

Groups in the data

References

Colophon

R Package Documentation

Browse R Packages

We want your feedback!

petrpajdla/settlements
Compendium accompanying article on settlement patters in the Neolithic period.