knitr::opts_chunk$set( collapse = FALSE, warning = FALSE, message = FALSE, echo = FALSE, cache = FALSE, comment = "#>", # fig.path = "../figures/", dpi = 300 )
library(sf) library(tidyverse) library(spatstat) library(patchwork) devtools::load_all(".")
The analysis is divided into three scales:
This document describes results for Pottery groups.
The study area is broadly defined as Easter part of Bohemia (abbreviated B) and Morava river catchment (M). It spans across the area of the Czech Republic and bordering regions of Austria and Slovakia.
Data is published in the Journal of Open Archaeology Data:
Pajdla, P. and Trampota, F., 2021. Neolithic Settlements in Central Europe: Data from the Project ‘Lifestyle as an Unintentional Identity in the Neolithic’. Journal of Open Archaeology Data, 9, p.13. DOI: http://doi.org/10.5334/joad.88
source(here::here("analysis/code/data_load.R"))
set_base$period2 %>% table_nr_set()
settlements2 %>% plot_nr_set()
Settlement density is estimated using KDE. Kernel size of 4 km is used. This value is completely arbitrary but is selected because settlement clusters are nicely highlighted at this scale.
settlements2 %>% plot_kde()
kde <- settlements2 %>% est_kde() %>% mutate(reg = str_extract(id, "^."))
kde %>% add_period_label() %>% mutate(reg = if_else(reg == "B", "East Bohemia", "Morava river catchment")) %>% add_period_label() %>% ggplot(aes(period_label, value)) + geom_violin() + geom_boxplot(alpha = 0.4, width = 0.2) + geom_jitter(alpha = 0.05, width = 0.4) + facet_wrap(vars(reg)) + theme_bw() + theme(axis.text.x = element_text(angle = -45, hjust = 0)) + labs(y = "KDE (Density of settlements per square km)", x = "Period") # ggsave(here::here("groups_kde.svg"), width = 10, height = 5)
alt <- settlements2 %>% select(id, altitude) %>% st_drop_geometry() %>% distinct(id, altitude) %>% transmute(id, value = altitude, variable = "altitude")
settlements2 %>% mutate(reg = str_extract(id, "^."), reg = if_else(reg == "B", "East Bohemia", "Morava river catchment")) %>% add_period_label() %>% ggplot(aes(period_label, altitude)) + geom_violin() + geom_boxplot(alpha = 0.4, width = 0.2) + geom_jitter(alpha = 0.05, width = 0.4) + facet_wrap(vars(reg)) + theme_bw() + theme(axis.text.x = element_text(angle = -45, hjust = 0)) + labs(y = "Altitude (m)", x = "Period") # ggsave(here::here("groups_altitude.svg"), width = 10, height = 5)
slope <- read_csv(here::here("analysis/data/derived_data", "slope_at_points.csv")) %>% filter(id %in% settlements2$id) %>% mutate(variable = "slope")
slope %>% left_join(settlements2, by = "id") %>% mutate(reg = str_extract(id, "^."), reg = if_else(reg == "B", "East Bohemia", "Morava river catchment")) %>% add_period_label() %>% ggplot(aes(period_label, value)) + geom_violin() + geom_boxplot(alpha = 0.4, width = 0.2) + geom_jitter(alpha = 0.05, width = 0.4) + facet_wrap(vars(reg)) + theme_bw() + theme(axis.text.x = element_text(angle = -45, hjust = 0)) + labs(y = "Slope (°)", x = "Period") # ggsave(here::here("groups_slope.svg"), width = 10, height = 5)
Terrain / Topographic position index
tpi <- read_csv(here::here("analysis/data/derived_data", "tpi_at_points.csv")) %>% filter(id %in% settlements2$id) %>% mutate(variable = "tpi")
tpi %>% left_join(settlements2, by = "id") %>% mutate(reg = str_extract(id, "^."), reg = if_else(reg == "B", "East Bohemia", "Morava river catchment")) %>% add_period_label() %>% ggplot(aes(period_label, value)) + geom_violin() + geom_boxplot(alpha = 0.4, width = 0.2) + geom_jitter(alpha = 0.05, width = 0.4) + facet_wrap(vars(reg)) + theme_bw() + theme(axis.text.x = element_text(angle = -45, hjust = 0)) + labs(y = "TPI", x = "Period")
# ggsave(here::here("groups_tpi.svg"), width = 10, height = 5)
hydro <- read_csv(here::here("analysis/data/derived_data", "hydro_at_points.csv")) %>% filter(id %in% settlements2$id) %>% mutate(variable = "hydro") %>% rename(value = hydro_kde)
hydro %>% left_join(settlements2, by = "id") %>% mutate(reg = str_extract(id, "^."), reg = if_else(reg == "B", "East Bohemia", "Morava river catchment")) %>% add_period_label() %>% ggplot(aes(period_label, value)) + geom_violin() + geom_boxplot(alpha = 0.4, width = 0.2) + geom_jitter(alpha = 0.05, width = 0.4) + facet_wrap(vars(reg)) + theme_bw() + theme(axis.text.x = element_text(angle = -45, hjust = 0)) + labs(y = "Hydrology KDE", x = "Period")
# ggsave(here::here("groups_hydro.svg"), width = 10, height = 5)
along_lines <- read_rds(here::here(derived_data, "lines/linear_arrangement.RDS")) along_lines <- along_lines$groups %>% mutate(chrono = str_remove(chrono, ".$")) %>% group_by(id, chrono, period, variable) %>% summarise(value = max(value)) %>% ungroup(id, chrono, variable) %>% mutate(value = value / n()) %>% select(-chrono) %>% group_by(variable, period, id) %>% summarise(value = mean(value), .groups = "drop")
# calculated separately in a script `rm_distance.R` # dist_pts <- dist_to_rm(set_spat, rm_pts, "rm") # dist_lns <- dist_to_rm(set_spat, rm_lns, "rm") rm_dist <- read_csv(here::here("analysis/data/input_data", "rm_dist.csv")) %>% mutate(rm = str_to_lower(rm)) %>% filter(id %in% settlements2$id) # 0 = not used # 0.5 = somewhat used # 1 = used rm_b <- readxl::read_xlsx(here::here("analysis/data/raw_data", "sidliste_vs_zdroje.xlsx"), sheet = "B") rm_m <- readxl::read_xlsx(here::here("analysis/data/raw_data", "sidliste_vs_zdroje.xlsx"), sheet = "M") rm_relevance <- bind_rows(b = rm_b, m = rm_m, .id = "reg") %>% mutate(across(starts_with("p"), function(x) x * 0.5)) %>% pivot_longer(starts_with("p"), names_to = "period", values_to = "relevance") %>% rename(rm = surovina) rm_dist_rel <- set_base$period2 %>% filter_rel_dist_rm() # --- rm_types <- rm_dist %>% select(rm, type) %>% distinct() rm_types_tab <- rm_types %>% pull(type) names(rm_types_tab) <- rm_types %>% pull(rm) # --- rm_dist_fin <- rm_dist_rel %>% prep_dist_rel_rm()
rm_dist_fin %>% filter(variable == "rm_dist_chipped") %>% add_period_label() %>% mutate(reg = str_extract(id, "^."), reg = if_else(reg == "B", "East Bohemia", "Morava river catchment")) %>% ggplot(aes(period_label, value)) + geom_violin() + geom_boxplot(alpha = 0.4, width = 0.2) + geom_jitter(alpha = 0.05, width = 0.4) + facet_wrap(vars(reg)) + theme_bw() + theme(axis.text.x = element_text(angle = -45, hjust = 0)) + labs(y = "Distance to lithic raw. mat. (km)", x = "Period") # ggsave(here::here("groups_distSI.svg"), width = 10, height = 5)
cont <- set_base$period2 %>% select(-period_label, -chrono, - facet) %>% distinct(id, period) %>% mutate(pres = 1L) %>% pivot_wider(names_from = period, values_from = pres, values_fill = 0L) %>% pivot_longer(starts_with("p"), names_to = "period") %>% mutate(period = factor(period, levels = levels(set_base$period2$period))) %>% arrange(id, period) %>% group_by(id) %>% mutate(prev = if_else(value == 1, lag(value), 0L), prev = if_else(is.na(prev), 0L, prev)) %>% ungroup(id) %>% select(-value) %>% mutate(variable = "cont") %>% rename(value = prev) n_set <- set_base$period2 %>% add_region() %>% group_by(period_label, reg) %>% summarise(sum = n()) cont %>% filter(variable == "cont") %>% add_period_label() %>% add_region() %>% group_by(period_label, reg) %>% summarise(n = sum(value, na.rm = TRUE)) %>% full_join(n_set) %>% mutate(n = if_else(is.na(n), 0L, n), perc = (n / sum) * 100, reg = if_else(reg == "B", "East Bohemia", "Morava river catchment")) %>% ggplot(aes(period_label, perc)) + geom_col(color = "black", fill = "white", position = "dodge", show.legend = FALSE) + facet_wrap(vars(reg), ncol = 1) + labs(x = "Period", y = "Continuity of settlements (%)") + theme_bw() # ggsave(here::here("groups_continuity.svg"), width = 10, height = 5)
labs_variables <- c( "settlements_kde" = "Settlement density", # "dist_fenced" = "Min. dist. to fenced set. (km)", "line_terrain" = "Along terrain line", "line_water" = "Along water line", # "cont" = "Settlement continuity", "hydro" = "Watercourse density", # "dem" = "Altitude (DEM)", "altitude" = "Altitude (map)", "slope" = "Slope (°)", "tpi" = "TPI", "rm_dist_chipped" = "Distance to chipped RM", "rm_dist_polished" = "Distance to polished RM" # "rm_pc1" = "Distance to RM sources 1", # "rm_pc2" = "Distance to RM sources 2", # "rm_pc3" = "Distance to RM sources 3" ) %>% map_chr(str_wrap, 12)
res_long <- alt %>% bind_rows(slope) %>% bind_rows(tpi) %>% bind_rows(hydro) %>% left_join(set_base$period2) %>% select(id, variable, value, period) %>% bind_rows(along_lines) %>% bind_rows(rm_dist_fin) %>% bind_rows(select(kde, -reg)) %>% filter(id %in% set_base$period2$id) %>% group_by(variable, id, period) %>% summarise(value = mean(value)) %>% ungroup() res_wide <- res_long %>% pivot_wider(id_cols = c(id, period), names_from = variable, values_from = value, values_fill = 0)
pca_per <- res_wide %>% add_region() %>% group_by(reg, period) %>% nest() %>% nested_pca()
pca_per %>% plot_nested_pca(var = "period") # ggsave(here::here("analysis/groups/pca.pdf"), width = 21, height = 35)
pca_per %>% plot_nested_pca(var = "period", pc = c("PC3", "PC4")) # ggsave(here::here("analysis/groups/pca2.pdf"), width = 21, height = 35)
loadings_sum <- pca_per %>% summarise_variables()
# loadings_sum_m <- as.matrix(loadings_sum[, paste0("PC", 1:(ncol(loadings_sum) - 1))]) # rownames(loadings_sum_m) <- loadings_sum$column # heatmap(loadings_sum_m)
loadings_sum %>% mutate(r = (PC1 + PC2 + PC3 + PC4 + PC5)/(ncol(loadings_sum) - 1)) %>% select(column, r) %>% arrange(reg, desc(r)) %>% knitr::kable()
library(mclust)
mb_clust <- pca_per %>% mbc_cluster() classif <- mb_clust %>% dplyr::select(reg, period, classif) %>% tidyr::unnest(c(classif)) %>% dplyr::ungroup(c(reg, period)) %>% dplyr::mutate(g = str_c(reg, g)) set_spat %>% select(id) %>% inner_join(classif, by = "id") %>% ggplot() + geom_sf(aes(color = g)) + facet_grid(vars(period), vars(g)) + theme_void()
# set_spat %>% # select(id) %>% # inner_join(classif, by = "id") %>% # write_sf(here::here("analysis/groups/groups.geojson"), delete_layer = TRUE)
plot_g_distribution <- function(var) { res_long %>% filter(variable == var) %>% inner_join(classif, by = c("id", "period")) %>% add_period_label() %>% ggplot(aes(g, value)) + geom_boxplot() + facet_wrap(vars(period_label), scales = "free_x") + theme_bw() + labs(title = var) } plot_g_distribution("altitude") plot_g_distribution("slope") plot_g_distribution("tpi") plot_g_distribution("hydro") plot_g_distribution("line_terrain") plot_g_distribution("line_water") plot_g_distribution("rm_dist_chipped") plot_g_distribution("rm_dist_polished") plot_g_distribution("settlements_kde")
\newpage
::: {#refs} :::
\newpage
This report was generated on r Sys.time()
using the following computational
environment and dependencies:
# which R packages and versions? if ("devtools" %in% installed.packages()) devtools::session_info()
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.