knitr::opts_chunk$set(
  collapse = FALSE,
  warning = FALSE,
  message = FALSE,
  echo = FALSE,
  cache = TRUE,
  comment = "#>",
  # fig.path = "../figures/",
  dpi = 300
)
library(sf)
library(tidyverse)
library(spatstat)
library(patchwork)
devtools::load_all(".")

Introduction

The analysis is divided into two scales:

The study area is broadly defined as Easter part of Bohemia (abbreviated B) and Morava river catchment (M). It spans across the area of the Czech Republic and bordering regions of Austria and Slovakia.

Background

derived_data <- "analysis/data/derived_data"
input_data <- "analysis/data/input_data"
path_maps <- here::here(derived_data, "maps/")
path_data <- here::here(derived_data)
path_temp <- here::here(derived_data, "temp/")

# mask
mask <- st_read(paste0(path_maps, "mask.geojson"), quiet = TRUE)
bbox <- st_read(paste0(path_maps, "bbox.geojson"), quiet = TRUE)

# rivers + admin for plots
admin0 <- st_read(paste0(path_maps, "admin0.geojson"), quiet = TRUE)
admin1 <- st_read(paste0(path_maps, "admin1.geojson"), quiet = TRUE)
rivers0 <- st_read(paste0(path_maps, "rivers0.geojson"), quiet = TRUE)
rivers1 <- st_read(paste0(path_maps, "rivers1.geojson"), quiet = TRUE)

# labels
labs_chrono <- read_rds(here::here(input_data, "chrono_labels.RDS"))

# database
set_base <- read_rds(here::here(input_data, "settlements.RDS"))

set_spat <- st_read(here::here(input_data, "settlements_sf.geojson"), 
                    quiet = TRUE)

settlements1 <- set_spat %>%
  right_join(set_base$period1, by = "id") %>% 
  mutate(label = labs_chrono$chrono1[chrono])
settlements2 <- set_spat %>%
  right_join(set_base$period2, by = "id") %>% 
  mutate(label = labs_chrono$chrono2[chrono])

# colors
mycol <- c("B" = "#7FC97F", "M" = "#BEAED4", "b" = "#7FC97F", "m" = "#BEAED4"
)

Pottery traditions

set_base$period1 %>%
  table_nr_set()
settlements1 %>%
  plot_nr_set()

Pottery groups

set_base$period2 %>%
  table_nr_set()
settlements2 %>%
  plot_nr_set()

Settlement density

Settlement density is estimated using KDE. Kernel size of 4 km is used. This value is completely arbitrary but is selected because settlement clusters are nicely highlighted at this scale.

Pottery traditions

res1_long <- settlements1 %>% 
  est_kde()
settlements1 %>% 
  plot_kde()

Pottery groups

res2_long <- settlements2 %>% 
  est_kde()
settlements2 %>% 
  plot_kde()

Distance to raw material sources

Raw materials distribution

Raw material sources are defined as either points or lines. Each raw material can be identified by single or multiple simple features. If the location of source used in the Neolithic period is unknown or several sources or even modes of procurement (e. g. from river bed etc.) are possible, the most probable are included.

Minimum, mean, median and maximum distances are calculated, median is used as a result.

rm_pts <- st_read(here::here(derived_data, "rm_points.geojson"), quiet = TRUE) %>% 
  add_rm_type()

rm_lns <- st_read(here::here(derived_data, "rm_lines.geojson"), quiet = TRUE) %>% 
  add_rm_type()
ggplot() + 
  geom_sf(data = mask, fill = "gray80", color = NA, alpha = 0.4) +
  geom_sf(data = rivers1, color = "gray60", size = 0.4, alpha = 0.4) +
  geom_sf(data = rivers0, color = "gray60", size = 0.6, alpha = 0.4) +
  geom_sf(data = set_spat, alpha = 0.1, color = "gray80") +
  geom_sf(data = rm_pts, aes(color = rm), shape = 4) +
  geom_sf(data = rm_lns, aes(color = rm)) +
  scale_x_continuous(expand = c(0, 0)) +
  scale_y_continuous(expand = c(0, 0)) +
  facet_wrap(vars(label)) +
  theme_void() +
  theme(panel.border = element_rect(color = "black", fill = NA)) +
  labs(color = "Raw material")

Distances

# calculated separately in a script `rm_distance.R`
# dist_pts <- dist_to_rm(set_spat, rm_pts, "rm")
# dist_lns <- dist_to_rm(set_spat, rm_lns, "rm")

rm_dist <- read_csv(paste0(path_temp, "rm_dist.csv")) %>% 
  mutate(rm = str_to_lower(rm))
# rm_dist %>%
#   ggplot(aes(x = min, y = rm, fill = region)) +
#   geom_violin() +
#   scale_fill_brewer(palette = "Accent") +
#   facet_wrap(~type, scales = "free_y") +
#   labs(x = "min. dist. (km)", y = "raw material", fill = "Region") +
#   theme_minimal() +
#   theme(legend.position = "bottom",
#         panel.border = element_rect(color = "black", fill = NA))

Settlement density against distance to raw materials

Hypothesis: In Morava river catchment settlement position is directly associated with raw material source.

We compare settlement density (KDE) with distance to chosen raw materials. There is no strong evidence that the distribution of settlements is associated with distance to any of the key raw material sources occurring in the region in any of the studied periods. There are some mild negative correlations in the period 4.8 -- 4.2 k BCE for polished stone tools materials. Strongest connection is observed between given pottery groups in period 4.8 -- 4.6 k BCE in case of ad/pmd/zelesice raw material sources.

res1_long %>% plot_dist_lm()
res1_long %>% tab_dist_lm()
res2_long %>% plot_dist_lm()
res2_long %>% tab_dist_lm()

RM relevant to given period and/or region

# 0 = not used
# 0.5 = somewhat used
# 1 = used
rm_b <- readxl::read_xlsx(here::here("analysis/data/raw_data",
                                     "sidliste_vs_zdroje.xlsx"), sheet = "B")
rm_m <- readxl::read_xlsx(here::here("analysis/data/raw_data",
                                     "sidliste_vs_zdroje.xlsx"), sheet = "M")

rm_relevance <- bind_rows(b = rm_b, m = rm_m, .id = "reg") %>% 
  mutate(across(starts_with("p"), function(x) x * 0.5)) %>% 
  pivot_longer(starts_with("p"), 
               names_to = "period", values_to = "relevance") %>% 
  rename(rm = surovina)

rm_dist_rel_p1 <- set_base$period2 %>% 
  filter_rel_dist_rm()

rm_dist_rel_p2 <- set_base$period2 %>% 
  filter_rel_dist_rm()

RM relations in general pottery traditions

Morava river catchment

rm_dist_rel_p1 %>% 
  plot_rm_reg_dist("m")

Eastern part of Bohemia

rm_dist_rel_p1 %>% 
  plot_rm_reg_dist("b")

RM relations in detailed pottery groups

Morava river catchment

rm_dist_rel_p2 %>% 
  plot_rm_reg_dist("m")

Eastern part of Bohemia

rm_dist_rel_p2 %>% 
  plot_rm_reg_dist("b")
rm_types <- rm_dist %>% 
  select(rm, type) %>% 
  distinct()

rm_types_tab <- rm_types %>% 
  pull(type)

names(rm_types_tab) <- rm_types %>% 
  pull(rm)

res1_long <- rm_dist_rel_p1 %>% 
  prep_dist_rel_rm() %>% 
  bind_rows(res1_long)

res2_long <- rm_dist_rel_p2 %>% 
  prep_dist_rel_rm() %>% 
  bind_rows(res2_long)
rm_dist_rel_p1 %>% 
  count_dist_rel_rm() %>% 
  plot_dist_rel_rm()
rm_dist_rel_p2 %>% 
  count_dist_rel_rm() %>% 
  plot_dist_rel_rm()
rm(list = ls(pattern = "^rm_"))

Density of watercourses

In settlement structure studies it is often assumed that past human settlements have some kind of a relation to the network of watercourses. Numerous problems are faced while assessing the relationship between settlements and close watercourse. Most importantly the man-made recent changes in the structure and intensity of watercourses (and landscape in general) are a major factor impairing such analysis.

Usually, the settlement - watercourse relationship is expressed in some form of a metric giving shortest distance or so. We choose an approach used to study road networks in modern urban areas [e. g. @lin2020].

Most of the input data is derived from DIBAVOD data set A01 layer (https://www.dibavod.cz/).

Density (kernel smoothed intensity) is estimated using spatstat::density.psp function with sigma 2000 m.

# pkg raster
# hydro_dens <- raster::raster(
#   here::here(derived_data, "hydro", "density_raster.tif"))
# 
# hydro_dens_at_points <- hydro_dens %>% 
#   raster::extract(set_spat) %>% 
#   as_tibble() %>% 
#   rename(hydro_kde = value) %>% 
#   bind_cols(select(st_drop_geometry(set_spat), id))

# pkg stars
# hydro_dens <- stars::read_stars(
#   here::here(derived_data, "hydro", "density_raster.tif")
# ) %>% 
#   st_set_crs(value = st_crs(set_spat))
# 
# hydro_dens_at_points <- hydro_dens %>% 
#   stars::st_extract(set_spat) %>% 
#   st_drop_geometry() %>% 
#   rename(hydro_kde = density_raster.tif) %>% 
#   bind_cols(select(st_drop_geometry(set_spat), id))

# pkg terra, implementing buffer around settlements
# settlements represented as points

hydro_dens <- terra::rast(
  here::here(derived_data, "hydro", "density_raster.tif")
)

hydro_dens_at_points <- terra::extract(
  hydro_dens, 
  terra::vect(st_buffer(set_spat, dist = 2e3))
) %>% 
  group_by(ID) %>% 
  summarise(hydro_kde = mean(density_raster)) %>% 
  bind_cols(id = set_spat$id) %>% 
  select(-ID)

# bind to results
res1_long <- set_base$period1 %>% 
  prep_hydro_dens_res() %>% 
  bind_rows(res1_long)

res2_long <- set_base$period2 %>% 
  select(-facet) %>% 
  prep_hydro_dens_res() %>% 
  bind_rows(res2_long)
# hydro_dens %>% 
#   raster::mask(mask) %>% 
#   raster::rasterToPoints() %>%
#   as_tibble() %>%
#   rename(KDE = density_raster) %>% 
#   ggplot() +
#   geom_raster(aes(x, y, fill = KDE)) +
#   geom_sf(data = set_spat, alpha = 0.1, shape = 1, color = "white") +
#   theme_void()

# stars
# ggplot() +
#   stars::geom_stars(data = st_crop(hydro_dens, mask), sf = TRUE) +
#   geom_sf(data = set_spat, alpha = 0.1, shape = 1, color = "white") +
#   theme_void() +
#   labs(fill = "KDE")
rm(list = c("hydro_dens", "hydro_dens_at_points"))

Terrain surrounding the settlements

dem_terra <- terra::rast(here::here(derived_data, "archive/dem/dem_aster_sjtsk_stars.tif"))

Slope

dem_slope <- terra::terrain(dem_terra, v = "slope")

# slope in the vicinity of settlements - 2km
slope <- terra::extract(dem_slope, 
                        terra::vect(st_buffer(set_spat, dist = 2e3)))

# derive median aspect
mean_slope <- slope %>% 
  tibble() %>% 
  group_by(ID) %>% 
  summarise(mean_slope = mean(slope)) %>% 
  bind_cols(id = set_spat$id) %>% 
  mutate(reg = str_extract(id, "^."))

# shapiro.test(filter(mean_slope, reg == "B")$mean_slope)
# hist(filter(mean_slope, reg == "B")$mean_slope)
# qqnorm(filter(mean_slope, reg == "B")$mean_slope)
# 
# shapiro.test(filter(mean_slope, reg == "M")$mean_slope)
# hist(filter(mean_slope, reg == "M")$mean_slope)
# qqnorm(filter(mean_slope, reg == "M")$mean_slope)

# mean_slope %>% 
#   inner_join(settlements2, by = "id") %>% 
#   ggplot(aes(mean_slope)) + 
#   geom_histogram(color = "black", fill = "white") +
#   facet_grid(vars(period_label), vars(reg), scales = "free_y") +
#   theme_bw() +
#   labs(x = "slope (°)")

# mean_slope %>% 
#   inner_join(settlements2, by = "id") %>% 
#   ggplot(aes(mean_slope, period_label)) + 
#   geom_boxplot(color = "black", fill = "white") +
#   facet_wrap(vars(reg), scales = "free_y") +
#   theme_bw() +
#   labs(x = "slope (°)")
res1_long <- mean_slope %>% 
  select(-ID, -reg) %>% 
  inner_join(st_drop_geometry(select(settlements1, id, period))) %>% 
  transmute(id, 
            period,
            value = mean_slope, 
            variable = "slope") %>% 
  bind_rows(res1_long)

res2_long <- mean_slope %>% 
  select(-ID, -reg) %>% 
  inner_join(st_drop_geometry(select(settlements2, id, period))) %>% 
  transmute(id, 
            period,
            value = mean_slope, 
            variable = "slope") %>% 
  bind_rows(res2_long)
rm(list = c("dem_slope", "slope", "mean_slope"))
gc()
# dem_data <- read_csv(here::here(derived_data, "dem/vals_dem.csv"))
# 
# # bind to results
# res1_long <- set_base$period1 %>% 
# prep_dem_res() %>% 
# bind_rows(res1_long)
# 
# res2_long <- set_base$period2 %>% 
# select(-facet) %>% 
# prep_dem_res() %>% 
# bind_rows(res2_long)
# altitude <- set_spat %>% st_drop_geometry() %>% 
# as_tibble() %>% 
# select(id, altitude)
# 
# # bind to results
# res1_long <- set_base$period1 %>% 
# prep_alt_res() %>% 
# bind_rows(res1_long)
# 
# res2_long <- set_base$period2 %>% 
# select(-facet) %>% 
# prep_alt_res() %>% 
# bind_rows(res2_long)
# alt_plot <- set_spat %>% st_drop_geometry() %>% 
# as_tibble() %>% 
# select(id, altitude) %>% 
# left_join(dem_data)
# 
# alt_cor <- alt_plot %>% 
# summarise(cor = cor(altitude, dem)) %>% 
# pull(cor) %>% 
# round(2)
# 
# alt_plot %>% 
# ggplot(aes(altitude, dem)) +
# geom_point() +
# theme_minimal() +
# annotate(geom = "text", x = 175, y = 525, 
# label = paste("Cor. ", alt_cor))

Exploring aspect

Aspect is calculated from the DEM, 0 means the slope is facing North, 90 East, 180 South and 270 West. The settlements are buffered by 200 meters and the median aspect is taken. The vicinity of settlements is thus of interest, not the given point.

In both Morava river drainage basin and Eastern part of Bohemia, median aspect is normally distributed.

# shapiro.test(filter(med_asp, reg == "B")$med_asp)
# shapiro.test(filter(med_asp, reg == "M")$med_asp)
# # polar plot
# med_asp %>% 
#   ggplot(aes(med_asp)) + 
#   geom_histogram(color = "black", fill = "white") +
#   scale_x_continuous(limits = c(0, 359), 
#                      breaks = c(0, 90, 180, 270)) +
#   coord_polar(start = 0) + 
#   facet_wrap(vars(reg)) +
#   theme_bw() +
#   labs(x = "median aspect (°)")
# med_asp %>% 
#   inner_join(settlements2, by = "id") %>% 
#   ggplot(aes(med_asp)) + 
#   geom_histogram(color = "black", fill = "white") +
#   scale_x_continuous(limits = c(0, 360), 
#                      breaks = c(0, 90, 180, 270, 360)) +
#   facet_grid(vars(period_label), vars(reg), scales = "free_y") +
#   theme_bw() +
#   labs(x = "median aspect (°)")
rm(list = c("dem_asp", "asp", "med_asp"))

TPI

Terrain / Topographic roughness index

# dem_tpi <- terra::terrain(dem_terra, v = "TPI")
# 
# # tpi in the vicinity of settlements - 2km
# tpi <- terra::extract(dem_tpi, 
#                       terra::vect(st_buffer(set_spat, dist = 2e3)))
# 
# # derive median aspect
# mean_slope <- slope %>% 
#   tibble() %>% 
#   group_by(ID) %>% 
#   summarise(mean_slope = mean(slope)) %>% 
#   bind_cols(id = set_spat$id) %>% 
#   mutate(reg = str_extract(id, "^."))

# shapiro.test(filter(mean_slope, reg == "B")$mean_slope)
# hist(filter(mean_slope, reg == "B")$mean_slope)
# qqnorm(filter(mean_slope, reg == "B")$mean_slope)
# 
# shapiro.test(filter(mean_slope, reg == "M")$mean_slope)
# hist(filter(mean_slope, reg == "M")$mean_slope)
# qqnorm(filter(mean_slope, reg == "M")$mean_slope)

# mean_slope %>% 
#   inner_join(settlements2, by = "id") %>% 
#   ggplot(aes(mean_slope)) + 
#   geom_histogram(color = "black", fill = "white") +
#   facet_grid(vars(period_label), vars(reg), scales = "free_y") +
#   theme_bw() +
#   labs(x = "slope (°)")

# mean_slope %>% 
#   inner_join(settlements2, by = "id") %>% 
#   ggplot(aes(mean_slope, period_label)) + 
#   geom_boxplot(color = "black", fill = "white") +
#   facet_wrap(vars(reg), scales = "free_y") +
#   theme_bw() +
#   labs(x = "slope (°)")

Settlement 'hierarchy'

Minimal distance to fortified settlements in given regions is explored.

dist_fenced <- read_rds(here::here(derived_data, "fortif/distance_fortif.RDS"))

# results 1
res1_long <- dist_fenced$traditions %>% 
  prep_dist_fenced() %>% 
  bind_rows(res1_long)

# results 2
dist_fenced_p2 <- dist_fenced$groups %>% 
  prep_dist_fenced() %>% 
  bind_rows(res2_long)

Settlement 'continuity'

Is the same location settled in the previous period?

res1_long <- set_base$period1 %>%
settlement_continuity() %>% 
bind_rows(res1_long)

res2_long <- set_base$period2 %>%
settlement_continuity() %>% 
bind_rows(res2_long)
n_set_per1 <- set_base$period1 %>% 
add_region() %>% 
group_by(period_label, reg) %>% 
summarise(sum = n())

res1_long %>% 
plot_continuity(y = n_set_per1)
n_set_per2 <- set_base$period2 %>% 
add_region() %>% 
group_by(period_label, reg) %>% 
summarise(sum = n())

res2_long %>% 
plot_continuity(y = n_set_per2)

Arrangement along natural lines

Arrangement of settlements along natural 'terrain' lines or along waterways is explored.

along_lines <- read_rds(here::here(derived_data, "lines/linear_arrangement.RDS"))

along_lines1 <- along_lines$traditions %>% 
  mutate(chrono = str_remove(chrono, ".$")) %>% 
  group_by(id, chrono, period, variable) %>% 
  summarise(value = max(value)) %>% 
  ungroup(id, chrono, variable) %>% 
  mutate(value = value / n()) %>% 
  select(-chrono) %>% 
  group_by(variable, period, id) %>% 
  summarise(value = mean(value), .groups = "drop")

along_lines2 <- along_lines$groups %>% 
  distinct(id, chrono, period, variable, value) %>% 
  group_by(period) %>% 
  mutate(value = value / n()) %>% 
  select(-chrono) %>% 
  group_by(variable, period, id) %>% 
  summarise(value = mean(value), .groups = "drop")

# results 1
res1_long <- res1_long %>% 
  bind_rows(along_lines1)

# results 2
res2_long <- res2_long %>% 
  bind_rows(along_lines2)
rm(list = ls(pattern = "^rm"))
rm(list = ls(pattern = "^along"))
rm(list = ls(pattern = "line$"))
rm(list = ls(pattern = "violin$"))
rm(list = ls(pattern = "^n_set"))
rm(list = ls(pattern = "^hydro"))
rm(list = ls(pattern = "fenced$"))
rm(list = ls(pattern = "dem_data"))
rm(list = ls(pattern = "^alt"))

Synthesis

res1_wide <- res1_long %>% prep_wide_res()
# res1_long <- res1_wide %>% 
#   pivot_longer(-c(id, period), names_to = "variable", values_to = "value")

res2_wide <- res2_long %>% prep_wide_res()
# res2_long <- res2_wide %>% 
#   pivot_longer(-c(id, period), names_to = "variable", values_to = "value")

# # scale cols
# res1_wide <- res1_wide %>% 
#   mutate(across(where(is.numeric), scale))
# 
# res2_wide <- res2_wide %>% 
#   mutate(across(where(is.numeric), scale))

Violin plots

labs_variables <- c(
  "settlements_kde" = "Settlement density",
  "dist_fenced" = "Min. dist. to fenced set. (km)",
  "line_terrain" = "Along terrain line",
  "line_water" = "Along water line",
  # "cont" = "Settlement continuity",
  "hydro_kde" = "Watercourse density",
  # "dem" = "Altitude (DEM)", 
  "altitude" = "Altitude (map)",
  "slope" = "Slope (°)",
  "rm_dist_chipped" = "Distance to chipped RM",
  "rm_dist_polished" = "Distance to polished RM"
  # "rm_pc1" = "Distance to RM sources 1",
  # "rm_pc2" = "Distance to RM sources 2",
  # "rm_pc3" = "Distance to RM sources 3"
) %>% 
  map_chr(str_wrap, 12)
res1_violin <- res1_long %>% plot_violin()
res1_violin
res2_violin <- res2_long %>% plot_violin()
res2_violin
# ggsave(here::here("analysis/figures", "res1_violin.png"), res1_violin, 
#        width = 9, height = 12)
# ggsave(here::here("analysis/figures", "res2_violin.png"), res2_violin, 
#        width = 9, height = 12)

Line plots

res1_line <- res1_long %>% plot_lines()
res1_line
res2_line <- res2_long %>% plot_lines()
res2_line
# ggsave(here::here("analysis/figures", "res1_line.png"), res1_line, 
#        width = 7, height = 7)
# ggsave(here::here("analysis/figures", "res2_line.png"), res2_line, 
#        width = 7, height = 7)
# # fit PCA
# pca1_all_fit <- res1_wide %>%
#   # select(-cont) %>%
#   select(where(is.numeric)) %>%
#   scale(center = TRUE, scale = TRUE) %>%
#   prcomp()
# 
# # augmented fitted values
# pca1_all_aug <- pca1_all_fit %>%
#   broom::augment(res1_wide) %>%
#   select(-".rownames") %>%
#   rename_with(\(x) str_remove(x, ".fitted")) %>%
#   settlements::add_region() %>%
#   settlements::add_period_label() %>%
#   settlements::add_neo()
# 
# # rotation matrix
# pca1_all_rot <- pca1_all_fit %>%
#   broom::tidy(matrix = "rotation") %>%
#   mutate(PC = str_c("PC", PC)) %>%
#   pivot_wider(names_from = PC)
# 
# # var explained
# pca1_all_var_exp <- pca1_all_fit %>%
#   broom::augment(res1_wide) %>%
#   summarise(across(contains("PC"), var)) %>%
#   pivot_longer(cols = everything(), names_to = "pc", values_to = "variance") %>%
#   mutate(var_exp = variance/sum(variance),
#          cum_var_exp = cumsum(var_exp),
#          pc = str_replace(pc, ".fitted", "")) %>%
#   select(pc, var_exp, cum_var_exp)
# 
# # PC cum var explained over 0.9
# pca1_all_var_exp_over <- pca1_all_var_exp %>%
#   mutate(n = !(cum_var_exp >= 0.9)) %>%
#   pull(n) %>%
#   sum() + 1
# knitr::kable(pca1_all_var_exp, col.names = c("Comp.",
#                                              "Prop. of variance",
#                                              "Cumulative prop."))
# # PCA loadings
# pca1_all_loadings <- pca1_all_fit %>%
#   broom::tidy(matrix = "rotation") %>%
#   mutate(component = str_c("PC", PC),
#          component = fct_reorder(component, PC)) %>%
#   filter(PC <= pca1_all_var_exp_over) %>%
#   group_by(PC) %>%
#   top_n(8, abs(value)) %>%
#   ungroup() %>%
#   mutate(cor = if_else(value > 0, "positive", "negative"),
#          column = tidytext::reorder_within(column, abs(value), component))
# 
# pca1_all_loadings %>%
#   ggplot(aes(abs(value), column, fill = cor)) +
#   geom_col(color = "black") +
#   scale_fill_manual(values = c("white", "gray80")) +
#   facet_wrap(vars(component), scales = "free_y") +
#   tidytext::scale_y_reordered() +
#   theme_bw() +
#   labs(x = "Abs. contribution value", y = element_blank(), fill = "Value")
# plot_pca1_full <- function(x = "PC1", y = "PC2") {
# 
#   pcs <- c(x, y)
# 
#   pca1_all_plot <- pca1_all_aug %>%
#     ggplot(aes(!!sym(pcs[1]), !!sym(pcs[2]))) +
#     geom_point(data = select(pca1_all_aug, -period_label),
#                alpha = 0.05, size = 0.4) +
#     geom_point(aes(color = reg),
#                alpha = 0.8, size = 0.5, show.legend = FALSE) +
#     scale_color_manual(values = mycol[c(1, 2)]) +
#     facet_grid(vars(period_label), vars(reg)) +
#     theme_minimal()
# 
#   pca1_all_rot <-  pca1_all_rot %>%
#     ggplot(aes(!!sym(pcs[1]), !!sym(pcs[2]))) +
#     geom_segment(xend = 0, yend = 0, alpha = 0.4) +
#     geom_text(
#       aes(label = column),
#       hjust = 1, nudge_x = -0.02,
#       color = "#904C2F",
#       size = 2.6
#     ) +
#     theme_minimal()
# 
#   pca1_all_load <- pca1_all_loadings %>%
#     filter(component %in% pcs) %>%
#     ggplot(aes(abs(value), column, fill = cor)) +
#     geom_col(color = "black", show.legend = FALSE) +
#     scale_fill_manual(values = c("white", "gray80")) +
#     facet_wrap(vars(component), scales = "free_y") +
#     tidytext::scale_y_reordered() +
#     labs(x = "Abs. contribution value", y = element_blank(), fill = "Value") +
#     theme_minimal()
# 
#   pca1_all_plot / (pca1_all_rot | pca1_all_load) +
#     plot_layout(heights = c(6, 1))
# }
# plot_pca1_full()
# ggsave(here::here("analysis/figures/pca1_full_pc12.png"), height = 14)
# plot_pca1_full("PC3", "PC4")
# ggsave(here::here("analysis/figures/pca1_full_pc34.png"), height = 14)
# plot_pca1_full("PC5", "PC6")
# ggsave(here::here("analysis/figures/pca1_full_pc56.png"), height = 14)
# plot_pca1_full("PC7", "PC8")
# ggsave(here::here("analysis/figures/pca1_full_pc78.png"), height = 14)

Pottery groups

# # fit PCA
# pca2_all_fit <- res2_wide %>%
#   # select(-cont) %>%
#   select(where(is.numeric)) %>%
#   scale(center = TRUE, scale = TRUE) %>%
#   prcomp()
# 
# # augmented fitted values
# pca2_all_aug <- pca2_all_fit %>%
#   broom::augment(res2_wide) %>%
#   select(-".rownames") %>%
#   rename_with(\(x) str_remove(x, ".fitted")) %>%
#   settlements::add_region() %>%
#   settlements::add_period_label() %>%
#   settlements::add_neo()
# 
# # rotation matrix
# pca2_all_rot <- pca2_all_fit %>%
#   broom::tidy(matrix = "rotation") %>%
#   mutate(PC = str_c("PC", PC)) %>%
#   pivot_wider(names_from = PC)
# 
# # var explained
# pca2_all_var_exp <- pca2_all_fit %>%
#   broom::augment(res2_wide) %>%
#   summarise(across(contains("PC"), var)) %>%
#   pivot_longer(cols = everything(), names_to = "pc", values_to = "variance") %>%
#   mutate(var_exp = variance/sum(variance),
#          cum_var_exp = cumsum(var_exp),
#          pc = str_replace(pc, ".fitted", "")) %>%
#   select(pc, var_exp, cum_var_exp)
# 
# # PC cum var explained over 0.9
# pca2_all_var_exp_over <- pca2_all_var_exp %>%
#   mutate(n = !(cum_var_exp >= 0.9)) %>%
#   pull(n) %>%
#   sum() + 1
# knitr::kable(pca1_all_var_exp, col.names = c("Comp.",
#                                              "Prop. of variance",
#                                              "Cumulative prop."))
# # PCA loadings
# pca2_all_loadings <- pca2_all_fit %>%
#   broom::tidy(matrix = "rotation") %>%
#   mutate(component = str_c("PC", PC),
#          component = fct_reorder(component, PC)) %>%
#   filter(PC <= pca2_all_var_exp_over) %>%
#   group_by(PC) %>%
#   top_n(8, abs(value)) %>%
#   ungroup() %>%
#   mutate(cor = if_else(value > 0, "positive", "negative"),
#          column = tidytext::reorder_within(column, abs(value), component))
# 
# pca2_all_loadings %>%
#   ggplot(aes(abs(value), column, fill = cor)) +
#   geom_col(color = "black") +
#   scale_fill_manual(values = c("white", "gray80")) +
#   facet_wrap(vars(component), scales = "free_y") +
#   tidytext::scale_y_reordered() +
#   theme_bw() +
#   labs(x = "Abs. contribution value", y = element_blank(), fill = "Value")
# plot_pca2_full <- function(x = "PC1", y = "PC2") {
# 
#   pcs <- c(x, y)
# 
#   pca2_all_plot <- pca2_all_aug %>%
#     ggplot(aes(!!sym(pcs[1]), !!sym(pcs[2]))) +
#     geom_point(data = select(pca2_all_aug, -period_label),
#                alpha = 0.05, size = 0.4) +
#     geom_point(aes(color = reg),
#                alpha = 0.8, size = 0.5, show.legend = FALSE) +
#     scale_color_manual(values = mycol[c(1, 2)]) +
#     facet_grid(vars(period_label), vars(reg)) +
#     theme_minimal()
# 
#   pca2_all_rot <-  pca2_all_rot %>%
#     ggplot(aes(!!sym(pcs[1]), !!sym(pcs[2]))) +
#     geom_segment(xend = 0, yend = 0, alpha = 0.4) +
#     geom_text(
#       aes(label = column),
#       hjust = 1, nudge_x = -0.02,
#       color = "#904C2F",
#       size = 2.6
#     ) +
#     theme_minimal()
# 
#   pca2_all_load <- pca2_all_loadings %>%
#     filter(component %in% pcs) %>%
#     ggplot(aes(abs(value), column, fill = cor)) +
#     geom_col(color = "black", show.legend = FALSE) +
#     scale_fill_manual(values = c("white", "gray80")) +
#     facet_wrap(vars(component), scales = "free_y") +
#     tidytext::scale_y_reordered() +
#     labs(x = "Abs. contribution value", y = element_blank(), fill = "Value") +
#     theme_minimal()
# 
#   pca2_all_plot / (pca2_all_rot | pca2_all_load) +
#     plot_layout(heights = c(6, 1))
# }
# plot_pca2_full()
# ggsave(here::here("analysis/figures/pca2_full_pc12.png"), height = 14)
# plot_pca2_full("PC3", "PC4")
# ggsave(here::here("analysis/figures/pca2_full_pc34.png"), height = 14)
# plot_pca2_full("PC5", "PC6")
# ggsave(here::here("analysis/figures/pca2_full_pc56.png"), height = 14)
# plot_pca2_full("PC7", "PC8")
# ggsave(here::here("analysis/figures/pca2_full_pc78.png"), height = 14)

Principal components analysis

# traditions
# traditions, by periods
pca1_per <- res1_wide %>%
  add_region() %>% 
  group_by(reg, period) %>% 
  nest() %>% 
  nested_pca()

# traditions, by neo BC
pca1_neo <- res1_wide %>%
  add_region() %>% 
  add_neo() %>% 
  select(-period) %>% 
  group_by(reg, neo) %>% 
  nest() %>% 
  nested_pca()

# groups
# groups, by periods
pca2_per <- res2_wide %>%
  add_region() %>% 
  group_by(reg, period) %>% 
  nest() %>% 
  nested_pca()

# groups, by neo BC
pca2_neo <- res2_wide %>%
  add_region() %>% 
  add_neo() %>% 
  select(-period) %>% 
  group_by(reg, neo) %>% 
  nest() %>% 
  nested_pca()

# pca1_neo %>%
#   select(reg, neo, sdev) %>%
#   unnest(sdev) %>%
#   ungroup(neo, reg) %>%
#   ggplot(aes(factor(pc), cum_prop)) +
#   geom_point() +
#   geom_path(aes(group = neo)) +
#   geom_hline(yintercept = 0.9) +
#   facet_grid(vars(neo), vars(reg)) +
#   scale_y_continuous(limits = c(0, 1)) +
#   theme_minimal()

Variable labels in PCA plots:

altitude - alt.,
cont - cont.,
dist_fenced - d. fenced,
hydro_kde - hydro.,
line_terrain - t. line,
settlements_kde - dens.,
slope ~ slope,
rm_dist_chipped - d. ch.,
line_water - w. line,
rm_dist_polished - d. pol.

Pottery traditions in Neolithic B and C

pca1_neo %>% plot_nested_pca("neo")
pca1_neo %>% plot_nested_pca("neo", c("PC3", "PC4"))

Pottery groups in Neolithic B vs Neolithic C

pca2_neo %>% plot_nested_pca("neo")
pca2_neo %>% plot_nested_pca("neo", c("PC3", "PC4"))

Pottery traditions across periods

pca1_per %>% 
  add_period_label() %>% 
  plot_nested_pca("period_label")

# ggsave(here::here("analysis/paper/temp/pca_trad_1.png"), width = 10, height = 21)
pca1_per %>% 
  add_period_label() %>% 
  plot_nested_pca("period_label", c("PC3", "PC4"))

# ggsave(here::here("analysis/paper/temp/pca_trad_2.png"), width = 10, height = 14)

Pottery groups across periods

pca2_per %>% 
  add_period_label() %>% 
  plot_nested_pca("period_label")

# ggsave(here::here("analysis/paper/temp/pca_skup_1.png"), width = 10, height = 21)
pca2_per %>% 
  add_period_label() %>% 
  plot_nested_pca("period_label", c("PC3", "PC4"))

# ggsave(here::here("analysis/paper/temp/pca_skup_2.png"), width = 10, height = 21)

Variables clustering

Pottery traditions

hclust1_neo <- pca1_neo %>% 
  hclust_variables()

hclust_plot_dendro(hclust1_neo, c("reg", "neo"))

Pottery groups

hclust2_neo <- pca2_neo %>% 
  hclust_variables()

hclust_plot_dendro(hclust2_neo, c("reg", "neo"))
# res1_long %>%
# distinct(id, period, variable, .keep_all = TRUE) %>% 
# pivot_wider(names_from = variable, values_from = value) %>% 
# arrange(id, period) %>% 
# write_csv(here::here(derived_data, "results", "results1.csv"))
# 
# res2_long %>% 
# distinct(id, period, variable, .keep_all = TRUE) %>% 
# pivot_wider(names_from = variable, values_from = value) %>% 
# arrange(id, period) %>% 
# write_csv(here::here(derived_data, "results", "results2.csv"))

\newpage

References

::: {#refs} :::

\newpage

Colophon

This report was generated on r Sys.time() using the following computational environment and dependencies:

# which R packages and versions?
if ("devtools" %in% installed.packages()) devtools::session_info()

The current Git commit details are:

# what commit is this file at? 
if ("git2r" %in% installed.packages() & git2r::in_repository(path = ".")) git2r::repository(here::here())  


petrpajdla/settlements documentation built on June 27, 2022, 10:21 p.m.