knitr::opts_chunk$set( collapse = FALSE, warning = FALSE, message = FALSE, echo = FALSE, cache = TRUE, comment = "#>", # fig.path = "../figures/", dpi = 300 )
library(sf) library(tidyverse) library(spatstat) library(patchwork) devtools::load_all(".")
The analysis is divided into two scales:
The study area is broadly defined as Easter part of Bohemia (abbreviated B) and Morava river catchment (M). It spans across the area of the Czech Republic and bordering regions of Austria and Slovakia.
derived_data <- "analysis/data/derived_data" input_data <- "analysis/data/input_data" path_maps <- here::here(derived_data, "maps/") path_data <- here::here(derived_data) path_temp <- here::here(derived_data, "temp/") # mask mask <- st_read(paste0(path_maps, "mask.geojson"), quiet = TRUE) bbox <- st_read(paste0(path_maps, "bbox.geojson"), quiet = TRUE) # rivers + admin for plots admin0 <- st_read(paste0(path_maps, "admin0.geojson"), quiet = TRUE) admin1 <- st_read(paste0(path_maps, "admin1.geojson"), quiet = TRUE) rivers0 <- st_read(paste0(path_maps, "rivers0.geojson"), quiet = TRUE) rivers1 <- st_read(paste0(path_maps, "rivers1.geojson"), quiet = TRUE) # labels labs_chrono <- read_rds(here::here(input_data, "chrono_labels.RDS")) # database set_base <- read_rds(here::here(input_data, "settlements.RDS")) set_spat <- st_read(here::here(input_data, "settlements_sf.geojson"), quiet = TRUE) settlements1 <- set_spat %>% right_join(set_base$period1, by = "id") %>% mutate(label = labs_chrono$chrono1[chrono]) settlements2 <- set_spat %>% right_join(set_base$period2, by = "id") %>% mutate(label = labs_chrono$chrono2[chrono]) # colors mycol <- c("B" = "#7FC97F", "M" = "#BEAED4", "b" = "#7FC97F", "m" = "#BEAED4" )
set_base$period1 %>% table_nr_set()
settlements1 %>% plot_nr_set()
set_base$period2 %>% table_nr_set()
settlements2 %>% plot_nr_set()
Settlement density is estimated using KDE. Kernel size of 4 km is used. This value is completely arbitrary but is selected because settlement clusters are nicely highlighted at this scale.
res1_long <- settlements1 %>% est_kde()
settlements1 %>% plot_kde()
res2_long <- settlements2 %>% est_kde()
settlements2 %>% plot_kde()
Raw material sources are defined as either points or lines. Each raw material can be identified by single or multiple simple features. If the location of source used in the Neolithic period is unknown or several sources or even modes of procurement (e. g. from river bed etc.) are possible, the most probable are included.
Minimum, mean, median and maximum distances are calculated, median is used as a result.
rm_pts <- st_read(here::here(derived_data, "rm_points.geojson"), quiet = TRUE) %>% add_rm_type() rm_lns <- st_read(here::here(derived_data, "rm_lines.geojson"), quiet = TRUE) %>% add_rm_type()
ggplot() + geom_sf(data = mask, fill = "gray80", color = NA, alpha = 0.4) + geom_sf(data = rivers1, color = "gray60", size = 0.4, alpha = 0.4) + geom_sf(data = rivers0, color = "gray60", size = 0.6, alpha = 0.4) + geom_sf(data = set_spat, alpha = 0.1, color = "gray80") + geom_sf(data = rm_pts, aes(color = rm), shape = 4) + geom_sf(data = rm_lns, aes(color = rm)) + scale_x_continuous(expand = c(0, 0)) + scale_y_continuous(expand = c(0, 0)) + facet_wrap(vars(label)) + theme_void() + theme(panel.border = element_rect(color = "black", fill = NA)) + labs(color = "Raw material")
# calculated separately in a script `rm_distance.R` # dist_pts <- dist_to_rm(set_spat, rm_pts, "rm") # dist_lns <- dist_to_rm(set_spat, rm_lns, "rm") rm_dist <- read_csv(paste0(path_temp, "rm_dist.csv")) %>% mutate(rm = str_to_lower(rm))
# rm_dist %>% # ggplot(aes(x = min, y = rm, fill = region)) + # geom_violin() + # scale_fill_brewer(palette = "Accent") + # facet_wrap(~type, scales = "free_y") + # labs(x = "min. dist. (km)", y = "raw material", fill = "Region") + # theme_minimal() + # theme(legend.position = "bottom", # panel.border = element_rect(color = "black", fill = NA))
Hypothesis: In Morava river catchment settlement position is directly associated with raw material source.
We compare settlement density (KDE) with distance to chosen raw materials. There is no strong evidence that the distribution of settlements is associated with distance to any of the key raw material sources occurring in the region in any of the studied periods. There are some mild negative correlations in the period 4.8 -- 4.2 k BCE for polished stone tools materials. Strongest connection is observed between given pottery groups in period 4.8 -- 4.6 k BCE in case of ad/pmd/zelesice raw material sources.
res1_long %>% plot_dist_lm()
res1_long %>% tab_dist_lm()
res2_long %>% plot_dist_lm()
res2_long %>% tab_dist_lm()
# 0 = not used # 0.5 = somewhat used # 1 = used rm_b <- readxl::read_xlsx(here::here("analysis/data/raw_data", "sidliste_vs_zdroje.xlsx"), sheet = "B") rm_m <- readxl::read_xlsx(here::here("analysis/data/raw_data", "sidliste_vs_zdroje.xlsx"), sheet = "M") rm_relevance <- bind_rows(b = rm_b, m = rm_m, .id = "reg") %>% mutate(across(starts_with("p"), function(x) x * 0.5)) %>% pivot_longer(starts_with("p"), names_to = "period", values_to = "relevance") %>% rename(rm = surovina) rm_dist_rel_p1 <- set_base$period2 %>% filter_rel_dist_rm() rm_dist_rel_p2 <- set_base$period2 %>% filter_rel_dist_rm()
rm_dist_rel_p1 %>% plot_rm_reg_dist("m")
rm_dist_rel_p1 %>% plot_rm_reg_dist("b")
rm_dist_rel_p2 %>% plot_rm_reg_dist("m")
rm_dist_rel_p2 %>% plot_rm_reg_dist("b")
rm_types <- rm_dist %>% select(rm, type) %>% distinct() rm_types_tab <- rm_types %>% pull(type) names(rm_types_tab) <- rm_types %>% pull(rm) res1_long <- rm_dist_rel_p1 %>% prep_dist_rel_rm() %>% bind_rows(res1_long) res2_long <- rm_dist_rel_p2 %>% prep_dist_rel_rm() %>% bind_rows(res2_long)
rm_dist_rel_p1 %>% count_dist_rel_rm() %>% plot_dist_rel_rm()
rm_dist_rel_p2 %>% count_dist_rel_rm() %>% plot_dist_rel_rm()
rm(list = ls(pattern = "^rm_"))
In settlement structure studies it is often assumed that past human settlements have some kind of a relation to the network of watercourses. Numerous problems are faced while assessing the relationship between settlements and close watercourse. Most importantly the man-made recent changes in the structure and intensity of watercourses (and landscape in general) are a major factor impairing such analysis.
Usually, the settlement - watercourse relationship is expressed in some form of a metric giving shortest distance or so. We choose an approach used to study road networks in modern urban areas [e. g. @lin2020].
Most of the input data is derived from DIBAVOD data set A01 layer (https://www.dibavod.cz/).
Density (kernel smoothed intensity) is estimated using spatstat::density.psp
function with sigma 2000 m.
# pkg raster # hydro_dens <- raster::raster( # here::here(derived_data, "hydro", "density_raster.tif")) # # hydro_dens_at_points <- hydro_dens %>% # raster::extract(set_spat) %>% # as_tibble() %>% # rename(hydro_kde = value) %>% # bind_cols(select(st_drop_geometry(set_spat), id)) # pkg stars # hydro_dens <- stars::read_stars( # here::here(derived_data, "hydro", "density_raster.tif") # ) %>% # st_set_crs(value = st_crs(set_spat)) # # hydro_dens_at_points <- hydro_dens %>% # stars::st_extract(set_spat) %>% # st_drop_geometry() %>% # rename(hydro_kde = density_raster.tif) %>% # bind_cols(select(st_drop_geometry(set_spat), id)) # pkg terra, implementing buffer around settlements # settlements represented as points hydro_dens <- terra::rast( here::here(derived_data, "hydro", "density_raster.tif") ) hydro_dens_at_points <- terra::extract( hydro_dens, terra::vect(st_buffer(set_spat, dist = 2e3)) ) %>% group_by(ID) %>% summarise(hydro_kde = mean(density_raster)) %>% bind_cols(id = set_spat$id) %>% select(-ID) # bind to results res1_long <- set_base$period1 %>% prep_hydro_dens_res() %>% bind_rows(res1_long) res2_long <- set_base$period2 %>% select(-facet) %>% prep_hydro_dens_res() %>% bind_rows(res2_long)
# hydro_dens %>% # raster::mask(mask) %>% # raster::rasterToPoints() %>% # as_tibble() %>% # rename(KDE = density_raster) %>% # ggplot() + # geom_raster(aes(x, y, fill = KDE)) + # geom_sf(data = set_spat, alpha = 0.1, shape = 1, color = "white") + # theme_void() # stars # ggplot() + # stars::geom_stars(data = st_crop(hydro_dens, mask), sf = TRUE) + # geom_sf(data = set_spat, alpha = 0.1, shape = 1, color = "white") + # theme_void() + # labs(fill = "KDE")
rm(list = c("hydro_dens", "hydro_dens_at_points"))
dem_terra <- terra::rast(here::here(derived_data, "archive/dem/dem_aster_sjtsk_stars.tif"))
dem_slope <- terra::terrain(dem_terra, v = "slope") # slope in the vicinity of settlements - 2km slope <- terra::extract(dem_slope, terra::vect(st_buffer(set_spat, dist = 2e3))) # derive median aspect mean_slope <- slope %>% tibble() %>% group_by(ID) %>% summarise(mean_slope = mean(slope)) %>% bind_cols(id = set_spat$id) %>% mutate(reg = str_extract(id, "^.")) # shapiro.test(filter(mean_slope, reg == "B")$mean_slope) # hist(filter(mean_slope, reg == "B")$mean_slope) # qqnorm(filter(mean_slope, reg == "B")$mean_slope) # # shapiro.test(filter(mean_slope, reg == "M")$mean_slope) # hist(filter(mean_slope, reg == "M")$mean_slope) # qqnorm(filter(mean_slope, reg == "M")$mean_slope) # mean_slope %>% # inner_join(settlements2, by = "id") %>% # ggplot(aes(mean_slope)) + # geom_histogram(color = "black", fill = "white") + # facet_grid(vars(period_label), vars(reg), scales = "free_y") + # theme_bw() + # labs(x = "slope (°)") # mean_slope %>% # inner_join(settlements2, by = "id") %>% # ggplot(aes(mean_slope, period_label)) + # geom_boxplot(color = "black", fill = "white") + # facet_wrap(vars(reg), scales = "free_y") + # theme_bw() + # labs(x = "slope (°)")
res1_long <- mean_slope %>% select(-ID, -reg) %>% inner_join(st_drop_geometry(select(settlements1, id, period))) %>% transmute(id, period, value = mean_slope, variable = "slope") %>% bind_rows(res1_long) res2_long <- mean_slope %>% select(-ID, -reg) %>% inner_join(st_drop_geometry(select(settlements2, id, period))) %>% transmute(id, period, value = mean_slope, variable = "slope") %>% bind_rows(res2_long)
rm(list = c("dem_slope", "slope", "mean_slope")) gc()
# dem_data <- read_csv(here::here(derived_data, "dem/vals_dem.csv")) # # # bind to results # res1_long <- set_base$period1 %>% # prep_dem_res() %>% # bind_rows(res1_long) # # res2_long <- set_base$period2 %>% # select(-facet) %>% # prep_dem_res() %>% # bind_rows(res2_long)
# altitude <- set_spat %>% st_drop_geometry() %>% # as_tibble() %>% # select(id, altitude) # # # bind to results # res1_long <- set_base$period1 %>% # prep_alt_res() %>% # bind_rows(res1_long) # # res2_long <- set_base$period2 %>% # select(-facet) %>% # prep_alt_res() %>% # bind_rows(res2_long)
# alt_plot <- set_spat %>% st_drop_geometry() %>% # as_tibble() %>% # select(id, altitude) %>% # left_join(dem_data) # # alt_cor <- alt_plot %>% # summarise(cor = cor(altitude, dem)) %>% # pull(cor) %>% # round(2) # # alt_plot %>% # ggplot(aes(altitude, dem)) + # geom_point() + # theme_minimal() + # annotate(geom = "text", x = 175, y = 525, # label = paste("Cor. ", alt_cor))
Aspect is calculated from the DEM, 0 means the slope is facing North, 90 East, 180 South and 270 West. The settlements are buffered by 200 meters and the median aspect is taken. The vicinity of settlements is thus of interest, not the given point.
In both Morava river drainage basin and Eastern part of Bohemia, median aspect is normally distributed.
# shapiro.test(filter(med_asp, reg == "B")$med_asp) # shapiro.test(filter(med_asp, reg == "M")$med_asp)
# # polar plot # med_asp %>% # ggplot(aes(med_asp)) + # geom_histogram(color = "black", fill = "white") + # scale_x_continuous(limits = c(0, 359), # breaks = c(0, 90, 180, 270)) + # coord_polar(start = 0) + # facet_wrap(vars(reg)) + # theme_bw() + # labs(x = "median aspect (°)")
# med_asp %>% # inner_join(settlements2, by = "id") %>% # ggplot(aes(med_asp)) + # geom_histogram(color = "black", fill = "white") + # scale_x_continuous(limits = c(0, 360), # breaks = c(0, 90, 180, 270, 360)) + # facet_grid(vars(period_label), vars(reg), scales = "free_y") + # theme_bw() + # labs(x = "median aspect (°)")
rm(list = c("dem_asp", "asp", "med_asp"))
Terrain / Topographic roughness index
# dem_tpi <- terra::terrain(dem_terra, v = "TPI") # # # tpi in the vicinity of settlements - 2km # tpi <- terra::extract(dem_tpi, # terra::vect(st_buffer(set_spat, dist = 2e3))) # # # derive median aspect # mean_slope <- slope %>% # tibble() %>% # group_by(ID) %>% # summarise(mean_slope = mean(slope)) %>% # bind_cols(id = set_spat$id) %>% # mutate(reg = str_extract(id, "^.")) # shapiro.test(filter(mean_slope, reg == "B")$mean_slope) # hist(filter(mean_slope, reg == "B")$mean_slope) # qqnorm(filter(mean_slope, reg == "B")$mean_slope) # # shapiro.test(filter(mean_slope, reg == "M")$mean_slope) # hist(filter(mean_slope, reg == "M")$mean_slope) # qqnorm(filter(mean_slope, reg == "M")$mean_slope) # mean_slope %>% # inner_join(settlements2, by = "id") %>% # ggplot(aes(mean_slope)) + # geom_histogram(color = "black", fill = "white") + # facet_grid(vars(period_label), vars(reg), scales = "free_y") + # theme_bw() + # labs(x = "slope (°)") # mean_slope %>% # inner_join(settlements2, by = "id") %>% # ggplot(aes(mean_slope, period_label)) + # geom_boxplot(color = "black", fill = "white") + # facet_wrap(vars(reg), scales = "free_y") + # theme_bw() + # labs(x = "slope (°)")
Minimal distance to fortified settlements in given regions is explored.
dist_fenced <- read_rds(here::here(derived_data, "fortif/distance_fortif.RDS")) # results 1 res1_long <- dist_fenced$traditions %>% prep_dist_fenced() %>% bind_rows(res1_long) # results 2 dist_fenced_p2 <- dist_fenced$groups %>% prep_dist_fenced() %>% bind_rows(res2_long)
Is the same location settled in the previous period?
res1_long <- set_base$period1 %>% settlement_continuity() %>% bind_rows(res1_long) res2_long <- set_base$period2 %>% settlement_continuity() %>% bind_rows(res2_long)
n_set_per1 <- set_base$period1 %>% add_region() %>% group_by(period_label, reg) %>% summarise(sum = n()) res1_long %>% plot_continuity(y = n_set_per1)
n_set_per2 <- set_base$period2 %>% add_region() %>% group_by(period_label, reg) %>% summarise(sum = n()) res2_long %>% plot_continuity(y = n_set_per2)
Arrangement of settlements along natural 'terrain' lines or along waterways is explored.
along_lines <- read_rds(here::here(derived_data, "lines/linear_arrangement.RDS")) along_lines1 <- along_lines$traditions %>% mutate(chrono = str_remove(chrono, ".$")) %>% group_by(id, chrono, period, variable) %>% summarise(value = max(value)) %>% ungroup(id, chrono, variable) %>% mutate(value = value / n()) %>% select(-chrono) %>% group_by(variable, period, id) %>% summarise(value = mean(value), .groups = "drop") along_lines2 <- along_lines$groups %>% distinct(id, chrono, period, variable, value) %>% group_by(period) %>% mutate(value = value / n()) %>% select(-chrono) %>% group_by(variable, period, id) %>% summarise(value = mean(value), .groups = "drop") # results 1 res1_long <- res1_long %>% bind_rows(along_lines1) # results 2 res2_long <- res2_long %>% bind_rows(along_lines2)
rm(list = ls(pattern = "^rm")) rm(list = ls(pattern = "^along")) rm(list = ls(pattern = "line$")) rm(list = ls(pattern = "violin$")) rm(list = ls(pattern = "^n_set")) rm(list = ls(pattern = "^hydro")) rm(list = ls(pattern = "fenced$")) rm(list = ls(pattern = "dem_data")) rm(list = ls(pattern = "^alt"))
res1_wide <- res1_long %>% prep_wide_res() # res1_long <- res1_wide %>% # pivot_longer(-c(id, period), names_to = "variable", values_to = "value") res2_wide <- res2_long %>% prep_wide_res() # res2_long <- res2_wide %>% # pivot_longer(-c(id, period), names_to = "variable", values_to = "value") # # scale cols # res1_wide <- res1_wide %>% # mutate(across(where(is.numeric), scale)) # # res2_wide <- res2_wide %>% # mutate(across(where(is.numeric), scale))
labs_variables <- c( "settlements_kde" = "Settlement density", "dist_fenced" = "Min. dist. to fenced set. (km)", "line_terrain" = "Along terrain line", "line_water" = "Along water line", # "cont" = "Settlement continuity", "hydro_kde" = "Watercourse density", # "dem" = "Altitude (DEM)", "altitude" = "Altitude (map)", "slope" = "Slope (°)", "rm_dist_chipped" = "Distance to chipped RM", "rm_dist_polished" = "Distance to polished RM" # "rm_pc1" = "Distance to RM sources 1", # "rm_pc2" = "Distance to RM sources 2", # "rm_pc3" = "Distance to RM sources 3" ) %>% map_chr(str_wrap, 12)
res1_violin <- res1_long %>% plot_violin() res1_violin
res2_violin <- res2_long %>% plot_violin() res2_violin
# ggsave(here::here("analysis/figures", "res1_violin.png"), res1_violin, # width = 9, height = 12) # ggsave(here::here("analysis/figures", "res2_violin.png"), res2_violin, # width = 9, height = 12)
res1_line <- res1_long %>% plot_lines() res1_line
res2_line <- res2_long %>% plot_lines() res2_line
# ggsave(here::here("analysis/figures", "res1_line.png"), res1_line, # width = 7, height = 7) # ggsave(here::here("analysis/figures", "res2_line.png"), res2_line, # width = 7, height = 7)
# # fit PCA # pca1_all_fit <- res1_wide %>% # # select(-cont) %>% # select(where(is.numeric)) %>% # scale(center = TRUE, scale = TRUE) %>% # prcomp() # # # augmented fitted values # pca1_all_aug <- pca1_all_fit %>% # broom::augment(res1_wide) %>% # select(-".rownames") %>% # rename_with(\(x) str_remove(x, ".fitted")) %>% # settlements::add_region() %>% # settlements::add_period_label() %>% # settlements::add_neo() # # # rotation matrix # pca1_all_rot <- pca1_all_fit %>% # broom::tidy(matrix = "rotation") %>% # mutate(PC = str_c("PC", PC)) %>% # pivot_wider(names_from = PC) # # # var explained # pca1_all_var_exp <- pca1_all_fit %>% # broom::augment(res1_wide) %>% # summarise(across(contains("PC"), var)) %>% # pivot_longer(cols = everything(), names_to = "pc", values_to = "variance") %>% # mutate(var_exp = variance/sum(variance), # cum_var_exp = cumsum(var_exp), # pc = str_replace(pc, ".fitted", "")) %>% # select(pc, var_exp, cum_var_exp) # # # PC cum var explained over 0.9 # pca1_all_var_exp_over <- pca1_all_var_exp %>% # mutate(n = !(cum_var_exp >= 0.9)) %>% # pull(n) %>% # sum() + 1
# knitr::kable(pca1_all_var_exp, col.names = c("Comp.", # "Prop. of variance", # "Cumulative prop."))
# # PCA loadings # pca1_all_loadings <- pca1_all_fit %>% # broom::tidy(matrix = "rotation") %>% # mutate(component = str_c("PC", PC), # component = fct_reorder(component, PC)) %>% # filter(PC <= pca1_all_var_exp_over) %>% # group_by(PC) %>% # top_n(8, abs(value)) %>% # ungroup() %>% # mutate(cor = if_else(value > 0, "positive", "negative"), # column = tidytext::reorder_within(column, abs(value), component)) # # pca1_all_loadings %>% # ggplot(aes(abs(value), column, fill = cor)) + # geom_col(color = "black") + # scale_fill_manual(values = c("white", "gray80")) + # facet_wrap(vars(component), scales = "free_y") + # tidytext::scale_y_reordered() + # theme_bw() + # labs(x = "Abs. contribution value", y = element_blank(), fill = "Value")
# plot_pca1_full <- function(x = "PC1", y = "PC2") { # # pcs <- c(x, y) # # pca1_all_plot <- pca1_all_aug %>% # ggplot(aes(!!sym(pcs[1]), !!sym(pcs[2]))) + # geom_point(data = select(pca1_all_aug, -period_label), # alpha = 0.05, size = 0.4) + # geom_point(aes(color = reg), # alpha = 0.8, size = 0.5, show.legend = FALSE) + # scale_color_manual(values = mycol[c(1, 2)]) + # facet_grid(vars(period_label), vars(reg)) + # theme_minimal() # # pca1_all_rot <- pca1_all_rot %>% # ggplot(aes(!!sym(pcs[1]), !!sym(pcs[2]))) + # geom_segment(xend = 0, yend = 0, alpha = 0.4) + # geom_text( # aes(label = column), # hjust = 1, nudge_x = -0.02, # color = "#904C2F", # size = 2.6 # ) + # theme_minimal() # # pca1_all_load <- pca1_all_loadings %>% # filter(component %in% pcs) %>% # ggplot(aes(abs(value), column, fill = cor)) + # geom_col(color = "black", show.legend = FALSE) + # scale_fill_manual(values = c("white", "gray80")) + # facet_wrap(vars(component), scales = "free_y") + # tidytext::scale_y_reordered() + # labs(x = "Abs. contribution value", y = element_blank(), fill = "Value") + # theme_minimal() # # pca1_all_plot / (pca1_all_rot | pca1_all_load) + # plot_layout(heights = c(6, 1)) # }
# plot_pca1_full() # ggsave(here::here("analysis/figures/pca1_full_pc12.png"), height = 14)
# plot_pca1_full("PC3", "PC4") # ggsave(here::here("analysis/figures/pca1_full_pc34.png"), height = 14)
# plot_pca1_full("PC5", "PC6") # ggsave(here::here("analysis/figures/pca1_full_pc56.png"), height = 14)
# plot_pca1_full("PC7", "PC8") # ggsave(here::here("analysis/figures/pca1_full_pc78.png"), height = 14)
# # fit PCA # pca2_all_fit <- res2_wide %>% # # select(-cont) %>% # select(where(is.numeric)) %>% # scale(center = TRUE, scale = TRUE) %>% # prcomp() # # # augmented fitted values # pca2_all_aug <- pca2_all_fit %>% # broom::augment(res2_wide) %>% # select(-".rownames") %>% # rename_with(\(x) str_remove(x, ".fitted")) %>% # settlements::add_region() %>% # settlements::add_period_label() %>% # settlements::add_neo() # # # rotation matrix # pca2_all_rot <- pca2_all_fit %>% # broom::tidy(matrix = "rotation") %>% # mutate(PC = str_c("PC", PC)) %>% # pivot_wider(names_from = PC) # # # var explained # pca2_all_var_exp <- pca2_all_fit %>% # broom::augment(res2_wide) %>% # summarise(across(contains("PC"), var)) %>% # pivot_longer(cols = everything(), names_to = "pc", values_to = "variance") %>% # mutate(var_exp = variance/sum(variance), # cum_var_exp = cumsum(var_exp), # pc = str_replace(pc, ".fitted", "")) %>% # select(pc, var_exp, cum_var_exp) # # # PC cum var explained over 0.9 # pca2_all_var_exp_over <- pca2_all_var_exp %>% # mutate(n = !(cum_var_exp >= 0.9)) %>% # pull(n) %>% # sum() + 1
# knitr::kable(pca1_all_var_exp, col.names = c("Comp.", # "Prop. of variance", # "Cumulative prop."))
# # PCA loadings # pca2_all_loadings <- pca2_all_fit %>% # broom::tidy(matrix = "rotation") %>% # mutate(component = str_c("PC", PC), # component = fct_reorder(component, PC)) %>% # filter(PC <= pca2_all_var_exp_over) %>% # group_by(PC) %>% # top_n(8, abs(value)) %>% # ungroup() %>% # mutate(cor = if_else(value > 0, "positive", "negative"), # column = tidytext::reorder_within(column, abs(value), component)) # # pca2_all_loadings %>% # ggplot(aes(abs(value), column, fill = cor)) + # geom_col(color = "black") + # scale_fill_manual(values = c("white", "gray80")) + # facet_wrap(vars(component), scales = "free_y") + # tidytext::scale_y_reordered() + # theme_bw() + # labs(x = "Abs. contribution value", y = element_blank(), fill = "Value")
# plot_pca2_full <- function(x = "PC1", y = "PC2") { # # pcs <- c(x, y) # # pca2_all_plot <- pca2_all_aug %>% # ggplot(aes(!!sym(pcs[1]), !!sym(pcs[2]))) + # geom_point(data = select(pca2_all_aug, -period_label), # alpha = 0.05, size = 0.4) + # geom_point(aes(color = reg), # alpha = 0.8, size = 0.5, show.legend = FALSE) + # scale_color_manual(values = mycol[c(1, 2)]) + # facet_grid(vars(period_label), vars(reg)) + # theme_minimal() # # pca2_all_rot <- pca2_all_rot %>% # ggplot(aes(!!sym(pcs[1]), !!sym(pcs[2]))) + # geom_segment(xend = 0, yend = 0, alpha = 0.4) + # geom_text( # aes(label = column), # hjust = 1, nudge_x = -0.02, # color = "#904C2F", # size = 2.6 # ) + # theme_minimal() # # pca2_all_load <- pca2_all_loadings %>% # filter(component %in% pcs) %>% # ggplot(aes(abs(value), column, fill = cor)) + # geom_col(color = "black", show.legend = FALSE) + # scale_fill_manual(values = c("white", "gray80")) + # facet_wrap(vars(component), scales = "free_y") + # tidytext::scale_y_reordered() + # labs(x = "Abs. contribution value", y = element_blank(), fill = "Value") + # theme_minimal() # # pca2_all_plot / (pca2_all_rot | pca2_all_load) + # plot_layout(heights = c(6, 1)) # }
# plot_pca2_full() # ggsave(here::here("analysis/figures/pca2_full_pc12.png"), height = 14)
# plot_pca2_full("PC3", "PC4") # ggsave(here::here("analysis/figures/pca2_full_pc34.png"), height = 14)
# plot_pca2_full("PC5", "PC6") # ggsave(here::here("analysis/figures/pca2_full_pc56.png"), height = 14)
# plot_pca2_full("PC7", "PC8") # ggsave(here::here("analysis/figures/pca2_full_pc78.png"), height = 14)
# traditions # traditions, by periods pca1_per <- res1_wide %>% add_region() %>% group_by(reg, period) %>% nest() %>% nested_pca() # traditions, by neo BC pca1_neo <- res1_wide %>% add_region() %>% add_neo() %>% select(-period) %>% group_by(reg, neo) %>% nest() %>% nested_pca() # groups # groups, by periods pca2_per <- res2_wide %>% add_region() %>% group_by(reg, period) %>% nest() %>% nested_pca() # groups, by neo BC pca2_neo <- res2_wide %>% add_region() %>% add_neo() %>% select(-period) %>% group_by(reg, neo) %>% nest() %>% nested_pca() # pca1_neo %>% # select(reg, neo, sdev) %>% # unnest(sdev) %>% # ungroup(neo, reg) %>% # ggplot(aes(factor(pc), cum_prop)) + # geom_point() + # geom_path(aes(group = neo)) + # geom_hline(yintercept = 0.9) + # facet_grid(vars(neo), vars(reg)) + # scale_y_continuous(limits = c(0, 1)) + # theme_minimal()
Variable labels in PCA plots:
altitude - alt.,
cont - cont.,
dist_fenced - d. fenced,
hydro_kde - hydro.,
line_terrain - t. line,
settlements_kde - dens.,
slope ~ slope,
rm_dist_chipped - d. ch.,
line_water - w. line,
rm_dist_polished - d. pol.
pca1_neo %>% plot_nested_pca("neo")
pca1_neo %>% plot_nested_pca("neo", c("PC3", "PC4"))
pca2_neo %>% plot_nested_pca("neo")
pca2_neo %>% plot_nested_pca("neo", c("PC3", "PC4"))
pca1_per %>% add_period_label() %>% plot_nested_pca("period_label") # ggsave(here::here("analysis/paper/temp/pca_trad_1.png"), width = 10, height = 21)
pca1_per %>% add_period_label() %>% plot_nested_pca("period_label", c("PC3", "PC4")) # ggsave(here::here("analysis/paper/temp/pca_trad_2.png"), width = 10, height = 14)
pca2_per %>% add_period_label() %>% plot_nested_pca("period_label") # ggsave(here::here("analysis/paper/temp/pca_skup_1.png"), width = 10, height = 21)
pca2_per %>% add_period_label() %>% plot_nested_pca("period_label", c("PC3", "PC4")) # ggsave(here::here("analysis/paper/temp/pca_skup_2.png"), width = 10, height = 21)
hclust1_neo <- pca1_neo %>% hclust_variables() hclust_plot_dendro(hclust1_neo, c("reg", "neo"))
hclust2_neo <- pca2_neo %>% hclust_variables() hclust_plot_dendro(hclust2_neo, c("reg", "neo"))
# res1_long %>% # distinct(id, period, variable, .keep_all = TRUE) %>% # pivot_wider(names_from = variable, values_from = value) %>% # arrange(id, period) %>% # write_csv(here::here(derived_data, "results", "results1.csv")) # # res2_long %>% # distinct(id, period, variable, .keep_all = TRUE) %>% # pivot_wider(names_from = variable, values_from = value) %>% # arrange(id, period) %>% # write_csv(here::here(derived_data, "results", "results2.csv"))
\newpage
::: {#refs} :::
\newpage
This report was generated on r Sys.time()
using the following computational
environment and dependencies:
# which R packages and versions? if ("devtools" %in% installed.packages()) devtools::session_info()
The current Git commit details are:
# what commit is this file at? if ("git2r" %in% installed.packages() & git2r::in_repository(path = ".")) git2r::repository(here::here())
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.