This document reproduces the figures illustrating the geochemical data in Kwak and Marwick 'Food processing and pottery use in the Early Bronze Age, Central Korean Peninsula'

library(kwakmarwick2015)
knitr::opts_chunk$set(cache = TRUE)

Figures

The density distribution of the radiocarbon dates from the two sites.

# read data in, three columns Name = lab code, Date = radiocarbon age, Uncertainty =  error
dates_KM <- read.csv('data/radiocarbon_KM.csv', stringsAsFactors = FALSE)
dates_SS <- read.csv('data/radiocarbon_SS.csv', stringsAsFactors = FALSE)

# make date names
dates_KM$Name <- paste0("KM_", dates_KM[,1])
dates_SS$Name <- paste0("SS_", dates_SS[,1])
# remove spaces
dates_KM$Name <- gsub("[[:space:]]", "_", dates_KM$Name)
dates_SS$Name <- gsub("[[:space:]]", "_", dates_SS$Name)

# split age and error term
library(stringr)

# KM
date_split  <- str_split_fixed(dates_KM$C14.date..BP..uncalibrated., "pm", 2)
date_split_Age <- as.numeric(gsub("[^0-9]", "", date_split[, 1]))
date_split_Error <- as.numeric(gsub("[^0-9]", "", date_split[, 2]))
# combine into data frame
dates_KM <- data.frame(Name = dates_KM$Name,
                       Date = date_split_Age,
                       Uncertainty = date_split_Error)
# SS
date_split  <- str_split_fixed(dates_SS$C14.date..BP..uncalibrated., "\\?\\?", 2)
date_split_Age <- as.numeric(gsub("[^0-9]", "", date_split[, 1]))
date_split_Error <- as.numeric(gsub("[^0-9]", "", date_split[, 2]))
# combine into data frame
dates_SS <- data.frame(Name = dates_SS$Name,
                       Date = date_split_Age,
                       Uncertainty = date_split_Error)

# combine all dates into one
dates <- rbind(dates_KM, dates_SS)
dates <- dates[complete.cases(dates), ]

# use the R package BChron...
library(Bchron)

# remove NAs
dates_SS <- dates_SS[complete.cases(dates_SS),]

# get estimation of activity through age as proxy

Dens_KM = BchronDensity(ages=dates_KM$Date, ageSds=dates_KM$Uncertainty, calCurves=rep('intcal13', nrow(dates_KM)))

Dens_SS = BchronDensity(ages=dates_SS$Date, ageSds=dates_SS$Uncertainty, calCurves=rep('intcal13', nrow(dates_SS)))

# get line for plotting

Dens_KM_plot <- BchronDensity_plot_params("KM", Dens_KM)
Dens_SS_plot <- BchronDensity_plot_params("SS", Dens_SS)

plot_all <- rbind(Dens_KM_plot, Dens_SS_plot)
library(ggplot2)
library(viridis)
ggplot(plot_all, aes(dateGrid, densFinal, colour = ID)) +
  geom_line(size = 0.5) +
  theme_minimal(base_size = 14) +
  xlab("calibrated years BP") +
  ylab("Density") +
  xlim(1000,4000) +
  scale_color_manual(values = viridis(length(unique(plot_all$ID))))

The table summarising counts and concentrations of residues in the samples

# we follow the example of Copley, doi:10.1016/j.jas.2004.07.004
# read in data
CSIA_KM <- read.csv('data/CSIA_KM.csv', stringsAsFactors = FALSE)
CSIA_SS <- read.csv('data/CSIA_SS.csv', stringsAsFactors = FALSE)

site_names <- c("Kimpo-Yngchon", "Sosa-Dong")
containing_lipid_residues <- c(nrow(CSIA_KM), nrow(CSIA_SS))
total_sherds <- c(49, 37)
percentage_containing_lipid_residues <- round((containing_lipid_residues / total_sherds * 100), 0)
lipid_concentrations_ng_max <- c(max(CSIA_KM$Lipid.concentration..ng.g.1.), 
                                 max(CSIA_SS$Lipid.concentration..ng.g.1.))
lipid_concentrations_ng_mean <- c(mean(CSIA_KM$Lipid.concentration..ng.g.1.), 
                                  mean(CSIA_SS$Lipid.concentration..ng.g.1.))
lipid_concentrations_ug_max <-  round(lipid_concentrations_ng_max / 10e2, 3)
lipid_concentrations_ug_mean <- round(lipid_concentrations_ng_mean / 10e2, 3)

summary_table <- data.frame( total_sherds,
                             containing_lipid_residues,
                             percentage_containing_lipid_residues,
                             lipid_concentrations_ug_max, 
                             lipid_concentrations_ug_mean)

# following the example in https://cran.r-project.org/web/packages/htmlTable/vignettes/tables.html
library(htmlTable)
htmlTable(summary_table,
          header = c("Total", "Containing lipid residues", "Percentage containing residues", 
                     "Maximum", "Mean"),
          rnames = site_names,
          cgroup = c("Number of sherds", "", "Lipid concentrations (&mu;g g<sup>−1</sup>)"),
          n.cgroup = c(2, 1, 2), 
          caption="Number of sherds containing lipids and their concentrations")   
# table of Potsherd number, Laboratory code,    Period, Wall, decoration, technique Diameter at mouth (cm), Part of vessel, Lipid concentration (μg g−1),   δ13C16.0,   δ13C18.0,   Δ13C following http://www.nature.com/nature/journal/v486/n7403/fig_tab/nature11186_T1.html
table_of_concs <- data.frame(rbind(
  with(CSIA_KM, cbind(Sample.No., C16.0, C18.0,  Lipid.concentration..ng.g.1.)),
  with(CSIA_SS, cbind(Sample.No., C16.0, C18.0,  Lipid.concentration..ng.g.1.))
), stringsAsFactors = FALSE)


# get sample location and body part
sampling_KM <- read.csv('data/sampling_Kimpo.csv', stringsAsFactors = FALSE)
sampling_SS <- read.csv('data/sampling_Sosa.csv', stringsAsFactors = FALSE)
table_of_samples <- data.frame(rbind(sampling_KM, sampling_SS))

# join concs to sample location and body part
table_of_details <- merge(table_of_samples, table_of_concs, by = 'Sample.No.')

# clean it a bit...
remove <- c("/", "No.")
table_of_details$Location.house.pit.No. <- gsub(paste(remove, collapse = "|"), "", table_of_details$Location.house.pit.No.)
table_of_details$Location.house.pit.No. <- gsub("  ", "", table_of_details$Location.house.pit.No.)

# make concs readable in ug/g and remove ng
table_of_details$Lipid.concentration..ug.g.1. <- sprintf("%.3f", as.numeric(as.character(table_of_details$Lipid.concentration..ng.g.1.)) / 10e2)
table_of_details$Lipid.concentration..ng.g.1. <- NULL

# add big delta
table_of_details$big_delta <- round(
                               as.numeric(as.character(table_of_details$C18.0)) - 
                               as.numeric(as.character(table_of_details$C16.0)), 2)

# compute ranges of isotope values
ranges_of_dC <- with(table_of_details, data.frame(range(C16.0), range(C18.0)))

# make readable column headings (use HTML codes for greeks)
names(table_of_details) <- c("Laboratory <br>code", 
                             "Provenance", 
                             "Part of <br>vessel", 
                             paste0("&delta;","<sup>13</sup>C 16.0"),
                             paste0("&delta;","<sup>13</sup>C 18.0"),
                             "Lipid <br>concentration <br>(&mu;g g<sup>−1</sup>)",
                             "&Delta;<sup>13</sup>C") 

library(htmlTable)
htmlTable(table_of_details,
          header = names(table_of_details),
          caption="Potsherds selected for isotopic analyses") 
# Is the recovery rate signficantly different?
# chi-square to test if recovery rate is different 
# From Agresti(2007) p.39
M <- as.table(cbind((total_sherds - containing_lipid_residues),     
                     containing_lipid_residues))
dimnames(M) <- list(site = c("KM", "SS"),
                    condition = c("not_containing","containing"))

chi_sq_M <- chisq.test(M, simulate.p.value = TRUE, B = 10000)
chi_sq_M_chivalue <- unname(round(chi_sq_M$statistic, 3))
chi_sq_M_pvalue <- round(chi_sq_M$p.value, 3)
chi_df <- (nrow(M)-1) * (ncol(M)-1)

# Are the concentrations of lipids different between the two sites?
concs <- stack(list(KM = CSIA_KM$Lipid.concentration..ng.g.1., 
                    SS = CSIA_SS$Lipid.concentration..ng.g.1.))
# plot
ggplot(concs, aes(y = values, x = ind)) +
  geom_violin() +
  ylab("Lipid concentration (ng/g)") +
  xlab("Site") +
  theme_minimal()

# t-test
lipid_concentrations_diff <- t.test(CSIA_KM$Lipid.concentration..ng.g.1., 
                                    CSIA_SS$Lipid.concentration..ng.g.1.)
# test value
lipid_concentrations_diff_t <- unname(round(lipid_concentrations_diff$statistic, 3))
# p value
lipid_concentrations_diff_p <- unname(round(lipid_concentrations_diff$p.value, 3))

There is a difference in the recovery rate at the two sites (chi(r chi_df, n = r sum(M)) = r chi_sq_M_chivalue, p = r chi_sq_M_pvalue), with significantly more samples from Sosa-Dong containing residues. However, samples from Sosa-Dong have significantly lower concentrations per sherd than Kimpo-Yngchon (t(n = r nrow(concs)) = r lipid_concentrations_diff_t, p = r lipid_concentrations_diff_p).

The chromatogram of the GC-MS analysis from one of our samples from the Sosa-Dong site (SOS049). Due to degradation, we usually observe medium- and long-chain saturated fatty acids. 5-α Cholestane was added as an internal standard (132 ng / microliter).

# Input data from csv file
SOS049 <-  read.csv(file="data/GC-MS_exported_data_Figure5a.CSV", header=TRUE, sep=",")
make_retention_times_plot(SOS049)

The second chromatogram, sample KIM061

# Input data from csv file
KIM061 <- read.csv(file="data/GC-MS_exported_data_Figure5b.CSV", header=TRUE, sep=",") 
make_retention_times_plot_no_labels(KIM061)

The results of CSIA by GC-C-IRMS of the samples from the Sosa-Dong (SO) and Kimpo-Yangchon (KI) site using the reference from Craig et al., 2011 and Steele et al., 2010.

# combine both into one dataframe
CSIA_KM_and_SS <- rbind(CSIA_KM, CSIA_SS)
# get site ID to control plotting colour and shape
CSIA_KM_and_SS$ID <- substr(CSIA_KM_and_SS$Sample.No., 1, 2)
# draw plot
make_C16_C18_scatter_plot(CSIA_KM_and_SS) # + 
   # geom_text(aes(label = Sample.No.), hjust = 0, vjust = 0)

The big delta plot

# big_delta <- d18 - d16
big_delta <- CSIA_KM_and_SS$C18.0 - CSIA_KM_and_SS$C16.0
big_delta <- data.frame(big_delta = big_delta, 
                        C16.0 = CSIA_KM_and_SS$C16.0, 
                        Site = CSIA_KM_and_SS$ID,
                        sample_ID = CSIA_KM_and_SS$Sample.No.)
# plot
ggplot(big_delta, aes(C16.0, big_delta, colour = Site)) +
  geom_point(size = 2) + 
  xlab(bquote(delta*{}^13*"C 16:0 \u2030")) +
  ylab(bquote(Delta*{}^13*"C \u2030")) +
  xlim(-40,-10) +
  theme_minimal(base_size = 14) +
  scale_y_continuous(breaks=seq(-10, 5, 1)) +
  scale_color_manual(values = viridis(length(unique(big_delta$Site)))) # +
  # geom_text(aes(label = sample_ID), hjust = 0, vjust = 0)
  # SOS033 is the outlier at -8 Delta C18

Here are the details of the computational environment that our analysis was conducted in:

sessionInfo()


benmarwick/kwakmarwickaas2015 documentation built on May 12, 2019, 1:02 p.m.