In benmarwick/kwakmarwickaas2015: Kwak & Marwick JAS Paper

This document reproduces the figures illustrating the geochemical data in Kwak and Marwick 'Food processing and pottery use in the Early Bronze Age, Central Korean Peninsula'

library(kwakmarwick2015)
knitr::opts_chunk$set(cache = TRUE)

Figures

The density distribution of the radiocarbon dates from the two sites.

# read data in, three columns Name = lab code, Date = radiocarbon age, Uncertainty =  error
dates_KM <- read.csv('data/radiocarbon_KM.csv', stringsAsFactors = FALSE)
dates_SS <- read.csv('data/radiocarbon_SS.csv', stringsAsFactors = FALSE)

# make date names
dates_KM$Name <- paste0("KM_", dates_KM[,1])
dates_SS$Name <- paste0("SS_", dates_SS[,1])
# remove spaces
dates_KM$Name <- gsub("[[:space:]]", "_", dates_KM$Name)
dates_SS$Name <- gsub("[[:space:]]", "_", dates_SS$Name)

# split age and error term
library(stringr)

# KM
date_split  <- str_split_fixed(dates_KM$C14.date..BP..uncalibrated., "pm", 2)
date_split_Age <- as.numeric(gsub("[^0-9]", "", date_split[, 1]))
date_split_Error <- as.numeric(gsub("[^0-9]", "", date_split[, 2]))
# combine into data frame
dates_KM <- data.frame(Name = dates_KM$Name,
                       Date = date_split_Age,
                       Uncertainty = date_split_Error)
# SS
date_split  <- str_split_fixed(dates_SS$C14.date..BP..uncalibrated., "\\?\\?", 2)
date_split_Age <- as.numeric(gsub("[^0-9]", "", date_split[, 1]))
date_split_Error <- as.numeric(gsub("[^0-9]", "", date_split[, 2]))
# combine into data frame
dates_SS <- data.frame(Name = dates_SS$Name,
                       Date = date_split_Age,
                       Uncertainty = date_split_Error)

# combine all dates into one
dates <- rbind(dates_KM, dates_SS)
dates <- dates[complete.cases(dates), ]

# use the R package BChron...
library(Bchron)

# remove NAs
dates_SS <- dates_SS[complete.cases(dates_SS),]

# get estimation of activity through age as proxy

Dens_KM = BchronDensity(ages=dates_KM$Date, ageSds=dates_KM$Uncertainty, calCurves=rep('intcal13', nrow(dates_KM)))

Dens_SS = BchronDensity(ages=dates_SS$Date, ageSds=dates_SS$Uncertainty, calCurves=rep('intcal13', nrow(dates_SS)))

# get line for plotting

Dens_KM_plot <- BchronDensity_plot_params("KM", Dens_KM)
Dens_SS_plot <- BchronDensity_plot_params("SS", Dens_SS)

plot_all <- rbind(Dens_KM_plot, Dens_SS_plot)

library(ggplot2)
library(viridis)
ggplot(plot_all, aes(dateGrid, densFinal, colour = ID)) +
  geom_line(size = 0.5) +
  theme_minimal(base_size = 14) +
  xlab("calibrated years BP") +
  ylab("Density") +
  xlim(1000,4000) +
  scale_color_manual(values = viridis(length(unique(plot_all$ID))))

The table summarising counts and concentrations of residues in the samples

# we follow the example of Copley, doi:10.1016/j.jas.2004.07.004
# read in data
CSIA_KM <- read.csv('data/CSIA_KM.csv', stringsAsFactors = FALSE)
CSIA_SS <- read.csv('data/CSIA_SS.csv', stringsAsFactors = FALSE)

site_names <- c("Kimpo-Yngchon", "Sosa-Dong")
containing_lipid_residues <- c(nrow(CSIA_KM), nrow(CSIA_SS))
total_sherds <- c(49, 37)
percentage_containing_lipid_residues <- round((containing_lipid_residues / total_sherds * 100), 0)
lipid_concentrations_ng_max <- c(max(CSIA_KM$Lipid.concentration..ng.g.1.), 
                                 max(CSIA_SS$Lipid.concentration..ng.g.1.))
lipid_concentrations_ng_mean <- c(mean(CSIA_KM$Lipid.concentration..ng.g.1.), 
                                  mean(CSIA_SS$Lipid.concentration..ng.g.1.))
lipid_concentrations_ug_max <-  round(lipid_concentrations_ng_max / 10e2, 3)
lipid_concentrations_ug_mean <- round(lipid_concentrations_ng_mean / 10e2, 3)

summary_table <- data.frame( total_sherds,
                             containing_lipid_residues,
                             percentage_containing_lipid_residues,
                             lipid_concentrations_ug_max, 
                             lipid_concentrations_ug_mean)

# following the example in https://cran.r-project.org/web/packages/htmlTable/vignettes/tables.html
library(htmlTable)
htmlTable(summary_table,
          header = c("Total", "Containing lipid residues", "Percentage containing residues", 
                     "Maximum", "Mean"),
          rnames = site_names,
          cgroup = c("Number of sherds", "", "Lipid concentrations (&mu;g g<sup>−1</sup>)"),
          n.cgroup = c(2, 1, 2), 
          caption="Number of sherds containing lipids and their concentrations")

# table of Potsherd number, Laboratory code,    Period, Wall, decoration, technique Diameter at mouth (cm), Part of vessel, Lipid concentration (μg g−1),   δ13C16.0,   δ13C18.0,   Δ13C following http://www.nature.com/nature/journal/v486/n7403/fig_tab/nature11186_T1.html
table_of_concs <- data.frame(rbind(
  with(CSIA_KM, cbind(Sample.No., C16.0, C18.0,  Lipid.concentration..ng.g.1.)),
  with(CSIA_SS, cbind(Sample.No., C16.0, C18.0,  Lipid.concentration..ng.g.1.))
), stringsAsFactors = FALSE)


# get sample location and body part
sampling_KM <- read.csv('data/sampling_Kimpo.csv', stringsAsFactors = FALSE)
sampling_SS <- read.csv('data/sampling_Sosa.csv', stringsAsFactors = FALSE)
table_of_samples <- data.frame(rbind(sampling_KM, sampling_SS))

# join concs to sample location and body part
table_of_details <- merge(table_of_samples, table_of_concs, by = 'Sample.No.')

# clean it a bit...
remove <- c("/", "No.")
table_of_details$Location.house.pit.No. <- gsub(paste(remove, collapse = "|"), "", table_of_details$Location.house.pit.No.)
table_of_details$Location.house.pit.No. <- gsub("  ", "", table_of_details$Location.house.pit.No.)

# make concs readable in ug/g and remove ng
table_of_details$Lipid.concentration..ug.g.1. <- sprintf("%.3f", as.numeric(as.character(table_of_details$Lipid.concentration..ng.g.1.)) / 10e2)
table_of_details$Lipid.concentration..ng.g.1. <- NULL

# add big delta
table_of_details$big_delta <- round(
                               as.numeric(as.character(table_of_details$C18.0)) - 
                               as.numeric(as.character(table_of_details$C16.0)), 2)

# compute ranges of isotope values
ranges_of_dC <- with(table_of_details, data.frame(range(C16.0), range(C18.0)))

# make readable column headings (use HTML codes for greeks)
names(table_of_details) <- c("Laboratory <br>code", 
                             "Provenance", 
                             "Part of <br>vessel", 
                             paste0("&delta;","<sup>13</sup>C 16.0"),
                             paste0("&delta;","<sup>13</sup>C 18.0"),
                             "Lipid <br>concentration <br>(&mu;g g<sup>−1</sup>)",
                             "&Delta;<sup>13</sup>C") 

library(htmlTable)
htmlTable(table_of_details,
          header = names(table_of_details),
          caption="Potsherds selected for isotopic analyses")

# Is the recovery rate signficantly different?
# chi-square to test if recovery rate is different 
# From Agresti(2007) p.39
M <- as.table(cbind((total_sherds - containing_lipid_residues),     
                     containing_lipid_residues))
dimnames(M) <- list(site = c("KM", "SS"),
                    condition = c("not_containing","containing"))

chi_sq_M <- chisq.test(M, simulate.p.value = TRUE, B = 10000)
chi_sq_M_chivalue <- unname(round(chi_sq_M$statistic, 3))
chi_sq_M_pvalue <- round(chi_sq_M$p.value, 3)
chi_df <- (nrow(M)-1) * (ncol(M)-1)

# Are the concentrations of lipids different between the two sites?
concs <- stack(list(KM = CSIA_KM$Lipid.concentration..ng.g.1., 
                    SS = CSIA_SS$Lipid.concentration..ng.g.1.))
# plot
ggplot(concs, aes(y = values, x = ind)) +
  geom_violin() +
  ylab("Lipid concentration (ng/g)") +
  xlab("Site") +
  theme_minimal()

# t-test
lipid_concentrations_diff <- t.test(CSIA_KM$Lipid.concentration..ng.g.1., 
                                    CSIA_SS$Lipid.concentration..ng.g.1.)
# test value
lipid_concentrations_diff_t <- unname(round(lipid_concentrations_diff$statistic, 3))
# p value
lipid_concentrations_diff_p <- unname(round(lipid_concentrations_diff$p.value, 3))

There is a difference in the recovery rate at the two sites (chi(r chi_df, n = r sum(M)) = r chi_sq_M_chivalue, p = r chi_sq_M_pvalue), with significantly more samples from Sosa-Dong containing residues. However, samples from Sosa-Dong have significantly lower concentrations per sherd than Kimpo-Yngchon (t(n = r nrow(concs)) = r lipid_concentrations_diff_t, p = r lipid_concentrations_diff_p).

The chromatogram of the GC-MS analysis from one of our samples from the Sosa-Dong site (SOS049). Due to degradation, we usually observe medium- and long-chain saturated fatty acids. 5-α Cholestane was added as an internal standard (132 ng / microliter).

# Input data from csv file
SOS049 <-  read.csv(file="data/GC-MS_exported_data_Figure5a.CSV", header=TRUE, sep=",")
make_retention_times_plot(SOS049)

The second chromatogram, sample KIM061

# Input data from csv file
KIM061 <- read.csv(file="data/GC-MS_exported_data_Figure5b.CSV", header=TRUE, sep=",") 
make_retention_times_plot_no_labels(KIM061)

The results of CSIA by GC-C-IRMS of the samples from the Sosa-Dong (SO) and Kimpo-Yangchon (KI) site using the reference from Craig et al., 2011 and Steele et al., 2010.

# combine both into one dataframe
CSIA_KM_and_SS <- rbind(CSIA_KM, CSIA_SS)
# get site ID to control plotting colour and shape
CSIA_KM_and_SS$ID <- substr(CSIA_KM_and_SS$Sample.No., 1, 2)
# draw plot
make_C16_C18_scatter_plot(CSIA_KM_and_SS) # + 
   # geom_text(aes(label = Sample.No.), hjust = 0, vjust = 0)

The big delta plot

# big_delta <- d18 - d16
big_delta <- CSIA_KM_and_SS$C18.0 - CSIA_KM_and_SS$C16.0
big_delta <- data.frame(big_delta = big_delta, 
                        C16.0 = CSIA_KM_and_SS$C16.0, 
                        Site = CSIA_KM_and_SS$ID,
                        sample_ID = CSIA_KM_and_SS$Sample.No.)
# plot
ggplot(big_delta, aes(C16.0, big_delta, colour = Site)) +
  geom_point(size = 2) + 
  xlab(bquote(delta*{}^13*"C 16:0 \u2030")) +
  ylab(bquote(Delta*{}^13*"C \u2030")) +
  xlim(-40,-10) +
  theme_minimal(base_size = 14) +
  scale_y_continuous(breaks=seq(-10, 5, 1)) +
  scale_color_manual(values = viridis(length(unique(big_delta$Site)))) # +
  # geom_text(aes(label = sample_ID), hjust = 0, vjust = 0)
  # SOS033 is the outlier at -8 Delta C18