require(knitr)
require(printr) # devtools::install_github("yihui/printr")
require(rmarkdown)
require(ggplot2)
require(reshape2)
require(plyr)
require(dplyr)
require(vcd)
require(rjags)
require(runjags)
require(coda)
require(BEST)
require(lattice)
require(xtable)
require(data.table)
require(git2r)
require(dependencies) # devtools::install_github("ropensci/dependencies")
require(Bchron)
require(JerimalaiStoneArtefacts)
opts_chunk$set(tidy=FALSE, warning=FALSE, message=FALSE, 
               comment = NA, verbose = TRUE, echo=TRUE,
               cache = TRUE, quiet = TRUE)
theme_set(theme_bw(base_size=14))
runjags.options( inits.warning=FALSE , rng.warning=FALSE )
# custom functions to save figures at JAS size & resolution
# cf http://www.elsevier.com/author-schemas/artwork-and-media-instructions/artwork-sizing
jhe_190mm_ggsave <- function(filename = default_name(plot), 
                            width= 190, 
                            units = "mm", 
                            dpi= 1000, ...) {
  ggsave(filename=filename, width=width, dpi=dpi, units=units, ...)
}

jhe_90mm_ggsave <- function(filename = default_name(plot), 
                            width= 90, 
                            units = "mm", 
                            dpi= 1000, ...) {
  ggsave(filename=filename, width=width, dpi=dpi, units=units, ...)
}
# dates
dates <- read.csv("data/Jerimalai_dates_Square_B.csv", as.is = TRUE)
# complete flake data
flakes <- read.csv("data/JB_Chert_Flakes_and_Retouch.csv")
# core type data
core_types <- read.csv("data/Jerimalai_cores_techno_metrics.csv")
# spit depths
depths <- read.csv("data/Jerimalai_spit_depths_Square_B.csv")
# sediment volumes
vols <- read.csv("data/Artefact densities with soil volumes Sq B.csv", skip = 1)
# all artefacts
all <- read.csv("data/Jerimalai_All_Artefacts_Square_B.csv")
# techno types
cores <- read.csv("data/Jerimalai_tech_table_cores.csv")
types <-  na.omit(read.csv("data/Jerimalai_tech_table_types.csv"))
retouch <- read.csv("data/Jerimalai_tech_table_retouch.csv")
features <- read.csv("data/Jerimalai_tech_table_features.csv")
ground <- read.csv("data/Jerimalai_tech_table_ground.csv")
# retouch indices
retouch_indices <- read.csv("data/Jerimalai_retouch_indices.csv")

Summary of results for Stone Artefacts from Jeremalai Square A

sq_a_all <- read.csv("data/Jerimalai_All_Artefacts_Square_A.csv") %>% 
  filter(Square == "A")

Chronology

Radiocarbon ages from square A have fewer Pleistocene dates than square B and an inversion of samples from spits 38 and 27. These details make it difficult to group excavated spits into analytical units for comparison with square B. The stone artefact assemblage is also smaller than from square B. Due to these limitations, we took a conservative approach and aggreagates spits from square A into two groups: a Pleistocene deposit that includes spits 61-26 and a Holocene deposit that includes spits 25-1.

sq_a_radiocarbon <- read.csv("data/Jerimalai_dates_Square_A.csv")
kable(sq_a_radiocarbon, caption = "Radiocarbon ages from Jerimalai square A")

Results: Raw materials

The output from the code chunk below shows that chert strongly dominates raw material choices in square A, just as it did in square B.

# subset flakes
sq_a_flakes <- sq_a_all %>% 
  filter(Artclas == "Flake")


# In Square A, Phase I corresponds to Spits 46-39 and has an associated date of 42 ka  from Spit 39 (Table 1). Phase II is comprised of Spits 38-26 with associated ages of between 17-10 ka (with one inversion noted in Spit 27 with a date of 22 ka). Phase III consists of Spits 25-6 and is associated with ages of 6.4-5.5 ka, and Phase IV is covered by Spits 5-1 with associated ages of 3-0 ka. 

# here's a function to assign phases based on spit numbers
  sq_a_makephases <- function(x) {ifelse(x >= 1 & x <= 5, 4,
                                  ifelse(x >= 6 & x <= 25, 3,
                                  ifelse(x >= 26 & x <= 38, 2,
                                  ifelse(x >= 39 & x <= 46, 1, NA))))}
# apply phase numbers
sq_a_flakes <- sq_a_flakes[!(is.na(sq_a_flakes$Spit)),]
sq_a_flakes$phase  <- sq_a_makephases(sq_a_flakes$Spit)
# raw material
raw <- dcast(sq_a_flakes, Material ~ phase) 
# subset dominant raw materials
raw[,2:3] <- sapply(raw[,2:3], as.numeric)
dom <- raw[rowSums(raw[,2:3]) > 10,]
colnames(dom) <- c("raw material", "phase 1", "phase 2", "phase 3", "phase 4")
kable(dom, caption = "Square A: Frequencies of dominant raw materials by depositional phase")
data <- melt(as.matrix(dom),  varnames=c("raw_material", "phase"), value.name="Freq")
myDataFrame = data
yName=names(myDataFrame)[3]   # Freq
x1Name=names(myDataFrame)[2]  # phase 
x2Name=names(myDataFrame)[1]  # raw material
x1contrasts = list( 
  list( c("phase 1")  , c("phase 2") , compVal=0.0 , ROPE=c(-0.1,0.1) ),
  list( c("phase 2")  , c("phase 3") , compVal=0.0 , ROPE=c(-0.1,0.1) ),
  list( c("phase 3")  , c("phase 4") , compVal=0.0 , ROPE=c(-0.1,0.1) ))

numSavedSteps = 12000 # MCMC parameters
thinSteps = 10        # MCMC parameters

mcmcCoda = genMCMC_cont_table( datFrm=myDataFrame , 
                    yName=yName , x1Name=x1Name , x2Name=x2Name ,
                    numSavedSteps=numSavedSteps , thinSteps=thinSteps , 
                    )

# Get summary statistics of chain:
summaryInfo = smryMCMC_cont_table( mcmcCoda , 
                        datFrm=myDataFrame , x1Name=x1Name , x2Name=x2Name ,
                        x1contrasts=x1contrasts 
                         )

# #  Display posterior information (not easy to read unless using interactively):
#     plotMCMC_cont_table( mcmcCoda , 
#               datFrm=myDataFrame , yName=yName , x1Name=x1Name , 
#                x2Name=x2Name ,
#               x1contrasts=x1contrasts  )

# subset summaryInfo to show HDI interval for interactions
HDI_intervals_for_interactions <- summaryInfo[c((nrow(summaryInfo) - length(x1contrasts) + 1):nrow(summaryInfo)), c("HDIlow", "HDIhigh")]

kable(round(HDI_intervals_for_interactions,3), caption = "Square A: Highest density intervals (95%) for the posterior distributions of the interactions between phases and raw material frequencies.")
# here is the frequentist equivalent 
sq_A_raw_material_by_phase_nhst <- assocstats(as.matrix(dom[,-1])) 

The Bayesian test includes zero in the HDI, indicating a non-credible difference. The t-test In the code chunk above we obtain the results of a frequentist chisquare test which returns a statistically significant result: chi = r sq_A_raw_material_by_phase_nhst$chisq_tests[2, 1], df = r sq_A_raw_material_by_phase_nhst$chisq_tests[2, 2], p = r sq_A_raw_material_by_phase_nhst$chisq_tests[2, 3]. This shows a statistically significant change in flake raw material, however the low Cramer's V indicates a very small effect size. This is consistant with what we see in the Bayesian HDI intervals, and we interpret these results as an insubstantial increase in the very small proportion of artefacts made from volcanic rock.

Results: Discard rates

The code chunk below generates the figure that shows discard rates of chert artefacts over time at Jerimalai square A. Each point is an excavation unit. The blue line is a locally weighted regression line (span = 0.4) to aid in visualising the trend. Artefact discard at square A peaksat aroudn 0.7m below the surface, an equivalent depth to the peak in square B. This corresponds to around spits 33-34, which lies between the two inverted dates (Wk-17830, Wk-19227). This means that we can only offer a wide interval of 20-15 ka BP for the age of the peak discard at square A. This is broadly consistent with a post-LGM increase in discard observed at square B.

# spit depths
sq_a_depths <- read.csv("data/Jerimalai_spit_depths_Square_A.csv")
# sediment volumes
vols <- read.csv("data/Artefact densities with soil volumes Sq B.csv", skip = 1)
# put depths on lithic data
sq_a_flakes$depth <- sq_a_depths$Depth.bs..m[match(sq_a_flakes$Spit,sq_a_depths$Spit.no)]
# discard rates
sq_a_discard <- aggregate(Weight ~ depth + Spit, sq_a_flakes, length)
# sediment volumes: put volumes on
sq_a_discard$sedvol <- vols$Soil.1[match(sq_a_discard$Spit, vols$X.3)]
# put spit thickesses on
sq_a_discard$thick <- c(0.018, diff(sq_a_discard$depth)) # add first value from depth_and_dates.xsl
# compute artefacts per kg of sediment
sq_a_discard$kgsed <- with(sq_a_discard, Weight / sedvol) # weight is count of artefacts that have a weight
# compute artefact per cubic meter (spit thickess)
sq_a_discard$cubmet <- with(sq_a_discard, Weight / thick)
# seems we have an unusually extreme value in spit 34
# omit - perhaps a data collection typo
# discard <- discard[discard$Spit != 34, ]
# Plot
ggplot(sq_a_discard, (aes(depth, cubmet))) +
  geom_point() +
  stat_smooth(span = 0.5, se = FALSE) +
  xlab("Depth below surface (m)") +
  ylab("Number of chert flakes per \ncubic meter of deposit") +
  coord_flip() +
  scale_x_reverse()

Results: Taphonomy

The code chunk below computes a Bayesian Poisson exponential ANOVA to investigate differences in flake breakage classes over time.

sq_a_allchert <- sq_a_all[sq_a_all$Material == 'Chert', ]
sq_a_allchert$phase <- sq_a_makephases(sq_a_allchert$Spit)
# make Artclass that is long and transv breaks
sq_a_allchert$Artclas <- ifelse(sq_a_allchert$Breaks == "", 
       as.character(sq_a_allchert$Artclas), 
       paste(sq_a_allchert$Artclas, sq_a_allchert$Breaks, sep = "-"))
sq_a_taph <- data.frame(table(sq_a_allchert$Artclas))
# use regex to get broken flakes -b- 
sq_a_broken <- sq_a_allchert[grep("-b", sq_a_allchert$Artclas), ]
# get counts of broken to complete per phase
# flake to -b-
sq_a_breaks <- dcast(sq_a_allchert, Artclas ~ phase)[-1,]
sq_a_breaks <- sq_a_breaks[sq_a_breaks$Artclas =="flake" | grepl("-b-", sq_a_breaks$Artclas), ]
sq_a_allchert$Artclas <- tolower(sq_a_allchert$Artclas)
sq_a_allchert$breakt <- "" # create variable to fill
sq_a_allchert$breakt[grep("trans", sq_a_allchert$Artclas)] <- "trans"
sq_a_allchert$breakt[grep("long", sq_a_allchert$Artclas)] <- "long"
# per depositional phase
sq_a_breakt <- dcast(sq_a_allchert, breakt ~ phase)[-1,]
# add complete flake counts
sq_a_breakt <- rbind( sq_a_breakt , setNames( sq_a_breaks[1, ] , names( sq_a_breakt ) ) )
# shift rownames out and delete them
rownames(sq_a_breakt) <- sq_a_breakt[,1]
sq_a_breakt <- sq_a_breakt[,-1]
# do bayesian contingency table test
colnames(sq_a_breakt) <- c("phase 1", "phase 2", "phase 3", "phase 4")
data <- melt(as.matrix(sq_a_breakt),  varnames=c("breakt", "phase"), value.name="Freq")
myDataFrame = data
yName=names(myDataFrame)[3]   # Freq 
x1Name=names(myDataFrame)[2]  # phase
x2Name=names(myDataFrame)[1]  # break type
x1contrasts = list( 
  list( c("phase 1") , c("phase 2") , compVal=0.0 , ROPE=c(-0.1,0.1)),
  list( c("phase 2") , c("phase 3") , compVal=0.0 , ROPE=c(-0.1,0.1)),
  list( c("phase 3") , c("phase 4") , compVal=0.0 , ROPE=c(-0.1,0.1))
  )
numSavedSteps = 12000 # MCMC parameters
thinSteps = 10        # MCMC parameters

mcmcCoda = genMCMC_cont_table( datFrm=myDataFrame , 
                    yName=yName , x1Name=x1Name , x2Name=x2Name ,
                    numSavedSteps=numSavedSteps , thinSteps=thinSteps , 
                    )

# Get summary statistics of chain:
summaryInfo = smryMCMC_cont_table( mcmcCoda , 
                        datFrm=myDataFrame , x1Name=x1Name , x2Name=x2Name ,
                        x1contrasts=x1contrasts 
                         )

# #  Display posterior information (not easy to read unless using interactively):
#     plotMCMC_cont_table( mcmcCoda , 
#               datFrm=myDataFrame , yName=yName , x1Name=x1Name , 
#                x2Name=x2Name ,
#               x1contrasts=x1contrasts  )

# table of raw counts
kable(sq_a_breakt, caption = "Square A: Table of frequencies of each class of breakage by phase")

# subset summaryInfo to show HDI interval for interactions
HDI_intervals_for_interactions <-  summaryInfo[c((nrow(summaryInfo) - length(x1contrasts) + 1):nrow(summaryInfo)), c("HDIlow", "HDIhigh")]
kable(round(HDI_intervals_for_interactions,3), caption = "Square A: Highest density intervals (95%) for the posterior distributions of the interactions between phases and flake breakage classes.")

The above code chunk returns results that there is no credible difference in the frequences of flake breakage types by phase.

The code chunk below computes the frequentist equivalent, the chi-square test and the Cramer's V for effect size.

artefact_taphonomy_nhst <- assocstats(as.matrix(sq_a_breakt))

The code chunk below summarises the frequencies of heat-treated flakes at Jerimalai square A

check <- sum(sq_a_flakes$Heat, na.rm = TRUE) / nrow(sq_a_flakes)
heat_sqa <- aggregate(Heat ~ phase, sq_a_flakes, length)
total <- aggregate(Spit ~ phase, sq_a_flakes, length)
heat_sqa$Not_heat <- total$Spit - heat_sqa$Heat
# show proportions that are heat-treated
max_heat <- max(heat_sqa$Heat / total$Spit)
min_heat <- min(heat_sqa$Heat / total$Spit)
heat_t <- t(heat_sqa)
# do bayesian contingency table test
names(heat_t) <- c("phase 1", "phase 2", "phase 3", "phase 4")
heat <- heat_t[-1,]

The code chunk below computes a Bayesian Poisson exponential ANOVA to investigate differences in heat treatment by phase.

data <- melt(as.matrix(heat),  varnames=c("heat", "phase"), value.name="Freq")
myDataFrame = data
yName=names(myDataFrame)[3]   # Freq 
x1Name=names(myDataFrame)[2]  # phase
x2Name=names(myDataFrame)[1]  # heat
x1contrasts = list( 
  list( c("phase 1") , c("phase 2") , compVal=0.0 , ROPE=c(-0.1,0.1) ), 
  list( c("phase 2") , c("phase 3") , compVal=0.0 , ROPE=c(-0.1,0.1) ), 
  list( c("phase 3") , c("phase 4") , compVal=0.0 , ROPE=c(-0.1,0.1) )
  )
numSavedSteps = 12000 # MCMC parameters
thinSteps = 10        # MCMC parameters

mcmcCoda = genMCMC_cont_table( datFrm=myDataFrame , 
                    yName=yName , x1Name=x1Name , x2Name=x2Name ,
                    numSavedSteps=numSavedSteps , thinSteps=thinSteps , 
                    )

# Get summary statistics of chain:
summaryInfo = smryMCMC_cont_table( mcmcCoda , 
                        datFrm=myDataFrame , x1Name=x1Name , x2Name=x2Name ,
                        x1contrasts=x1contrasts 
                         )

# # #  Display posterior information (not easy to read unless using interactively):
#     plotMCMC_cont_table( mcmcCoda , 
#               datFrm=myDataFrame , yName=yName , x1Name=x1Name , 
#                x2Name=x2Name ,
#               x1contrasts=x1contrasts  )

# table of heated artefact counts
kable(heat, caption = "Square A: Table of frequencies of heat treatment by phase")

# subset summaryInfo to show HDI interval for interactions
HDI_intervals_for_interactions <- summaryInfo[c((nrow(summaryInfo) - length(x1contrasts) + 1):nrow(summaryInfo)), c("HDIlow", "HDIhigh")]
kable(round(HDI_intervals_for_interactions,3), caption = "Square A: Highest density intervals (95%) for the posterior distributions of the interactions between phases and flake breakage classes.")

The code chunk above returned results that indicate credible differences in the frequency of heat treatment between phase one and two. There are more flakes that have not been affected by heat in the Holocene phase. The code chunk below computes a frequentist chi-square and Cramer's V test for the heat treatment data.

sq_A_chert_artefacts_heat_nhst <- assocstats(as.matrix(heat))

The code chunk above returns a chi-squared value of r sq_A_chert_artefacts_heat_nhst$chisq_tests[2,1] and a p-value of r sq_A_chert_artefacts_heat_nhst$chisq_tests[2,3], indicating a signficant difference in frequencies of breakage classes by phase. However, the Cramer's V value of r sq_A_chert_artefacts_heat_nhst$cramer indicates that the effect size is extremely small. We interpret this result to mean that although the test result is statistically significant, there is no substantial significance in the differences in frequencies of heat alteration by phase.

Results: Metric and technological characteristics of cores and unretouched flakes

The code chunk below produces the table summarizing the attributes of chert complete flakes from Jerimalai square A.

metrics <- sq_a_flakes %>% 
  group_by(phase) %>% 
  summarise(mean(Length, na.rm = TRUE),  
            mean(Width, na.rm = TRUE), 
            mean(Thick, na.rm = TRUE),
            mean(Weight, na.rm = TRUE),
            mean(Length, na.rm = TRUE),
            mean(Platwid, na.rm = TRUE),
            mean(Platthic, na.rm = TRUE),
            mean(NoDS, na.rm = TRUE),
            mean(Cortex, na.rm = TRUE),
            sd(Length, na.rm = TRUE),  
            sd(Width, na.rm = TRUE), 
            sd(Thick, na.rm = TRUE),
            sd(Weight, na.rm = TRUE),
            sd(Length, na.rm = TRUE),
            sd(Platwid, na.rm = TRUE),
            sd(Platthic, na.rm = TRUE),
            sd(NoDS, na.rm = TRUE),
            sd(Cortex, na.rm = TRUE),
            n = length(Weight))

# get overhang removal data also
ohr <- filter(sq_a_flakes, Overhang == "Yes") %>%
       group_by(phase) %>%
       summarise(OHR_n = length(Overhang))
# get percentages of OHR per phase
ohr$OHR_perc <- ohr$OHR_n/metrics$n * 100

# combine
metrics <- cbind(metrics, ohr[,c("OHR_n", "OHR_perc")])
metrics <- as.data.frame(t(round(metrics,2)))[-1,]
colnames(metrics) <- c("phase 1", "phase 2", "phase 3", "phase 4")
kable(metrics, caption = "Square A: Summary of chert flake metrics by phase")

The code chunk below uses a Bayesian one-way ANOVA to investigate differences in chert flake mass by depositional phase.

myDataFrame <- data.frame(phase = sq_a_flakes$phase, mass = sq_a_flakes$Weight)
yName = names(myDataFrame)[2] # mass
xName = names(myDataFrame)[1] # phase
contrasts = list(
  list( c("1") , c("2") , compVal=0.0 , ROPE=c(-0.1,0.1) ), 
  list( c("2") , c("3") , compVal=0.0 , ROPE=c(-0.1,0.1) ), 
  list( c("3") , c("4") , compVal=0.0 , ROPE=c(-0.1,0.1) ))
# Generate the MCMC chain:
  mcmcCoda_ANOVA = genMCMC_ANOVA(datFrm=myDataFrame , yName=yName , xName=xName ,
                      numSavedSteps=11000 , thinSteps=10 
                      )
  # Get summary statistics of chain:
  summaryInfo_ANOVA = smryMCMC_ANOVA( mcmcCoda_ANOVA , 
                          datFrm=myDataFrame , xName=xName ,
                          contrasts=contrasts 
                          )

#  Display posterior information (not easy to read unless using interactively):
    plotMCMC_ANOVA( mcmcCoda_ANOVA , 
              datFrm=myDataFrame , yName=yName , xName=xName , 
              contrasts=contrasts  )

# subset summaryInfo to show HDI interval for interactions
HDI_intervals_for_interactions <- summaryInfo_ANOVA[c((nrow(summaryInfo_ANOVA) - length(contrasts) + 1):nrow(summaryInfo_ANOVA)), c("HDIlow", "HDIhigh")]
kable(round(HDI_intervals_for_interactions,3), caption = "Square A: Highest density intervals (95%) for the posterior distributions of the interactions between phases and flake mass.")

The code chunk above produced results a credibly different distributions of flake mass between the two phases. In the code chunk below we repeat the same investigation using a frequentist ANOVA.

fit <- aov(Weight ~ as.factor(phase), sq_a_flakes)
fit_summary <- summary(fit)

In the code chunk above we obtain the results of a frequentist t-test which returns a statistically significant result: F = r fit_summary[[1]]$'F value'[1], df = r fit_summary[[1]]$Df[1], p = r fit_summary[[1]]$'Pr(>F)'[1]. This shows a statistically significant change in chert flake mass, consistant with what we see in the Bayesian HDI intervals. Flakes get slightly larger in the Holocene period.

The code chunk below produces a summary table of metric attributes of chert complete cores from Jerimalai square A.

sq_a_cores_mass <- sq_a_allchert[sq_a_allchert$Artclas == "core", ]
sq_a_core_metrics <- sq_a_cores_mass %>% 
  group_by(phase) %>% 
  summarise(mean(Weight), 
            mean(Length), 
            mean(Width), 
            mean(Thick),
            sd(Weight), 
            sd(Length), 
            sd(Width), 
            sd(Thick),
            n = length(Weight))
sq_a_core_metrics_t <- t(round(sq_a_core_metrics,2))
colnames(sq_a_core_metrics_t) <- c("phase 1", "phase 2", "phase 3", "phase 4" )
kable(sq_a_core_metrics_t, caption = "Square A: Summary of chert core metrics")

There are slightly fewer cores in the Holocene, and they tend to be slightly smaller. The code chunk below creates a figure that shows the distribution of core and complete flake mass by depositional phases

# plot flake and core mass by phase in one box plot
sq_a_flakes_cores_weight <- sq_a_allchert %>% 
  filter(Artclas %in% c("flake", "core")) %>% 
  filter(Square == "A") %>% 
  select(c(Artclas, Weight, phase))

ggplot(sq_a_flakes_cores_weight, aes(fill = Artclas, as.factor(phase), Weight)) +
  geom_point(aes(colour = Artclas), size = 0.5, alpha = 0.9, shape = 1, 
             position=position_jitterdodge(dodge.width=0.9)) +
  geom_boxplot(alpha = 0.5) +
  scale_y_log10() +
  #theme_minimal(base_size = 4) +
  xlab('depositional phase') +
  ylab("mass (g)") +
  scale_fill_manual(name="artefact type",
                      labels=c("core", "complete flake"), 
                     values = c("grey10", "grey80")) +
  scale_colour_manual(name="artefact type",
                      labels=c("core", "complete flake"), 
                      values = c("grey10", "grey15")) 

The box and whisker plot above suggests that changes in the disrtbution of core and flake mass were very subtle between the two phases.

The code chunk below uses a Bayesian one-way ANOVA to investigate differences in chert core mass by depositional phase.

myDataFrame <- data.frame(phase = sq_a_cores_mass$phase, mass = sq_a_cores_mass$Weight)
yName = names(myDataFrame)[2] # mass
xName = names(myDataFrame)[1] # phase
contrasts = list(
  list( c("1") ,  c("2") , compVal=0.0 , ROPE=c(-0.1,0.1) ) , 
  list( c("2") ,  c("3") , compVal=0.0 , ROPE=c(-0.1,0.1) ), 
  list( c("3") ,  c("4") , compVal=0.0 , ROPE=c(-0.1,0.1) ))
# Generate the MCMC chain:
  mcmcCoda_ANOVA = genMCMC_ANOVA(datFrm=myDataFrame , yName=yName , xName=xName ,
                      numSavedSteps=11000 , thinSteps=10 
                      )
  # Get summary statistics of chain:
  summaryInfo_ANOVA = smryMCMC_ANOVA( mcmcCoda_ANOVA , 
                          datFrm=myDataFrame , xName=xName ,
                          contrasts=contrasts 
                          )

# #  Display posterior information (not easy to read unless using interactively):
    plotMCMC_ANOVA( mcmcCoda_ANOVA , 
              datFrm=myDataFrame , yName=yName , xName=xName , 
              contrasts=contrasts  )
#   
# subset summaryInfo to show HDI interval for interactions
HDI_intervals_for_interactions <- summaryInfo_ANOVA[c((nrow(summaryInfo_ANOVA) - length(contrasts) + 1):nrow(summaryInfo_ANOVA)), c("HDIlow", "HDIhigh")]
kable(round(HDI_intervals_for_interactions,3), caption = "Square A: Highest density intervals (95%) for the posterior distributions of the interactions between phases and core mass.")
fit <- aov(mass ~ as.factor(phase), myDataFrame)
fit_summary <- summary(fit)

In the code chunk above we obtain the results of a frequentist ANOVA which returns a statistically non-significant result: F = r fit_summary[[1]]$'F value'[1], df = r fit_summary[[1]]$Df[1], p = r fit_summary[[1]]$'Pr(>F)'[1]. This shows a non-significant change in chert core mass. This is equivalent to what we see in the Bayesian HDI interval, which includes zero, suggesting no credible difference.

The code chunk below produces the figures that illustrate the frequency of flake platform categories for chert complete flakes at Jerimalai square A.

# flake platform
plat <- dcast(sq_a_flakes, Plat ~ depth) 
rownames(plat) <- plat[,1]
# get rid of rows with no plat
plat <- plat[rownames(plat) != "",]
plat <- plat[,-1]
plat_freqs <- data.frame(plat_types = rownames(plat), Freq = rowSums(plat))
plat_freqs <- plat_freqs[plat_freqs$Freq > 90,]
plat_freqs$plat_types <- c('2-scars', '3-scars', 'focalised', 'single')
# plot freq of platform types
main <- ggplot(plat_freqs, aes(reorder(plat_types, Freq), Freq)) + 
  geom_bar(stat="identity", fill = "white", colour = "black") +
  # theme_minimal() +
  xlab("platform type") +
  ylab("frequency") +
  # remove grid lines for subplot
  theme_update(panel.background = element_blank(),
               panel.grid.major = element_blank(),
               panel.grid.minor = element_blank())
main + theme_minimal()
# raw materials by spit
# compute proportions per layer (col props)
all_tab <- data.frame()
for(i in seq(ncol(plat))){
  for(j in seq(nrow(plat))){
    all_tab[j,i] <- plat[j,i]/colSums(plat)[i]
  }
}
# check 
check <- colSums(all_tab) # should == 1
colnames(all_tab) <- colnames(plat) 
check <- rowSums(all_tab) # should be various
all_tab$plat_type <- rownames(plat) 
# get rid of raw materials that are not very abundant
all_tab <- all_tab[which(rowSums(all_tab[,1:ncol(all_tab)-1]) > 1.5) , ]
# get rid of NA column
all_tab <- all_tab[,names(all_tab) != 'NA']
# plot
all_tab_m <- melt(all_tab, id.var = 'plat_type')
# by phase
plat <- dcast(sq_a_flakes, Plat ~ phase)
rownames(plat) <- plat[,1]
# get rid of rows with no plat
plat <- plat[rownames(plat) != "",]
plat <- plat[,-1]
# raw materials by site
# compute proportions per phase (col props)
all_tab <- data.frame()
for(i in seq(ncol(plat))){
  for(j in seq(nrow(plat))){
    all_tab[j,i] <- plat[j,i]/colSums(plat)[i]
  }
}
all_tab$plat_type <- rownames(plat) 
# get rid of raw materials that are not very abundant
all_tab <- all_tab[which(rowSums(all_tab[,1:ncol(all_tab)-1]) > 0.15) , ]
# get rid of NA column
all_tab <- all_tab[,names(all_tab) != 'NA']
# plot distibution of platform sizes for each type
sq_a_flakes$Platarea <- with(sq_a_flakes, (Platthic * Platwid))
plat_area_type <- sq_a_flakes[sq_a_flakes$Plat %in% c("Single", "Focal", "2-scars", "Cort", '3-scars'),]

# make names a bit more readable
plat_area_type$Plat <- ifelse(plat_area_type$Plat == 'Cort', 'cortical',
                      ifelse(plat_area_type$Plat == 'Focal',  'focalised', 
                    ifelse(plat_area_type$Plat == 'Single',  'single', as.character(plat_area_type$Plat))))  

# put types in same order as frequency plot
plat_area_type$Plat <- factor(plat_area_type$Plat, 
                              levels = c('cortical', '3-scars','2-scars', 'focalised', 'single'), ordered = TRUE)
# remove NA
plat_area_type <- plat_area_type[!is.na(plat_area_type$Plat), ]

sub <- ggplot(plat_area_type, aes(Plat, Platarea)) +
  geom_boxplot() +
  geom_jitter(size = 0.5, alpha = 0.3, shape = 1) +
  scale_y_log10() +
  ylab(as.expression(bquote('platform area (' * mm^{2} * ")" ))) +
  xlab("") +
  theme_minimal()

# plot freq of plat type and platform area together in one plot

vp <- viewport(width = 0.45, height = 0.54, 
               x = 0.57, y = 0.4, 
               just = c("right", "bottom"))

# combine plots, print and save (wont show in console)
  png("figures/sq_A_Jeremalai-platform-area-by-plat-type.png", 
      units = "mm", w = 190, h = 190/2, res = 600)
     print(main)
     print(sub + theme_bw(base_size = 10), vp = vp)
  dev.off()

The code chunk below creates a plot showing the distributions of core and flake cortex by depositional phase at Jerimalai square A.

sq_a_flakes_cores_cortex <- sq_a_allchert[(sq_a_allchert$Artclas == "flake" | sq_a_allchert$Artclas == "core"), c('Artclas', 'Cortex', 'phase') ]
ggplot(sq_a_flakes_cores_cortex, aes( fill = Artclas, as.factor(phase), Cortex)) +
  geom_point(aes(colour = Artclas), size = 0.5, alpha = 0.9, shape = 1, 
             position=position_jitterdodge(dodge.width=0.9)) +
  geom_boxplot(alpha = 0.4) +
  scale_y_log10() +
  xlab('depositional phase') +
  ylab("percent cortex") +
  scale_fill_manual(name="artefact type",
                      labels=c("core", "complete flake"), 
                     values = c("grey10", "grey80")) +
  scale_colour_manual(name="artefact type",
                      labels=c("core", "complete flake"), 
                      values = c("grey10", "grey20")) 

The code chunk below uses a Bayesian one-way ANOVA to investigate differences in chert flake cortex by depositional phase in square A.

myDataFrame <- data.frame(phase = sq_a_flakes$phase, cortex = sq_a_flakes$Cortex)
yName = names(myDataFrame)[2] # cortex
xName = names(myDataFrame)[1] # phase
# too many zero values... makes the mode zero and breaks the model
myDataFrame  <- myDataFrame[myDataFrame$cortex != 0 & !is.na(myDataFrame$cortex), ]
contrasts = list(
  list( c("1") ,  c("2") , compVal=0.0 , ROPE=c(-0.1,0.1) ) ,
  list( c("2") ,  c("3") , compVal=0.0 , ROPE=c(-0.1,0.1) ) ,
  list( c("3") ,  c("4") , compVal=0.0 , ROPE=c(-0.1,0.1) ) )
# Generate the MCMC chain:
  mcmcCoda_ANOVA = genMCMC_ANOVA(datFrm=myDataFrame , yName=yName , xName=xName ,
                      numSavedSteps=11000 , thinSteps=10 
                      )
  # Get summary statistics of chain:
  summaryInfo_ANOVA = smryMCMC_ANOVA( mcmcCoda_ANOVA , 
                          datFrm=myDataFrame , xName=xName ,
                          contrasts=contrasts 
                          )
  # #  Display posterior information (not easy to read unless using interactively):
#     plotMCMC_ANOVA( mcmcCoda_ANOVA , 
#               datFrm=myDataFrame , yName=yName , xName=xName , 
#               contrasts=contrasts  )
#   
# subset summaryInfo to show HDI interval for interactions
HDI_intervals_for_interactions_f <- summaryInfo_ANOVA[c((nrow(summaryInfo_ANOVA) - length(contrasts) + 1):nrow(summaryInfo_ANOVA)), c("HDIlow", "HDIhigh")]
kable(round(HDI_intervals_for_interactions_f,3), caption = "Square A: Highest density intervals (95%) for the posterior distributions of the interactions between phases and flake cortex")

The code chunk above produced results that indicate that the HDI include zero, which we interpret as no credible difference. In the code chunk below we repeat the same investigation using a frequentist t-test.

myDataFrame <- data.frame(phase = sq_a_flakes$phase, cortex = sq_a_flakes$Cortex)
fit <- aov(cortex ~ as.factor(phase), myDataFrame)
fit_summary <- summary(fit)

In the code chunk above we obtain the results of a frequentist t-test which returns a statistically non-significant result: F = r fit_summary[[1]]$'F value'[1], df = r fit_summary[[1]]$Df[1], p = r fit_summary[[1]]$'Pr(>F)'[1]. This shows a non-significant change in chert flake cortex amount. This is equivalent to what we see in the Bayesian HDI interval, which includes zero, suggesting no credible difference.

In the two code chunks below we repeat the statistical tests above for core cortex.

myDataFrame <- data.frame(phase = sq_a_cores_mass$phase, cortex = sq_a_cores_mass$Cortex)
myDataFrame <- myDataFrame[complete.cases(myDataFrame),] # omit NAs
yName = names(myDataFrame)[2] # cortex
xName = names(myDataFrame)[1] # phase
contrasts = list(
  list( c("1") ,  c("2") , compVal=0.0 , ROPE=c(-0.1,0.1) ) ,
   list( c("2") ,  c("3") , compVal=0.0 , ROPE=c(-0.1,0.1) ) ,
   list( c("3") ,  c("4") , compVal=0.0 , ROPE=c(-0.1,0.1) ) )
# Generate the MCMC chain:
  mcmcCoda_ANOVA = genMCMC_ANOVA(datFrm=myDataFrame , yName=yName , xName=xName ,
                      numSavedSteps=11000 , thinSteps=10 
                      )
  # Get summary statistics of chain:
  summaryInfo_ANOVA = smryMCMC_ANOVA( mcmcCoda_ANOVA , 
                          datFrm=myDataFrame , xName=xName ,
                          contrasts=contrasts 
                          )
  # #  Display posterior information (not easy to read unless using interactively):
#     plotMCMC_ANOVA( mcmcCoda_ANOVA , 
#               datFrm=myDataFrame , yName=yName , xName=xName , 
#               contrasts=contrasts  )
#   
# subset summaryInfo to show HDI interval for interactions
HDI_intervals_for_interactions_c <- summaryInfo_ANOVA[c((nrow(summaryInfo_ANOVA) - length(contrasts) + 1):nrow(summaryInfo_ANOVA)), c("HDIlow", "HDIhigh")]
kable(round(HDI_intervals_for_interactions_c,3), caption = "Highest density intervals (95%) for the posterior distributions of the interactions between phases and core cortex")
# combine core and flake HDIs into one table
HDIs_for_cores_and_flakes <- round(cbind(HDI_intervals_for_interactions_c, HDI_intervals_for_interactions_f),3)

The code chunk above produced results that indicate that the HDIs for all phases include zero, which we interpret as no credible difference. In the code chunk below we repeat the same investigation using a frequentist ANOVA.

fit <- aov(cortex ~ as.factor(phase), myDataFrame)
fit_summary <- summary(fit)

In the code chunk above we obtain the results of a frequentist t-test which returns a statistically non-significant result: F = r fit_summary[[1]]$'F value'[1], df = r fit_summary[[1]]$Df[1], p = r fit_summary[[1]]$'Pr(>F)'[1].. This shows a non-significant change in chert core cortex amount. This is equivalent to what we see in the Bayesian HDI interval, which includes zero, suggesting no credible difference.

Results: Core technology

This code draws a plot of differences in the mass of cores recovered from Jerimalai by core type.

sq_a_core_types <- core_types  %>%  filter(Square == "A")
sq_a_core_types$Type_long <- with(sq_a_core_types, ifelse(Type == "SPC", "Single plaform",
                                     ifelse(Type == "RC", "Radial",
                                      ifelse(Type == "BDC", "Bidirectional",
                                       ifelse(Type == "BiC", "Bipolar",
                                        ifelse(Type == "MPC", "Multi-platform",
                                         ifelse(Type == "LLC", "Levallois-like",
                                          ifelse(Type == "FFC", "Faceted flake", NA))))))) )
# plot
ggplot(sq_a_core_types, aes(reorder(Type_long, -Mass,  FUN=median), Mass)) +
   geom_jitter(alpha = 0.9, shape = 1) + 
  geom_boxplot(alpha = 0.1, fill = "white", colour = "black") +
  ylim(0,20) +
  xlab("Core type") +
  ylab("Core mass (g)") +
  theme_minimal(base_size = 10)

The code chunk below computes the amount of cortex for each core type.

core_cortex <- aggregate( X..Cortex ~ Type, data = sq_a_core_types, mean)
# what is the average amount of cortex for each core type?
core_cortex_means <- arrange(core_cortex, -X..Cortex)
names(core_cortex_means) <- c("Type", "Cortex percentage")
core_cortex_means[,2] <- round(core_cortex_means[,2],1)

Multiplatform cores (r filter(core_cortex_means, Type == 'MPC')$'Cortex percentag'%) and faceted flake cores (r filter(core_cortex_means, Type == 'FFC')$'Cortex percentage'%) exhibit the most cortex. Levallois-like cores and bipolar cores exhibit the least cortex (<4%).

The code chunk below computes the numbers of flake scars by each core type.

core_scars <- sq_a_core_types %>%
  group_by(Type) %>%
  summarize(means = round(mean(Number.of.Scars, na.rm = TRUE),0),
            sds = round(sd(Number.of.Scars,  na.rm = TRUE)),0) %>%
  arrange(means)

Levallois-like cores exhibit almost twice the number of flake scars on average as other cores in the assemblage (mean = r filter(core_scars, Type == "LLC")$means scars versus r round(mean(core_scars$means),0) on average).

Results: Retouched artefacts

The code chunk below computes the number of retouched flakes in square A

# frequency of flakes with retouch per phase
rt <- sq_a_flakes[sq_a_flakes$Rtch == "Yes", ]
rt_count <- nrow(rt)

There are only r rt_count retouched peices in square A, so further analysis would not help to identify patterns in retouch activity at this location.

Results: Technological types

sq_a_techno_types <- read.csv("data/Jerimalai_technological_table_Square_A.csv")
sq_a_spits <- sq_a_techno_types[, 1]
sq_a_techno_types[!is.na(sq_a_techno_types)] <- "x"
sq_a_techno_types[is.na(sq_a_techno_types)] <- ""
sq_a_techno_types$A <- sq_a_spits
# split up
sq_a_techno_retouch <- sq_a_techno_types[, c("End.scraper"                               
 , "Side"                                      
 , "Scraper.Edge"                              
 , "notch"                                     
 , "Side.and.End"                              
 , "Double.Side.Scraper"                       
, "Double.Side.and.End.Scraper"               
, "Denticulated..Retouch"                     
, "Double.End.and.Side"                       
, "Notched.double.side.and.end"               
, "Notched.side.and.end.scraper"              
, "Notched.Double.Side"                       
, "Evidence.of.dual.hemispheres"              
, "drill.like.retouch")]
sq_a_techno_features <- sq_a_techno_types[, c("Faceting",
                                              "Brumm.and.Moore.Truncated.Flake" )]
sq_a_techno_ground <- sq_a_techno_types[, c("Ground.ochre" ,
                                              "Striated.haematite" )]
sq_a_techno_techtypes <- sq_a_techno_types[, c("Levallois.like.flake" 
, "Burin.Spall"                               
, "Burin"
, "Bipolar.Flake" 
, "Eclat.debordant" 
, "redirecting.flake" 
, "Cortical.Flake"
, "Faceted.Flake.from.Ventral"
, "Unfaceted.Flake.from.Ventral" 
, "Faceted.Point")]
sq_a_techno_cores <- sq_a_techno_types[, c("Truncated.faceted.core"                    
, "Semi.Discoidal.Core"                       
, "Flake.core.with.unfaceted.ventral.removals"
, "Bidirectional.core"                        
, "Faceted.Radial.Core.Levallois"             
, "Single.Platform.Core"                      
, "Mulitplatform.Core"                        
, "Bipolar.core"                              
           )]

sq_a_phases_techo <- sq_a_makephases(sq_a_spits)
# append phase column to each table
techno_tables <- list(sq_a_techno_retouch, sq_a_techno_features, sq_a_techno_ground, sq_a_techno_techtypes, sq_a_techno_cores)
techno_tables_out <- vector("list", length = length(techno_tables))
for(i in seq_along(techno_tables)){
  techno_tables[[i]]$phase <- sq_a_phases_techo
  techno_tables[[i]][] <- lapply(techno_tables[[i]], as.character) # change factor to character
  techno_tables[[i]][techno_tables[[i]] == 'x'] <-  1 # replace x with 1
  techno_tables[[i]][] <- lapply(techno_tables[[i]], as.numeric) # change char to num 
  techno_tables_out[[i]] <- as.data.frame(t(ddply(techno_tables[[i]], "phase", numcolwise(sum, na.rm = TRUE))))
  techno_tables_out[[i]]$type <- rownames(techno_tables_out[[i]])
}
# list to dataframe
techno_tables_comb <- bind_rows(techno_tables_out) # %>%  View
techno_tables_comb <- techno_tables_comb[,c(5,1,2,3,4)]
names(techno_tables_comb) <- c("class", "phase 1", "phase 2",  "phase 3", "phase 4")

# how many spits in each phase?
sq_a_spits_per_phase <- sq_a_allchert %>% 
  group_by(phase) %>% 
  summarise(n_spits = n_distinct(Spit))

sq_a_props <- lapply(seq_along(techno_tables_comb)[2:5],  function(i) techno_tables_comb[,i] / sq_a_spits_per_phase$n_spits[i-1] )

techno_tables_comb$`phase 1` <- sq_a_props[[1]]$`phase 1`
techno_tables_comb$`phase 2` <- sq_a_props[[2]]$`phase 2`
techno_tables_comb$`phase 3` <- sq_a_props[[3]]$`phase 3`
techno_tables_comb$`phase 4` <- sq_a_props[[4]]$`phase 4`
techno_tables_comb[2:5] <- round(techno_tables_comb[2:5], 2)
kable(techno_tables_comb, caption = "Square A: Summary of proportions and classes. Proportions refers to the proportion of spits in each depositional phase containing a given class")

Colophon

This report was generated on r Sys.time() using the following computational environment and dependences:

# which R packages and versions?
sessionInfo()

# what other pieces of software?
needs <- needs()
c(needs$depends$SystemRequirements[needs$depends$SystemRequirements != "NULL"], needs$imports$SystemRequirements[needs$imports$SystemRequirements != "NULL"])

# what commit is this file at?
repo <- repository(path = "../")
last_commit <- commits(repo)[[1]]

The current git commit of this file is r last_commit@sha, which is on the r branches(repo)[[1]]@name branch and was made by r last_commit@committer@name on r when(last_commit). The current commit message is "r last_commit@summary".



benmarwick/Pleistocene-aged-stone-artefacts-from-Jerimalai--East-Timor documentation built on May 12, 2019, 1:01 p.m.