In KopfLab/isoprocessCUBES: CUBES data processor

NOTES about data reduction and run:

Fill in at the end of your data analysis before you knit the file - include information about what you culled and why, and any other notes about the nature of the run (e.g. needle clogged, lots of samples with the wrong number of peaks, etc.)

Load libraries, data, and define standards

load necessary R libraries and data, define accepted standard values for correcting standards

To install the newest version of the isoprocessCUBES package, run the following in the console: - install.packages("devtools") - devtools::install_github("KopfLab/isoprocessCUBES")

library(tidyverse)
library(stringr)
library(openxlsx)
library(plotly)
library(isoprocessCUBES)

Document knitted with isoprocessCUBES version r packageVersion("isoprocessCUBES").

load data

session<-"XXXXXX"   #update this to name your own session

raw.data<-read.csv("Gasbench_data_template.csv")   #enter the name of your data input file here; should be saved as a .csv file
raw.data<-subset(raw.data, Peak.Nr>5 )

#input all standard values in PDB mineral values; be sure to adjust for YOUR standards

corr.std<-"HIS"    #you should type the name of whichever standard you used for both your lin.std and drift.std here
O.acc <- (11.78-30.92)/1.03092 # d18OVSMOW = 1.03092 * d18OVPDB + 30.92 - conversion from SMOW value to PDB value 
C.acc <- -4.80

mon.std<-"CU YULE"
O.acc.mon <- 6.55 #in VPDB min
C.acc.mon <- -2.28 

dis.std<-"NBS18"
O.acc.dis <- -23.2 #in VPDB min
C.acc.dis <- -5.014

# data frame overview
C.stds.table <- 
  tribble( # data frame defined by row, instead of by column
    ~std.name, ~C.acc, # define the column names
    corr.std, C.acc,   # add row one
    mon.std, C.acc.mon,# add row two
    dis.std, C.acc.dis # add row three, etc.
  )

O.stds.table <- tribble( # data frame defined by row, instead of by column
    ~std.name, ~O.acc, # define the column names
    corr.std, O.acc,   # add row one
    mon.std, O.acc.mon,# add row two
    dis.std, O.acc.dis # add row three, etc.
  )

data <- 
  raw.data %>% 
  group_by(row, Identifier1, Identifier2, mass, type) %>% 
  summarize(
    num.peaks=n(),
    d13C=mean(d13Craw),
    d13C.sd=sd(d13Craw),
    d18O=mean(d18Oraw),
    d18O.sd=sd(d18Oraw),
    area44=mean(Area44),
    area44.sd=sd(Area44),
    inv.area44=1/area44
  )

data$d18O.PDB<-(data$d18O-41.43)/1.04143 #d18O output is CO2 SMOW, so need to use the CO2 SMOW to min PDB conversion

stdev<-gather(data, key = "isotope",
              value= "stdev", d13C.sd, d18O.sd)

Initial dataset check plots and culling

First round of culling looks at standard deviations of the stable isotope values of the individual peaks (typically there are 10 peaks), and uses that information to identify analytical outliers or samples with problems or too few peaks. Often the "cutoff" of outliers tends to be sd of 0.075 - 0.1 permil

stdev<-gather(data, key = "isotope",
              value= "stdev", d13C.sd, d18O.sd)

sd.values<-ggplot(stdev, aes(x=row, y=stdev, fill=Identifier1)) +
  geom_point(size=3, shape=21) +
  theme(axis.text.x = element_text(angle = 90, hjust = 1)) +
  guides(fill=FALSE) +
  facet_grid(. ~ isotope)

sd.hist<-ggplot(stdev, aes(x=stdev, fill=isotope)) +
  geom_histogram(binwidth=.01) +
  theme(axis.text.x = element_text(angle = 90, hjust = 1)) +
  guides(fill=FALSE) +
  facet_grid(. ~ isotope)

sd.box<-ggplot(stdev, aes(x=isotope, y=stdev, fill=isotope)) +
  geom_boxplot()

sd.v.area44<-ggplot(stdev, aes(x=area44, y=stdev, fill=factor(num.peaks))) +
  geom_point(size=3, shape=21) +
  scale_fill_discrete() +
  facet_grid(. ~ isotope)

multiplot(sd.values, sd.hist, sd.box, sd.v.area44, cols=1)

Remove any data points that did not replicate within uncertainty for the individual peaks, redo plots - creates a "culled data" file that shows samples and standards that shouldn't be used

#adjust line 121: change the value of d18O.sd.cutoff to match the cutoff of outliers based on the previous plots; can adjust as need if more than one round of outliers need to be culled
d18O.sd.cutoff <- 100

culled.data <- subset(data, d18O.sd>d18O.sd.cutoff) 
wo.culled <- subset(data, d18O.sd<d18O.sd.cutoff)

stds1<- subset(data, type!="sample" & type!="check gas" & d18O.sd<d18O.sd.cutoff)

stdev<-gather(wo.culled, key = "isotope",
              value= "stdev", d13C.sd, d18O.sd)

sd.values<-ggplot(stdev, aes(x=row, y=stdev, fill=Identifier1)) +
  geom_point(size=3, shape=21) +
  theme(axis.text.x = element_text(angle = 90, hjust = 1)) +
  guides(fill=FALSE) +
  facet_grid(. ~ isotope)

sd.hist<-ggplot(stdev, aes(x=stdev, fill=isotope)) +
  geom_histogram(binwidth=.005) +
  theme(axis.text.x = element_text(angle = 90, hjust = 1)) +
  guides(fill=FALSE) +
  facet_grid(. ~ isotope)

sd.box<-ggplot(stdev, aes(x=isotope, y=stdev, fill=isotope)) +
  geom_boxplot()

sd.v.area44<-ggplot(stdev, aes(x=area44, y=stdev, fill=factor(num.peaks))) +
  geom_point(size=3, shape=21) +
  scale_fill_discrete() +
  facet_grid(. ~ isotope)

multiplot(sd.values, sd.hist, sd.v.area44, sd.box, cols=1)

Plot yields of everything, and then just the standards, using INTERACTIVE plots. Use this to cull more samples if need be, by looking for statistical outliers that coincide with yield problems. Note that the linear model in the yield.stds figure here is based on ALL the standard data, so ALL stds that fall far off the line should be excluded from corrections

Check to see that samples all fall within linearity range, and for any other obvious outliers to check:

do linearity standards cover the area space of your samples and other standards?
do all the standards show typical trends between area and mass? Do you see very general trends like that in your samples?

# adjust next line only: change #'s in stds.to.cull to reflect the row #'s that need to be culled, add row#'s as needed and rerun after looking at the new plots
stds.to.cull <- c(NA) #rows with yield problem and/or significant outlier in d13C or d18O

stds.culled <- filter(stds1, row %in% stds.to.cull)
stds <- filter(stds1, !row %in% stds.to.cull) 
culled.data<-bind_rows(culled.data, stds.culled)

stds<- data.frame(stds, cbind(predict.lm(lm(stds$area44 ~ stds$mass), interval=c("confidence"), level=0.95)))

yield.all<-ggplot(subset(data, type!="check gas"), aes(x=mass, y=area44, fill=type, label=row)) +
  geom_point(size=3, shape=22) +
  theme_bw() +
  labs(title= "all data")

yield.stds<-ggplot(stds, aes(x=mass, y=area44, label=row)) +
  stat_smooth(method="lm") +   
  geom_point(aes(fill=type, shape=type), size=2) +
  theme_bw() +
  scale_shape_manual(values=c(21,22,23,24,25)) +
  labs(title= "standards - yield")

calc_std_means_d13C <- function(df) calc_means(df, "d13C")

d13C.stds <- 
  ggplot(stds, aes(label=row)) +
  geom_hline(
    data = calc_std_means_d13C,
    mapping = aes(yintercept = yintercept, color = type, linetype = linetype)) +
  geom_point(shape=21, mapping = aes(x=area44, y=d13C, fill=type)) +
  scale_linetype_manual(values = c(1, 3, 2, 3, 2)) + 
  facet_grid(type ~ ., scales = "free") +
  theme_bw() +
  labs(title= "d13C standards - means and uncertainties")

calc_std_means_d18O <- function(df) calc_means(df, "d18O.PDB")

d18O.stds <- 
  ggplot(stds, aes(label=row)) +
  geom_hline(
    data = calc_std_means_d18O,
    mapping = aes(yintercept = yintercept, color = type, linetype = linetype)) +
  geom_point(shape=21, mapping = aes(x=area44, y=d18O.PDB, fill=type)) +
  scale_linetype_manual(values = c(1, 3, 2, 3, 2)) + 
  facet_grid(type ~ ., scales = "free") +
  theme(legend.position = "none") +
  theme_bw() +
  labs(title= "d18O standards - means and uncertainties")

ggplotly(yield.all) 
ggplotly(yield.stds)
ggplotly(d18O.stds)
ggplotly(d13C.stds)

Calculate weight percent

yield.line <- lm(stds$mass ~ stds$area44)

(yield.slope <- coef(yield.line)[[2]])
(yield.intercept <- coef(yield.line)[[1]])

data$PercentCO3 <- ((yield.slope * data$area44 + yield.intercept)/data$mass *100)

data$target.wgt.ug <-  90/(data$PercentCO3/100)

Carbon corrections

Apply basic offset correction to all of the data; does NOT include linearity or drift correction to data - culling any additional standards that should be culled (e.g. 2 sd outliers)

offsetC<-subset(stds, Identifier1==corr.std)

(offsetC.mean<-mean(offsetC$d13C))
(offsetC.sd<-sd(offsetC$d13C))

offsetC$d13C.offset <- offsetC$d13C +  (C.acc - offsetC.mean)

(offsetcorrC.mean<-mean(offsetC$d13C.offset))
(offsetcorrC.sd<-sd(offsetC$d13C.offset))

d13C.offset<-ggplot(offsetC, aes(x=area44, y=d13C.offset, shape=type)) +
  geom_point(fill="orange", size=3) +
  geom_hline(yintercept=offsetcorrC.mean, colour="orange") +
  geom_hline(yintercept=offsetcorrC.mean + offsetcorrC.sd, colour="orange", linetype="dashed") +
  geom_hline(yintercept=offsetcorrC.mean - offsetcorrC.sd, colour="orange", linetype="dashed") +
  geom_hline(yintercept=offsetcorrC.mean + 2*offsetcorrC.sd, colour="orange", linetype=3) +
  geom_hline(yintercept=offsetcorrC.mean - 2*offsetcorrC.sd, colour="orange", linetype=3) +
  scale_shape_manual(values=c(21,22,23,24,25)) +
  annotate("text", y = offsetcorrC.mean + 0.01, x = min(offsetC$area44), 
    label = paste0("mean: ", sprintf("%.2f", offsetcorrC.mean), " \U00B1 ", sprintf("%.2f", offsetcorrC.sd, 2), " \U2030 (1 sd)"),
    size = 4, hjust=0, vjust=0, parse=FALSE, colour="black") +
  theme_bw() 

d13C.offset.mass<-ggplot (stds, aes(x=area44, y=mass)) +
  stat_smooth(method="lm") +
  geom_point(data=offsetC, aes(x=area44, y=mass), shape=21, fill="orange", size = 2) +
  theme_bw()

multiplot(d13C.offset, d13C.offset.mass, cols=2)

After finalizing the offset correction, apply it to the whole dataset, and check the monitoring standards

#apply offset correction to whole dataset
data$d13C.offset <- data$d13C +  (C.acc - offsetC.mean)
stds$d13C.offset <- stds$d13C +  (C.acc - offsetC.mean)

#make monitoring standard dataset and dataset for additional standards used later for discrimination correction
offsetC.mon <- subset(stds, Identifier1==mon.std)
offsetC.dis <- subset(stds, Identifier1==dis.std)

#check monitoring standard response
(offsetC.mon.mean<-mean(offsetC.mon$d13C.offset))
(offsetC.mon.sd<-sd(offsetC.mon$d13C.offset))

C.stds.table$offsetC.mean <- c(offsetcorrC.mean, offsetC.mon.mean, mean(offsetC.dis$d13C.offset))
C.stds.table$offsetC.sd <- c(offsetcorrC.sd, offsetC.mon.sd, sd(offsetC.dis$d13C.offset))

C.mon.offset<-ggplot(offsetC.mon, aes(x=area44, y=d13C.offset)) +
  geom_point(shape=21, fill="orange") +
  geom_hline(yintercept=offsetC.mon.mean, colour="orange") +
  geom_hline(yintercept=offsetC.mon.mean + offsetC.mon.sd, colour="orange", linetype="dashed") +
  geom_hline(yintercept=offsetC.mon.mean - offsetC.mon.sd, colour="orange", linetype="dashed") +
  geom_hline(yintercept=offsetC.mon.mean + 2*offsetC.mon.sd, colour="orange", linetype=3) +
  geom_hline(yintercept=offsetC.mon.mean - 2*offsetC.mon.sd, colour="orange", linetype=3) +
  annotate("text", y = offsetC.mon.mean +0.01, x = min(offsetC.mon$area44), label = paste0("mean: ", sprintf("%.2f", offsetC.mon.mean), " \U00B1 ", sprintf("%.2f", offsetC.mon.sd, 2), " \U2030 (1 sd)"), size = 4, hjust=0, vjust=0, parse=FALSE) +
  theme_bw()

C.mon.offset.mass<-ggplot (stds, aes(x=mass, y=area44)) +
  stat_smooth(method="lm") +
  geom_point(data=offsetC.mon, aes(x=mass, y=area44), shape=21, fill="orange", size = 2) +
  theme_bw()

multiplot(C.mon.offset, C.mon.offset.mass, cols=2)

drift corrections, using raw values

driftC<-subset(stds, type=="drift.std")

drift.slopeC<-(coef(lm(driftC$d13C ~ driftC$row))[[2]])
drift.interC<-(coef(lm(driftC$d13C ~ driftC$row))[[1]])

#drift check
driftC$d13C.drift<- driftC$d13C + (C.acc - (drift.slopeC * driftC$row + drift.interC))

(driftC.mean<-mean(driftC$d13C.drift))
(driftC.sd<-sd(driftC$d13C.drift))

C.drift<-ggplot(driftC, aes(x=row, y=d13C)) +
  geom_smooth(method=lm, colour="black") +
  annotate("text", x = min(driftC$row), y = max(driftC$d13C + 0.01), label = lm_eqn(driftC$row, driftC$d13C),  size = 4, hjust=0, vjust=0, parse=TRUE, colour="black") +
  geom_point(shape=21, fill="black", size=2) +
  geom_point(aes(x=row, y=d13C.drift), fill="red", shape=21, size=2) +
  geom_hline(aes(yintercept=C.acc), size=.5) +
  geom_hline(yintercept = driftC.mean + driftC.sd, colour="red", linetype="dashed") +
  geom_hline(yintercept = driftC.mean - driftC.sd, colour="red", linetype="dashed") +
  geom_hline(yintercept = driftC.mean + 2*driftC.sd, colour="red", linetype=3) +
  geom_hline(yintercept = driftC.mean - 2*driftC.sd, colour="red", linetype=3) +
  annotate("text", 
    y = driftC.mean +0.01, 
    x = min(driftC$row),
    label = paste0("mean: ", sprintf("%.2f", driftC.mean), " \U00B1 ", sprintf("%.2f", driftC.sd, 2), " \U2030 (1 sd)"),
    size = 4, hjust=0, vjust=0, parse=FALSE) +
    theme_bw()

C.drift.mass<-ggplot(stds, aes(x=mass, y=area44)) +
  stat_smooth(method="lm") +
  geom_point(data=driftC, aes(x=mass, y=area44), shape=21, fill="red", size=3) +
  theme_bw()

multiplot(C.drift, C.drift.mass, cols=2)

apply drift correction to all of the data, check with monitoring standards

data$d13C.drift <- data$d13C +  (C.acc - (drift.slopeC * data$row + drift.interC))
stds$d13C.drift <- stds$d13C +  (C.acc - (drift.slopeC * stds$row + drift.interC))

driftC.mon<-subset(stds, Identifier1==mon.std)
driftC.dis <- subset(stds, Identifier1==dis.std)

(driftC.mon.mean<-mean(driftC.mon$d13C.drift))
(driftC.mon.sd<-sd(driftC.mon$d13C.drift))

C.stds.table$driftC.mean <- c(driftC.mean, driftC.mon.mean, mean(driftC.dis$d13C.drift))
C.stds.table$driftC.sd <- c(driftC.sd, driftC.mon.sd, sd(driftC.dis$d13C.drift))

C.mon.drift<-ggplot(driftC.mon, aes(x=area44, y=d13C.drift)) +
  geom_point(shape=21, fill="red") +
  geom_hline(yintercept = driftC.mon.mean, colour="red") +
  geom_hline(yintercept = driftC.mon.mean + driftC.mon.sd, colour="red", linetype="dashed") +
  geom_hline(yintercept = driftC.mon.mean - driftC.mon.sd, colour="red", linetype="dashed") +
  geom_hline(yintercept = driftC.mon.mean + 2*driftC.mon.sd, colour="red", linetype=3) +
  geom_hline(yintercept = driftC.mon.mean - 2*driftC.mon.sd, colour="red", linetype=3) +
  annotate("text",
    y = driftC.mon.mean +0.01, 
    x = min(driftC.mon$area44),
    label = paste0("mean: ", sprintf("%.2f", driftC.mon.mean), " \U00B1 ", sprintf("%.2f", driftC.mon.sd, 2), " \U2030 (1 sd)"),
    size = 4, hjust=0, vjust=0, parse=FALSE, colour="red")

C.mon.drift.mass<-ggplot (stds, aes(x=mass, y=area44)) +
  stat_smooth(method="lm") +
  geom_point(data=driftC.mon, aes(x=mass, y=area44), shape=21, fill="red", size = 2) +
  theme_bw()

multiplot(C.mon.drift, C.mon.drift.mass, cols=2)

Plot linearity

linC<-subset(stds, type=="lin.std") 

lin.slopeC<-(coef(lm(linC$d13C ~ linC$inv.area44))[[2]])
lin.interC<-(coef(lm(linC$d13C ~ linC$inv.area44))[[1]])

#linearity check
linC$d13C.lin<-linC$d13C + (C.acc - (lin.slopeC * linC$inv.area44 + lin.interC))

(linC.mean<-mean(linC$d13C.lin))
(linC.sd<-sd(linC$d13C.lin))

C.lin.area44<-ggplot(linC, aes(x=area44, y=d13C)) +
  geom_point(shape=21, fill="blue") +
  geom_smooth()  

C.lincorr.invarea<-ggplot(linC, aes(x=inv.area44, y=d13C)) +
  geom_smooth(method=lm) +
  annotate("text", x = min(linC$inv.area44), y = max(linC$d13C + 0.01), label = lm_eqn(linC$inv.area44, linC$d13C),  size = 4, hjust=0, vjust=0, parse=TRUE) +
  geom_point(shape=21, fill="black", size=2) +
  geom_point(aes(x=inv.area44, y=d13C.lin), fill="red", shape=22) +
  geom_hline(aes(yintercept=C.acc), size=.5) +
  geom_hline(yintercept = linC.mean + linC.sd, colour="red", linetype="dashed") +
  geom_hline(yintercept = linC.mean - linC.sd, colour="red", linetype="dashed") +
  geom_hline(yintercept = linC.mean + 2*linC.sd, colour="red", linetype=3) +
  geom_hline(yintercept = linC.mean - 2*linC.sd, colour="red", linetype=3) +
  annotate("text",
    y = linC.mean +0.01, 
    x = min(linC$inv.area44),
    label = paste0("mean: ", sprintf("%.2f", linC.mean), " \U00B1 ", sprintf("%.2f", linC.sd, 2), " \U2030 (1 sd)"),
    size = 4, hjust=0, vjust=0, parse=FALSE) +
    theme_bw()

C.lin.mass<-ggplot (stds, aes(x=mass, y=area44)) +
  stat_smooth(method="lm") +
  geom_point(data=linC, aes(x=mass, y=area44), shape=21, fill="blue", size = 2) +
  theme_bw()

multiplot(C.lin.area44, C.lincorr.invarea, C.lin.mass, cols=3)

apply linearity correction + offset correction to all of the data

data$d13C.lin <- data$d13C +  (C.acc - (lin.slopeC * data$inv.area44 + lin.interC))
stds$d13C.lin <- stds$d13C +  (C.acc - (lin.slopeC * stds$inv.area44 + lin.interC))

linC.mon<-subset(stds, Identifier1==mon.std)
linC.dis<-subset(stds, Identifier1==dis.std)

(linC.mon.mean <- mean(linC.mon$d13C.lin))
(linC.mon.sd<-sd(linC.mon$d13C.lin))

C.stds.table$linC.mean <- c(linC.mean, linC.mon.mean, mean(linC.dis$d13C.lin))
C.stds.table$linC.sd <- c(linC.sd, linC.mon.sd, sd(linC.dis$d13C.lin))

C.mon.lin<-ggplot(linC.mon, aes(x=area44, y=d13C.lin)) +
  geom_point(shape=21, fill="blue") +
  geom_hline(yintercept = linC.mon.mean, colour="blue") +
  geom_hline(yintercept = linC.mon.mean + linC.mon.sd, colour="blue", linetype="dashed") +
  geom_hline(yintercept = linC.mon.mean - linC.mon.sd, colour="blue", linetype="dashed") +
  geom_hline(yintercept = linC.mon.mean + 2*linC.mon.sd, colour="blue", linetype=3) +
  geom_hline(yintercept = linC.mon.mean - 2*linC.mon.sd, colour="blue", linetype=3) +
  annotate("text",
    y = linC.mon.mean +0.01, 
    x = min(linC.mon$area44),
    label = paste0("mean: ", sprintf("%.2f", linC.mon.mean), " \U00B1 ", sprintf("%.2f", linC.mon.sd, 2), " \U2030 (1 sd)"),
    size = 4, hjust=0, vjust=0, parse=FALSE, colour="blue")

C.mon.lin.mass<-ggplot (stds, aes(x=mass, y=area44)) +
  stat_smooth(method="lm") +
  geom_point(data=linC.mon, aes(x=mass, y=area44), shape=21, fill="blue", size = 2) +
  theme_bw()

multiplot(C.mon.lin, C.mon.lin.mass, cols=2)

drift corrections on the linearity corrected values

lindriftC <- merge(driftC, data[c("row", "d13C.lin")], by.x="row", by.y="row", all.x=TRUE, all.y=FALSE, sort=FALSE)

lindrift.slopeC<-(coef(lm(lindriftC$d13C.lin ~ lindriftC$row))[[2]])
lindrift.interC<-(coef(lm(lindriftC$d13C.lin ~ lindriftC$row))[[1]])

#drift check
lindriftC$d13C.lindrift<- lindriftC$d13C.lin + (C.acc - (lindrift.slopeC * lindriftC$row + lindrift.interC))

(lindriftC.mean<-mean(lindriftC$d13C.drift))
(lindriftC.sd<-sd(lindriftC$d13C.drift))

C.lindrift<-ggplot(lindriftC, aes(x=row, y=d13C.lin)) +
  geom_smooth(method=lm, colour="black") +
  annotate("text", x = min(lindriftC$row), y = max(lindriftC$d13C.lin + 0.01), label = lm_eqn(lindriftC$row, lindriftC$d13C.lin),  size = 4, hjust=0, vjust=0, parse=TRUE, colour="black") +
  geom_point(shape=21, fill="black", size=2) +
  geom_point(aes(x=row, y=d13C.lindrift), fill="red", shape=22, size=2) +
  geom_hline(aes(yintercept=C.acc), size=.5) +
  geom_hline(yintercept = lindriftC.mean + lindriftC.sd, colour="red", linetype="dashed") +
  geom_hline(yintercept = lindriftC.mean - lindriftC.sd, colour="red", linetype="dashed") +
  geom_hline(yintercept = lindriftC.mean + 2*lindriftC.sd, colour="red", linetype=3) +
  geom_hline(yintercept = lindriftC.mean - 2*lindriftC.sd, colour="red", linetype=3) +
  annotate("text",
    y = lindriftC.mean +0.01, 
    x = min(lindriftC$row),
    label = paste0("mean: ", sprintf("%.2f", lindriftC.mean), " \U00B1 ", sprintf("%.2f", lindriftC.sd, 2), " \U2030 (1 sd)"),
    size = 4, hjust=0, vjust=0, parse=FALSE) +
    theme_bw()

C.lindrift.mass<-ggplot(stds, aes(x=mass, y=area44)) +
  stat_smooth(method="lm") +
  geom_point(data=lindriftC, aes(x=mass, y=area44), shape=21, fill="red", size=3) +
  theme_bw()

multiplot(C.lindrift, C.lindrift.mass, cols=2)

apply drift correction to all of the linearirty corrected data, check with monitoring standards

data$d13C.lindrift <- data$d13C.lin +  (C.acc - (lindrift.slopeC * data$row + lindrift.interC))
stds$d13C.lindrift <- stds$d13C.lin +  (C.acc - (lindrift.slopeC * stds$row + lindrift.interC))

lindriftC.mon<-subset(stds, Identifier1==mon.std)
lindriftC.dis<-subset(stds, Identifier1==dis.std)

(lindriftC.mon.mean<-mean(lindriftC.mon$d13C.lindrift))
(lindriftC.mon.sd<-sd(lindriftC.mon$d13C.lindrift))

C.stds.table$lindriftC.mean <- c(lindriftC.mean, lindriftC.mon.mean, mean(lindriftC.dis$d13C.lindrift))
C.stds.table$lindriftC.sd <- c(lindriftC.sd, lindriftC.mon.sd, sd(lindriftC.dis$d13C.lindrift))

C.mon.drift<-ggplot(lindriftC.mon, aes(x=area44, y=d13C.lindrift)) +
  geom_point(shape=21, fill="red") +
  geom_hline(yintercept = lindriftC.mon.mean, colour="red") +
  geom_hline(yintercept = lindriftC.mon.mean + lindriftC.mon.sd, colour="red", linetype="dashed") +
  geom_hline(yintercept = lindriftC.mon.mean - lindriftC.mon.sd, colour="red", linetype="dashed") +
  geom_hline(yintercept = lindriftC.mon.mean + 2*lindriftC.mon.sd, colour="red", linetype=3) +
  geom_hline(yintercept = lindriftC.mon.mean - 2*lindriftC.mon.sd, colour="red", linetype=3) +
  annotate("text",
    y = lindriftC.mon.mean +0.01, 
    x = min(lindriftC.mon$area44),
    label = paste0("mean: ", sprintf("%.2f", lindriftC.mon.mean), " \U00B1 ", sprintf("%.2f", lindriftC.mon.sd, 2), " \U2030 (1 sd)"),
    size = 4, hjust=0, vjust=0, parse=FALSE, colour="red")

C.mon.drift.mass<-ggplot (stds, aes(x=mass, y=area44)) +
  stat_smooth(method="lm") +
  geom_point(data=lindriftC.mon, aes(x=mass, y=area44), shape=21, fill="red", size = 2) +
  theme_bw()

multiplot(C.mon.drift, C.mon.drift.mass, cols=2)

d13C discrimination correction - First, need to pick which correction scheme you prefer, then update this with the correct data columns

# replace the character string here with the correction column you want to use
col_for_disc <- "lindrift"  # options to substitute in here are: "offset" or "drift" or  "lin" or  "lindrift"

# resulting formulas 
col_in_C.stds.table <- paste0(col_for_disc, "C.mean")
col_in_data <- paste0("d13C.", col_for_disc)
col_for_disc_mean_eq <- list(col_for_disc = lazyeval::interp(~var, var = as.name(col_in_C.stds.table)))
correction_eq <- list(d13C.disc = lazyeval::interp(~disc.slopeC * var + disc.interC, var = as.name(col_in_data)))

# safety checks
if (!col_in_C.stds.table %in% names(C.stds.table)) stop("this column does not exist in C.stds.table: ", col_in_C.stds.table, call. = FALSE)
if (!col_in_data %in% names(data)) stop("this column does not exist in data or stds table: ", col_in_data, call. = FALSE)

# regression
m <- lm(C.acc ~ col_for_disc, data = mutate_(C.stds.table, .dots = col_for_disc_mean_eq))
disc.slopeC<-(coef(m)[[2]])
disc.interC<-(coef(m)[[1]])
R2 <- summary(m)$r.squared

# apply correction
data <- mutate_(data, .dots = correction_eq)
stds <- mutate_(stds, .dots = correction_eq)

C.disc.all <- 
  ggplot(C.stds.table, aes_string(x=col_in_C.stds.table, y="C.acc")) +
  geom_smooth(method="lm", color = "blue") +
  geom_point(data=data, aes_string(x=col_in_data, y="d13C.disc"), shape=23, fill="red", size = 2) +
  geom_point(shape=21, fill="blue", size = 4) +
  geom_text(
           x = min(C.stds.table[[col_in_C.stds.table]]), 
           y = max(C.stds.table$C.acc), 
           label = str_interp("C.acc = $[.2f]{slope} ${var} + $[.2f]{intercept} (R2: $[.4f]{R2})", 
                              list(slope = disc.slopeC, intercept = disc.interC, var = col_for_disc, R2 = R2)),
           size = 4, hjust=0, vjust=0, colour="blue") +
  labs(x = col_for_disc)

C.disc.all

discC.mon<-subset(stds, Identifier1==mon.std)
discC.dis<-subset(stds, Identifier1==dis.std)
discC.corr<-subset(stds, Identifier1==corr.std)

(discC.mon.mean<-mean(discC.mon$d13C.disc))
(discC.mon.sd<-sd(discC.mon$d13C.disc))

C.stds.table$discC.mean <- c(mean(discC.corr$d13C.disc), discC.mon.mean, mean(discC.dis$d13C.disc))
C.stds.table$discC.sd <- c(sd(discC.corr$d13C.disc), discC.mon.sd, sd(discC.dis$d13C.disc))

Oxygen corrections