Fill in at the end of your data analysis before you knit the file - include information about what you culled and why, and any other notes about the nature of the run (e.g. needle clogged, lots of samples with the wrong number of peaks, etc.)
load necessary R libraries and data, define accepted standard values for correcting standards
To install the newest version of the isoprocessCUBES package, run the following in the console: - install.packages("devtools") - devtools::install_github("KopfLab/isoprocessCUBES")
library(tidyverse) library(stringr) library(openxlsx) library(plotly) library(isoprocessCUBES)
Document knitted with isoprocessCUBES version r packageVersion("isoprocessCUBES")
.
load data
session<-"XXXXXX" #update this to name your own session raw.data<-read.csv("Gasbench_data_template.csv") #enter the name of your data input file here; should be saved as a .csv file raw.data<-subset(raw.data, Peak.Nr>5 )
#input all standard values in PDB mineral values; be sure to adjust for YOUR standards corr.std<-"HIS" #you should type the name of whichever standard you used for both your lin.std and drift.std here O.acc <- (11.78-30.92)/1.03092 # d18OVSMOW = 1.03092 * d18OVPDB + 30.92 - conversion from SMOW value to PDB value C.acc <- -4.80 mon.std<-"CU YULE" O.acc.mon <- 6.55 #in VPDB min C.acc.mon <- -2.28 dis.std<-"NBS18" O.acc.dis <- -23.2 #in VPDB min C.acc.dis <- -5.014 # data frame overview C.stds.table <- tribble( # data frame defined by row, instead of by column ~std.name, ~C.acc, # define the column names corr.std, C.acc, # add row one mon.std, C.acc.mon,# add row two dis.std, C.acc.dis # add row three, etc. ) O.stds.table <- tribble( # data frame defined by row, instead of by column ~std.name, ~O.acc, # define the column names corr.std, O.acc, # add row one mon.std, O.acc.mon,# add row two dis.std, O.acc.dis # add row three, etc. )
data <- raw.data %>% group_by(row, Identifier1, Identifier2, mass, type) %>% summarize( num.peaks=n(), d13C=mean(d13Craw), d13C.sd=sd(d13Craw), d18O=mean(d18Oraw), d18O.sd=sd(d18Oraw), area44=mean(Area44), area44.sd=sd(Area44), inv.area44=1/area44 ) data$d18O.PDB<-(data$d18O-41.43)/1.04143 #d18O output is CO2 SMOW, so need to use the CO2 SMOW to min PDB conversion stdev<-gather(data, key = "isotope", value= "stdev", d13C.sd, d18O.sd)
First round of culling looks at standard deviations of the stable isotope values of the individual peaks (typically there are 10 peaks), and uses that information to identify analytical outliers or samples with problems or too few peaks. Often the "cutoff" of outliers tends to be sd of 0.075 - 0.1 permil
stdev<-gather(data, key = "isotope", value= "stdev", d13C.sd, d18O.sd) sd.values<-ggplot(stdev, aes(x=row, y=stdev, fill=Identifier1)) + geom_point(size=3, shape=21) + theme(axis.text.x = element_text(angle = 90, hjust = 1)) + guides(fill=FALSE) + facet_grid(. ~ isotope) sd.hist<-ggplot(stdev, aes(x=stdev, fill=isotope)) + geom_histogram(binwidth=.01) + theme(axis.text.x = element_text(angle = 90, hjust = 1)) + guides(fill=FALSE) + facet_grid(. ~ isotope) sd.box<-ggplot(stdev, aes(x=isotope, y=stdev, fill=isotope)) + geom_boxplot() sd.v.area44<-ggplot(stdev, aes(x=area44, y=stdev, fill=factor(num.peaks))) + geom_point(size=3, shape=21) + scale_fill_discrete() + facet_grid(. ~ isotope) multiplot(sd.values, sd.hist, sd.box, sd.v.area44, cols=1)
Remove any data points that did not replicate within uncertainty for the individual peaks, redo plots - creates a "culled data" file that shows samples and standards that shouldn't be used
#adjust line 121: change the value of d18O.sd.cutoff to match the cutoff of outliers based on the previous plots; can adjust as need if more than one round of outliers need to be culled d18O.sd.cutoff <- 100 culled.data <- subset(data, d18O.sd>d18O.sd.cutoff) wo.culled <- subset(data, d18O.sd<d18O.sd.cutoff) stds1<- subset(data, type!="sample" & type!="check gas" & d18O.sd<d18O.sd.cutoff) stdev<-gather(wo.culled, key = "isotope", value= "stdev", d13C.sd, d18O.sd) sd.values<-ggplot(stdev, aes(x=row, y=stdev, fill=Identifier1)) + geom_point(size=3, shape=21) + theme(axis.text.x = element_text(angle = 90, hjust = 1)) + guides(fill=FALSE) + facet_grid(. ~ isotope) sd.hist<-ggplot(stdev, aes(x=stdev, fill=isotope)) + geom_histogram(binwidth=.005) + theme(axis.text.x = element_text(angle = 90, hjust = 1)) + guides(fill=FALSE) + facet_grid(. ~ isotope) sd.box<-ggplot(stdev, aes(x=isotope, y=stdev, fill=isotope)) + geom_boxplot() sd.v.area44<-ggplot(stdev, aes(x=area44, y=stdev, fill=factor(num.peaks))) + geom_point(size=3, shape=21) + scale_fill_discrete() + facet_grid(. ~ isotope) multiplot(sd.values, sd.hist, sd.v.area44, sd.box, cols=1)
Plot yields of everything, and then just the standards, using INTERACTIVE plots. Use this to cull more samples if need be, by looking for statistical outliers that coincide with yield problems. Note that the linear model in the yield.stds figure here is based on ALL the standard data, so ALL stds that fall far off the line should be excluded from corrections
Check to see that samples all fall within linearity range, and for any other obvious outliers to check:
# adjust next line only: change #'s in stds.to.cull to reflect the row #'s that need to be culled, add row#'s as needed and rerun after looking at the new plots stds.to.cull <- c(NA) #rows with yield problem and/or significant outlier in d13C or d18O stds.culled <- filter(stds1, row %in% stds.to.cull) stds <- filter(stds1, !row %in% stds.to.cull) culled.data<-bind_rows(culled.data, stds.culled) stds<- data.frame(stds, cbind(predict.lm(lm(stds$area44 ~ stds$mass), interval=c("confidence"), level=0.95))) yield.all<-ggplot(subset(data, type!="check gas"), aes(x=mass, y=area44, fill=type, label=row)) + geom_point(size=3, shape=22) + theme_bw() + labs(title= "all data") yield.stds<-ggplot(stds, aes(x=mass, y=area44, label=row)) + stat_smooth(method="lm") + geom_point(aes(fill=type, shape=type), size=2) + theme_bw() + scale_shape_manual(values=c(21,22,23,24,25)) + labs(title= "standards - yield") calc_std_means_d13C <- function(df) calc_means(df, "d13C") d13C.stds <- ggplot(stds, aes(label=row)) + geom_hline( data = calc_std_means_d13C, mapping = aes(yintercept = yintercept, color = type, linetype = linetype)) + geom_point(shape=21, mapping = aes(x=area44, y=d13C, fill=type)) + scale_linetype_manual(values = c(1, 3, 2, 3, 2)) + facet_grid(type ~ ., scales = "free") + theme_bw() + labs(title= "d13C standards - means and uncertainties") calc_std_means_d18O <- function(df) calc_means(df, "d18O.PDB") d18O.stds <- ggplot(stds, aes(label=row)) + geom_hline( data = calc_std_means_d18O, mapping = aes(yintercept = yintercept, color = type, linetype = linetype)) + geom_point(shape=21, mapping = aes(x=area44, y=d18O.PDB, fill=type)) + scale_linetype_manual(values = c(1, 3, 2, 3, 2)) + facet_grid(type ~ ., scales = "free") + theme(legend.position = "none") + theme_bw() + labs(title= "d18O standards - means and uncertainties") ggplotly(yield.all) ggplotly(yield.stds) ggplotly(d18O.stds) ggplotly(d13C.stds)
yield.line <- lm(stds$mass ~ stds$area44) (yield.slope <- coef(yield.line)[[2]]) (yield.intercept <- coef(yield.line)[[1]]) data$PercentCO3 <- ((yield.slope * data$area44 + yield.intercept)/data$mass *100) data$target.wgt.ug <- 90/(data$PercentCO3/100)
Apply basic offset correction to all of the data; does NOT include linearity or drift correction to data - culling any additional standards that should be culled (e.g. 2 sd outliers)
offsetC<-subset(stds, Identifier1==corr.std) (offsetC.mean<-mean(offsetC$d13C)) (offsetC.sd<-sd(offsetC$d13C)) offsetC$d13C.offset <- offsetC$d13C + (C.acc - offsetC.mean) (offsetcorrC.mean<-mean(offsetC$d13C.offset)) (offsetcorrC.sd<-sd(offsetC$d13C.offset)) d13C.offset<-ggplot(offsetC, aes(x=area44, y=d13C.offset, shape=type)) + geom_point(fill="orange", size=3) + geom_hline(yintercept=offsetcorrC.mean, colour="orange") + geom_hline(yintercept=offsetcorrC.mean + offsetcorrC.sd, colour="orange", linetype="dashed") + geom_hline(yintercept=offsetcorrC.mean - offsetcorrC.sd, colour="orange", linetype="dashed") + geom_hline(yintercept=offsetcorrC.mean + 2*offsetcorrC.sd, colour="orange", linetype=3) + geom_hline(yintercept=offsetcorrC.mean - 2*offsetcorrC.sd, colour="orange", linetype=3) + scale_shape_manual(values=c(21,22,23,24,25)) + annotate("text", y = offsetcorrC.mean + 0.01, x = min(offsetC$area44), label = paste0("mean: ", sprintf("%.2f", offsetcorrC.mean), " \U00B1 ", sprintf("%.2f", offsetcorrC.sd, 2), " \U2030 (1 sd)"), size = 4, hjust=0, vjust=0, parse=FALSE, colour="black") + theme_bw() d13C.offset.mass<-ggplot (stds, aes(x=area44, y=mass)) + stat_smooth(method="lm") + geom_point(data=offsetC, aes(x=area44, y=mass), shape=21, fill="orange", size = 2) + theme_bw() multiplot(d13C.offset, d13C.offset.mass, cols=2)
After finalizing the offset correction, apply it to the whole dataset, and check the monitoring standards
#apply offset correction to whole dataset data$d13C.offset <- data$d13C + (C.acc - offsetC.mean) stds$d13C.offset <- stds$d13C + (C.acc - offsetC.mean) #make monitoring standard dataset and dataset for additional standards used later for discrimination correction offsetC.mon <- subset(stds, Identifier1==mon.std) offsetC.dis <- subset(stds, Identifier1==dis.std) #check monitoring standard response (offsetC.mon.mean<-mean(offsetC.mon$d13C.offset)) (offsetC.mon.sd<-sd(offsetC.mon$d13C.offset)) C.stds.table$offsetC.mean <- c(offsetcorrC.mean, offsetC.mon.mean, mean(offsetC.dis$d13C.offset)) C.stds.table$offsetC.sd <- c(offsetcorrC.sd, offsetC.mon.sd, sd(offsetC.dis$d13C.offset)) C.mon.offset<-ggplot(offsetC.mon, aes(x=area44, y=d13C.offset)) + geom_point(shape=21, fill="orange") + geom_hline(yintercept=offsetC.mon.mean, colour="orange") + geom_hline(yintercept=offsetC.mon.mean + offsetC.mon.sd, colour="orange", linetype="dashed") + geom_hline(yintercept=offsetC.mon.mean - offsetC.mon.sd, colour="orange", linetype="dashed") + geom_hline(yintercept=offsetC.mon.mean + 2*offsetC.mon.sd, colour="orange", linetype=3) + geom_hline(yintercept=offsetC.mon.mean - 2*offsetC.mon.sd, colour="orange", linetype=3) + annotate("text", y = offsetC.mon.mean +0.01, x = min(offsetC.mon$area44), label = paste0("mean: ", sprintf("%.2f", offsetC.mon.mean), " \U00B1 ", sprintf("%.2f", offsetC.mon.sd, 2), " \U2030 (1 sd)"), size = 4, hjust=0, vjust=0, parse=FALSE) + theme_bw() C.mon.offset.mass<-ggplot (stds, aes(x=mass, y=area44)) + stat_smooth(method="lm") + geom_point(data=offsetC.mon, aes(x=mass, y=area44), shape=21, fill="orange", size = 2) + theme_bw() multiplot(C.mon.offset, C.mon.offset.mass, cols=2)
drift corrections, using raw values
driftC<-subset(stds, type=="drift.std") drift.slopeC<-(coef(lm(driftC$d13C ~ driftC$row))[[2]]) drift.interC<-(coef(lm(driftC$d13C ~ driftC$row))[[1]]) #drift check driftC$d13C.drift<- driftC$d13C + (C.acc - (drift.slopeC * driftC$row + drift.interC)) (driftC.mean<-mean(driftC$d13C.drift)) (driftC.sd<-sd(driftC$d13C.drift)) C.drift<-ggplot(driftC, aes(x=row, y=d13C)) + geom_smooth(method=lm, colour="black") + annotate("text", x = min(driftC$row), y = max(driftC$d13C + 0.01), label = lm_eqn(driftC$row, driftC$d13C), size = 4, hjust=0, vjust=0, parse=TRUE, colour="black") + geom_point(shape=21, fill="black", size=2) + geom_point(aes(x=row, y=d13C.drift), fill="red", shape=21, size=2) + geom_hline(aes(yintercept=C.acc), size=.5) + geom_hline(yintercept = driftC.mean + driftC.sd, colour="red", linetype="dashed") + geom_hline(yintercept = driftC.mean - driftC.sd, colour="red", linetype="dashed") + geom_hline(yintercept = driftC.mean + 2*driftC.sd, colour="red", linetype=3) + geom_hline(yintercept = driftC.mean - 2*driftC.sd, colour="red", linetype=3) + annotate("text", y = driftC.mean +0.01, x = min(driftC$row), label = paste0("mean: ", sprintf("%.2f", driftC.mean), " \U00B1 ", sprintf("%.2f", driftC.sd, 2), " \U2030 (1 sd)"), size = 4, hjust=0, vjust=0, parse=FALSE) + theme_bw() C.drift.mass<-ggplot(stds, aes(x=mass, y=area44)) + stat_smooth(method="lm") + geom_point(data=driftC, aes(x=mass, y=area44), shape=21, fill="red", size=3) + theme_bw() multiplot(C.drift, C.drift.mass, cols=2)
apply drift correction to all of the data, check with monitoring standards
data$d13C.drift <- data$d13C + (C.acc - (drift.slopeC * data$row + drift.interC)) stds$d13C.drift <- stds$d13C + (C.acc - (drift.slopeC * stds$row + drift.interC)) driftC.mon<-subset(stds, Identifier1==mon.std) driftC.dis <- subset(stds, Identifier1==dis.std) (driftC.mon.mean<-mean(driftC.mon$d13C.drift)) (driftC.mon.sd<-sd(driftC.mon$d13C.drift)) C.stds.table$driftC.mean <- c(driftC.mean, driftC.mon.mean, mean(driftC.dis$d13C.drift)) C.stds.table$driftC.sd <- c(driftC.sd, driftC.mon.sd, sd(driftC.dis$d13C.drift)) C.mon.drift<-ggplot(driftC.mon, aes(x=area44, y=d13C.drift)) + geom_point(shape=21, fill="red") + geom_hline(yintercept = driftC.mon.mean, colour="red") + geom_hline(yintercept = driftC.mon.mean + driftC.mon.sd, colour="red", linetype="dashed") + geom_hline(yintercept = driftC.mon.mean - driftC.mon.sd, colour="red", linetype="dashed") + geom_hline(yintercept = driftC.mon.mean + 2*driftC.mon.sd, colour="red", linetype=3) + geom_hline(yintercept = driftC.mon.mean - 2*driftC.mon.sd, colour="red", linetype=3) + annotate("text", y = driftC.mon.mean +0.01, x = min(driftC.mon$area44), label = paste0("mean: ", sprintf("%.2f", driftC.mon.mean), " \U00B1 ", sprintf("%.2f", driftC.mon.sd, 2), " \U2030 (1 sd)"), size = 4, hjust=0, vjust=0, parse=FALSE, colour="red") C.mon.drift.mass<-ggplot (stds, aes(x=mass, y=area44)) + stat_smooth(method="lm") + geom_point(data=driftC.mon, aes(x=mass, y=area44), shape=21, fill="red", size = 2) + theme_bw() multiplot(C.mon.drift, C.mon.drift.mass, cols=2)
Plot linearity
linC<-subset(stds, type=="lin.std") lin.slopeC<-(coef(lm(linC$d13C ~ linC$inv.area44))[[2]]) lin.interC<-(coef(lm(linC$d13C ~ linC$inv.area44))[[1]]) #linearity check linC$d13C.lin<-linC$d13C + (C.acc - (lin.slopeC * linC$inv.area44 + lin.interC)) (linC.mean<-mean(linC$d13C.lin)) (linC.sd<-sd(linC$d13C.lin)) C.lin.area44<-ggplot(linC, aes(x=area44, y=d13C)) + geom_point(shape=21, fill="blue") + geom_smooth() C.lincorr.invarea<-ggplot(linC, aes(x=inv.area44, y=d13C)) + geom_smooth(method=lm) + annotate("text", x = min(linC$inv.area44), y = max(linC$d13C + 0.01), label = lm_eqn(linC$inv.area44, linC$d13C), size = 4, hjust=0, vjust=0, parse=TRUE) + geom_point(shape=21, fill="black", size=2) + geom_point(aes(x=inv.area44, y=d13C.lin), fill="red", shape=22) + geom_hline(aes(yintercept=C.acc), size=.5) + geom_hline(yintercept = linC.mean + linC.sd, colour="red", linetype="dashed") + geom_hline(yintercept = linC.mean - linC.sd, colour="red", linetype="dashed") + geom_hline(yintercept = linC.mean + 2*linC.sd, colour="red", linetype=3) + geom_hline(yintercept = linC.mean - 2*linC.sd, colour="red", linetype=3) + annotate("text", y = linC.mean +0.01, x = min(linC$inv.area44), label = paste0("mean: ", sprintf("%.2f", linC.mean), " \U00B1 ", sprintf("%.2f", linC.sd, 2), " \U2030 (1 sd)"), size = 4, hjust=0, vjust=0, parse=FALSE) + theme_bw() C.lin.mass<-ggplot (stds, aes(x=mass, y=area44)) + stat_smooth(method="lm") + geom_point(data=linC, aes(x=mass, y=area44), shape=21, fill="blue", size = 2) + theme_bw() multiplot(C.lin.area44, C.lincorr.invarea, C.lin.mass, cols=3)
apply linearity correction + offset correction to all of the data
data$d13C.lin <- data$d13C + (C.acc - (lin.slopeC * data$inv.area44 + lin.interC)) stds$d13C.lin <- stds$d13C + (C.acc - (lin.slopeC * stds$inv.area44 + lin.interC)) linC.mon<-subset(stds, Identifier1==mon.std) linC.dis<-subset(stds, Identifier1==dis.std) (linC.mon.mean <- mean(linC.mon$d13C.lin)) (linC.mon.sd<-sd(linC.mon$d13C.lin)) C.stds.table$linC.mean <- c(linC.mean, linC.mon.mean, mean(linC.dis$d13C.lin)) C.stds.table$linC.sd <- c(linC.sd, linC.mon.sd, sd(linC.dis$d13C.lin)) C.mon.lin<-ggplot(linC.mon, aes(x=area44, y=d13C.lin)) + geom_point(shape=21, fill="blue") + geom_hline(yintercept = linC.mon.mean, colour="blue") + geom_hline(yintercept = linC.mon.mean + linC.mon.sd, colour="blue", linetype="dashed") + geom_hline(yintercept = linC.mon.mean - linC.mon.sd, colour="blue", linetype="dashed") + geom_hline(yintercept = linC.mon.mean + 2*linC.mon.sd, colour="blue", linetype=3) + geom_hline(yintercept = linC.mon.mean - 2*linC.mon.sd, colour="blue", linetype=3) + annotate("text", y = linC.mon.mean +0.01, x = min(linC.mon$area44), label = paste0("mean: ", sprintf("%.2f", linC.mon.mean), " \U00B1 ", sprintf("%.2f", linC.mon.sd, 2), " \U2030 (1 sd)"), size = 4, hjust=0, vjust=0, parse=FALSE, colour="blue") C.mon.lin.mass<-ggplot (stds, aes(x=mass, y=area44)) + stat_smooth(method="lm") + geom_point(data=linC.mon, aes(x=mass, y=area44), shape=21, fill="blue", size = 2) + theme_bw() multiplot(C.mon.lin, C.mon.lin.mass, cols=2)
drift corrections on the linearity corrected values
lindriftC <- merge(driftC, data[c("row", "d13C.lin")], by.x="row", by.y="row", all.x=TRUE, all.y=FALSE, sort=FALSE) lindrift.slopeC<-(coef(lm(lindriftC$d13C.lin ~ lindriftC$row))[[2]]) lindrift.interC<-(coef(lm(lindriftC$d13C.lin ~ lindriftC$row))[[1]]) #drift check lindriftC$d13C.lindrift<- lindriftC$d13C.lin + (C.acc - (lindrift.slopeC * lindriftC$row + lindrift.interC)) (lindriftC.mean<-mean(lindriftC$d13C.drift)) (lindriftC.sd<-sd(lindriftC$d13C.drift)) C.lindrift<-ggplot(lindriftC, aes(x=row, y=d13C.lin)) + geom_smooth(method=lm, colour="black") + annotate("text", x = min(lindriftC$row), y = max(lindriftC$d13C.lin + 0.01), label = lm_eqn(lindriftC$row, lindriftC$d13C.lin), size = 4, hjust=0, vjust=0, parse=TRUE, colour="black") + geom_point(shape=21, fill="black", size=2) + geom_point(aes(x=row, y=d13C.lindrift), fill="red", shape=22, size=2) + geom_hline(aes(yintercept=C.acc), size=.5) + geom_hline(yintercept = lindriftC.mean + lindriftC.sd, colour="red", linetype="dashed") + geom_hline(yintercept = lindriftC.mean - lindriftC.sd, colour="red", linetype="dashed") + geom_hline(yintercept = lindriftC.mean + 2*lindriftC.sd, colour="red", linetype=3) + geom_hline(yintercept = lindriftC.mean - 2*lindriftC.sd, colour="red", linetype=3) + annotate("text", y = lindriftC.mean +0.01, x = min(lindriftC$row), label = paste0("mean: ", sprintf("%.2f", lindriftC.mean), " \U00B1 ", sprintf("%.2f", lindriftC.sd, 2), " \U2030 (1 sd)"), size = 4, hjust=0, vjust=0, parse=FALSE) + theme_bw() C.lindrift.mass<-ggplot(stds, aes(x=mass, y=area44)) + stat_smooth(method="lm") + geom_point(data=lindriftC, aes(x=mass, y=area44), shape=21, fill="red", size=3) + theme_bw() multiplot(C.lindrift, C.lindrift.mass, cols=2)
apply drift correction to all of the linearirty corrected data, check with monitoring standards
data$d13C.lindrift <- data$d13C.lin + (C.acc - (lindrift.slopeC * data$row + lindrift.interC)) stds$d13C.lindrift <- stds$d13C.lin + (C.acc - (lindrift.slopeC * stds$row + lindrift.interC)) lindriftC.mon<-subset(stds, Identifier1==mon.std) lindriftC.dis<-subset(stds, Identifier1==dis.std) (lindriftC.mon.mean<-mean(lindriftC.mon$d13C.lindrift)) (lindriftC.mon.sd<-sd(lindriftC.mon$d13C.lindrift)) C.stds.table$lindriftC.mean <- c(lindriftC.mean, lindriftC.mon.mean, mean(lindriftC.dis$d13C.lindrift)) C.stds.table$lindriftC.sd <- c(lindriftC.sd, lindriftC.mon.sd, sd(lindriftC.dis$d13C.lindrift)) C.mon.drift<-ggplot(lindriftC.mon, aes(x=area44, y=d13C.lindrift)) + geom_point(shape=21, fill="red") + geom_hline(yintercept = lindriftC.mon.mean, colour="red") + geom_hline(yintercept = lindriftC.mon.mean + lindriftC.mon.sd, colour="red", linetype="dashed") + geom_hline(yintercept = lindriftC.mon.mean - lindriftC.mon.sd, colour="red", linetype="dashed") + geom_hline(yintercept = lindriftC.mon.mean + 2*lindriftC.mon.sd, colour="red", linetype=3) + geom_hline(yintercept = lindriftC.mon.mean - 2*lindriftC.mon.sd, colour="red", linetype=3) + annotate("text", y = lindriftC.mon.mean +0.01, x = min(lindriftC.mon$area44), label = paste0("mean: ", sprintf("%.2f", lindriftC.mon.mean), " \U00B1 ", sprintf("%.2f", lindriftC.mon.sd, 2), " \U2030 (1 sd)"), size = 4, hjust=0, vjust=0, parse=FALSE, colour="red") C.mon.drift.mass<-ggplot (stds, aes(x=mass, y=area44)) + stat_smooth(method="lm") + geom_point(data=lindriftC.mon, aes(x=mass, y=area44), shape=21, fill="red", size = 2) + theme_bw() multiplot(C.mon.drift, C.mon.drift.mass, cols=2)
d13C discrimination correction - First, need to pick which correction scheme you prefer, then update this with the correct data columns
# replace the character string here with the correction column you want to use col_for_disc <- "lindrift" # options to substitute in here are: "offset" or "drift" or "lin" or "lindrift" # resulting formulas col_in_C.stds.table <- paste0(col_for_disc, "C.mean") col_in_data <- paste0("d13C.", col_for_disc) col_for_disc_mean_eq <- list(col_for_disc = lazyeval::interp(~var, var = as.name(col_in_C.stds.table))) correction_eq <- list(d13C.disc = lazyeval::interp(~disc.slopeC * var + disc.interC, var = as.name(col_in_data))) # safety checks if (!col_in_C.stds.table %in% names(C.stds.table)) stop("this column does not exist in C.stds.table: ", col_in_C.stds.table, call. = FALSE) if (!col_in_data %in% names(data)) stop("this column does not exist in data or stds table: ", col_in_data, call. = FALSE) # regression m <- lm(C.acc ~ col_for_disc, data = mutate_(C.stds.table, .dots = col_for_disc_mean_eq)) disc.slopeC<-(coef(m)[[2]]) disc.interC<-(coef(m)[[1]]) R2 <- summary(m)$r.squared # apply correction data <- mutate_(data, .dots = correction_eq) stds <- mutate_(stds, .dots = correction_eq) C.disc.all <- ggplot(C.stds.table, aes_string(x=col_in_C.stds.table, y="C.acc")) + geom_smooth(method="lm", color = "blue") + geom_point(data=data, aes_string(x=col_in_data, y="d13C.disc"), shape=23, fill="red", size = 2) + geom_point(shape=21, fill="blue", size = 4) + geom_text( x = min(C.stds.table[[col_in_C.stds.table]]), y = max(C.stds.table$C.acc), label = str_interp("C.acc = $[.2f]{slope} ${var} + $[.2f]{intercept} (R2: $[.4f]{R2})", list(slope = disc.slopeC, intercept = disc.interC, var = col_for_disc, R2 = R2)), size = 4, hjust=0, vjust=0, colour="blue") + labs(x = col_for_disc) C.disc.all
discC.mon<-subset(stds, Identifier1==mon.std) discC.dis<-subset(stds, Identifier1==dis.std) discC.corr<-subset(stds, Identifier1==corr.std) (discC.mon.mean<-mean(discC.mon$d13C.disc)) (discC.mon.sd<-sd(discC.mon$d13C.disc)) C.stds.table$discC.mean <- c(mean(discC.corr$d13C.disc), discC.mon.mean, mean(discC.dis$d13C.disc)) C.stds.table$discC.sd <- c(sd(discC.corr$d13C.disc), discC.mon.sd, sd(discC.dis$d13C.disc))
Apply basic offset correction to all of the data; does NOT include linearity or drift correction to data - culling any additional standards that should be culled (e.g. 2 sd outliers)
offsetO<-subset(stds, Identifier1==corr.std) (offsetO.mean<-mean(offsetO$d18O.PDB)) (offsetO.sd<-sd(offsetO$d18O.PDB)) offsetO$d18O.offset <- offsetO$d18O.PDB + (O.acc - offsetO.mean) (offsetcorrO.mean<-mean(offsetO$d18O.offset)) (offsetcorrO.sd<-sd(offsetO$d18O.offset)) d18O.offset<-ggplot(offsetO, aes(x=area44, y=d18O.offset, shape=type)) + geom_point(fill="orange", size=3) + geom_hline(yintercept=offsetcorrO.mean, colour="orange") + geom_hline(yintercept=offsetcorrO.mean + offsetcorrO.sd, colour="orange", linetype="dashed") + geom_hline(yintercept=offsetcorrO.mean - offsetcorrO.sd, colour="orange", linetype="dashed") + geom_hline(yintercept=offsetcorrO.mean + 2*offsetcorrO.sd, colour="orange", linetype=3) + geom_hline(yintercept=offsetcorrO.mean - 2*offsetcorrO.sd, colour="orange", linetype=3) + scale_shape_manual(values=c(21,22,23,24,25)) + annotate("text", y = offsetcorrO.mean + 0.01, x = min(offsetO$area44), label = paste0("mean: ", sprintf("%.2f", offsetcorrO.mean), " \U00B1 ", sprintf("%.2f", offsetcorrO.sd, 2), " \U2030 (1 sd)"), size = 4, hjust=0, vjust=0, parse=FALSE, colour="black") + theme_bw() d18O.offset.mass<-ggplot (stds, aes(x=area44, y=mass)) + stat_smooth(method="lm") + geom_point(data=offsetO, aes(x=area44, y=mass), shape=21, fill="orange", size = 2) + theme_bw() multiplot(d18O.offset, d18O.offset.mass, cols=2)
After finalizing the offset correction, apply it to the whole dataset, and check the monitoring standards
#apply offset correction to whole dataset data$d18O.offset <- data$d18O.PDB + (O.acc - offsetO.mean) stds$d18O.offset <- stds$d18O.PDB + (O.acc - offsetO.mean) #make monitoring standard dataset and dataset for additional standards used later for discrimination correction offsetO.mon <- subset(stds, Identifier1==mon.std) offsetO.dis <- subset(stds, Identifier1==dis.std) #check monitoring standard response (offsetO.mon.mean<-mean(offsetO.mon$d18O.offset)) (offsetO.mon.sd<-sd(offsetO.mon$d18O.offset)) O.stds.table$offsetO.mean <- c(offsetcorrO.mean, offsetO.mon.mean, mean(offsetO.dis$d18O.offset)) O.stds.table$offsetO.sd <- c(offsetcorrO.sd, offsetO.mon.sd, sd(offsetO.dis$d18O.offset)) O.mon.offset<-ggplot(offsetO.mon, aes(x=area44, y=d18O.offset)) + geom_point(shape=21, fill="orange") + geom_hline(yintercept=offsetO.mon.mean, colour="orange") + geom_hline(yintercept=offsetO.mon.mean + offsetO.mon.sd, colour="orange", linetype="dashed") + geom_hline(yintercept=offsetO.mon.mean - offsetO.mon.sd, colour="orange", linetype="dashed") + geom_hline(yintercept=offsetO.mon.mean + 2*offsetO.mon.sd, colour="orange", linetype=3) + geom_hline(yintercept=offsetO.mon.mean - 2*offsetO.mon.sd, colour="orange", linetype=3) + annotate("text", y = offsetO.mon.mean +0.01, x = min(offsetO.mon$area44), label = paste0("mean: ", sprintf("%.2f", offsetO.mon.mean), " \U00B1 ", sprintf("%.2f", offsetO.mon.sd, 2), " \U2030 (1 sd)"), size = 4, hjust=0, vjust=0, parse=FALSE) + theme_bw() O.mon.offset.mass<-ggplot (stds, aes(x=mass, y=area44)) + stat_smooth(method="lm") + geom_point(data=offsetO.mon, aes(x=mass, y=area44), shape=21, fill="orange", size = 2) + theme_bw() multiplot(O.mon.offset, O.mon.offset.mass, cols=2)
drift corrections, using raw values
driftO<-subset(stds, type=="drift.std") drift.slopeO<-(coef(lm(driftO$d18O.PDB ~ driftO$row))[[2]]) drift.interO<-(coef(lm(driftO$d18O.PDB ~ driftO$row))[[1]]) #drift check driftO$d18O.drift<- driftO$d18O.PDB + (O.acc - (drift.slopeO * driftO$row + drift.interO)) (driftO.mean<-mean(driftO$d18O.drift)) (driftO.sd<-sd(driftO$d18O.drift)) O.drift<-ggplot(driftO, aes(x=row, y=d18O.PDB)) + geom_smooth(method=lm, colour="black") + annotate("text", x = min(driftO$row), y = max(driftO$d18O.PDB + 0.01), label = lm_eqn(driftO$row, driftO$d18O.PDB), size = 4, hjust=0, vjust=0, parse=TRUE, colour="black") + geom_point(shape=21, fill="black", size=2) + geom_point(aes(x=row, y=d18O.drift), fill="red", shape=22, size=2) + geom_hline(aes(yintercept=O.acc), size=.5) + geom_hline(yintercept = driftO.mean + driftO.sd, colour="red", linetype="dashed") + geom_hline(yintercept = driftO.mean - driftO.sd, colour="red", linetype="dashed") + geom_hline(yintercept = driftO.mean + 2*driftO.sd, colour="red", linetype=3) + geom_hline(yintercept = driftO.mean - 2*driftO.sd, colour="red", linetype=3) + annotate("text", y = driftO.mean +0.01, x = min(driftO$row), label = paste0("mean: ", sprintf("%.2f", driftO.mean), " \U00B1 ", sprintf("%.2f", driftO.sd, 2), " \U2030 (1 sd)"), size = 4, hjust=0, vjust=0, parse=FALSE) + theme_bw() O.drift.mass<-ggplot(stds, aes(x=mass, y=area44)) + stat_smooth(method="lm") + geom_point(data=driftO, aes(x=mass, y=area44), shape=21, fill="red", size=3) + theme_bw() multiplot(O.drift, O.drift.mass, cols=2)
apply drift correction to all of the data, check with monitoring standards
data$d18O.drift <- data$d18O.PDB + (O.acc - (drift.slopeO * data$row + drift.interO)) stds$d18O.drift <- stds$d18O.PDB + (O.acc - (drift.slopeO * stds$row + drift.interO)) driftO.mon<-subset(stds, Identifier1==mon.std) driftO.dis <- subset(stds, Identifier1==dis.std) (driftO.mon.mean<-mean(driftO.mon$d18O.drift)) (driftO.mon.sd<-sd(driftO.mon$d18O.drift)) O.stds.table$driftO.mean <- c(driftO.mean, driftO.mon.mean, mean(driftO.dis$d18O.drift)) O.stds.table$driftO.sd <- c(driftO.sd, driftO.mon.sd, sd(driftO.dis$d18O.drift)) O.mon.drift<-ggplot(driftO.mon, aes(x=area44, y=d18O.drift)) + geom_point(shape=21, fill="red") + geom_hline(yintercept = driftO.mon.mean, colour="red") + geom_hline(yintercept = driftO.mon.mean + driftO.mon.sd, colour="red", linetype="dashed") + geom_hline(yintercept = driftO.mon.mean - driftO.mon.sd, colour="red", linetype="dashed") + geom_hline(yintercept = driftO.mon.mean + 2*driftO.mon.sd, colour="red", linetype=3) + geom_hline(yintercept = driftO.mon.mean - 2*driftO.mon.sd, colour="red", linetype=3) + annotate("text", y = driftO.mon.mean +0.01, x = min(driftO.mon$area44), label = paste0("mean: ", sprintf("%.2f", driftO.mon.mean), " \U00B1 ", sprintf("%.2f", driftO.mon.sd, 2), " \U2030 (1 sd)"), size = 4, hjust=0, vjust=0, parse=FALSE, colour="red") O.mon.drift.mass<-ggplot (stds, aes(x=mass, y=area44)) + stat_smooth(method="lm") + geom_point(data=driftO.mon, aes(x=mass, y=area44), shape=21, fill="red", size = 2) + theme_bw() multiplot(O.mon.drift, O.mon.drift.mass, cols=2)
Plot linearity
linO<-subset(stds, type=="lin.std") lin.slopeO<-(coef(lm(linO$d18O.PDB ~ linO$inv.area44))[[2]]) lin.interO<-(coef(lm(linO$d18O.PDB ~ linO$inv.area44))[[1]]) #linearity check linO$d18O.lin<-linO$d18O.PDB + (O.acc - (lin.slopeO * linO$inv.area44 + lin.interO)) (linO.mean<-mean(linO$d18O.lin)) (linO.sd<-sd(linO$d18O.lin)) O.lin.area44<-ggplot(linO, aes(x=area44, y=d18O.PDB)) + geom_point(shape=21, fill="blue") + geom_smooth() O.lincorr.invarea<-ggplot(linO, aes(x=inv.area44, y=d18O.PDB)) + geom_smooth(method=lm) + annotate("text", x = min(linO$inv.area44), y = max(linO$d18O.PDB + 0.01), label = lm_eqn(linO$inv.area44, linO$d18O.PDB), size = 4, hjust=0, vjust=0, parse=TRUE) + geom_point(shape=21, fill="black", size=2) + geom_point(aes(x=inv.area44, y=d18O.lin), fill="red", shape=22) + geom_hline(aes(yintercept=O.acc), size=.5) + geom_hline(yintercept = linO.mean + linO.sd, colour="red", linetype="dashed") + geom_hline(yintercept = linO.mean - linO.sd, colour="red", linetype="dashed") + geom_hline(yintercept = linO.mean + 2*linO.sd, colour="red", linetype=3) + geom_hline(yintercept = linO.mean - 2*linO.sd, colour="red", linetype=3) + annotate("text", y = linO.mean +0.01, x = min(linO$inv.area44), label = paste0("mean: ", sprintf("%.2f", linO.mean), " \U00B1 ", sprintf("%.2f", linO.sd, 2), " \U2030 (1 sd)"), size = 4, hjust=0, vjust=0, parse=FALSE) + theme_bw() O.lin.mass<-ggplot (stds, aes(x=mass, y=area44)) + stat_smooth(method="lm") + geom_point(data=linO, aes(x=mass, y=area44), shape=21, fill="blue", size = 2) + theme_bw() multiplot(O.lin.area44, O.lincorr.invarea, O.lin.mass, cols=3)
apply linearity correction + offset correction to all of the data
data$d18O.lin <- data$d18O.PDB + (O.acc - (lin.slopeO * data$inv.area44 + lin.interO)) stds$d18O.lin <- stds$d18O.PDB + (O.acc - (lin.slopeO * stds$inv.area44 + lin.interO)) linO.mon<-subset(stds, Identifier1==mon.std) linO.dis<-subset(stds, Identifier1==dis.std) (linO.mon.mean <- mean(linO.mon$d18O.lin)) (linO.mon.sd<-sd(linO.mon$d18O.lin)) O.stds.table$linO.mean <- c(linO.mean, linO.mon.mean, mean(linO.dis$d18O.lin)) O.stds.table$linO.sd <- c(linO.sd, linO.mon.sd, sd(linO.dis$d18O.lin)) O.mon.lin<-ggplot(linO.mon, aes(x=area44, y=d18O.lin)) + geom_point(shape=21, fill="blue") + geom_hline(yintercept = linO.mon.mean, colour="blue") + geom_hline(yintercept = linO.mon.mean + linO.mon.sd, colour="blue", linetype="dashed") + geom_hline(yintercept = linO.mon.mean - linO.mon.sd, colour="blue", linetype="dashed") + geom_hline(yintercept = linO.mon.mean + 2*linO.mon.sd, colour="blue", linetype=3) + geom_hline(yintercept = linO.mon.mean - 2*linO.mon.sd, colour="blue", linetype=3) + annotate("text", y = linO.mon.mean +0.01, x = min(linO.mon$area44), label = paste0("mean: ", sprintf("%.2f", linO.mon.mean), " \U00B1 ", sprintf("%.2f", linO.mon.sd, 2), " \U2030 (1 sd)"), size = 4, hjust=0, vjust=0, parse=FALSE, colour="blue") O.mon.lin.mass<-ggplot (stds, aes(x=mass, y=area44)) + stat_smooth(method="lm") + geom_point(data=linO.mon, aes(x=mass, y=area44), shape=21, fill="blue", size = 2) + theme_bw() multiplot(O.mon.lin, O.mon.lin.mass, cols=2)
drift corrections on the linearity corrected values
lindriftO <- merge(driftO, data[c("row", "d18O.lin")], by.x="row", by.y="row", all.x=TRUE, all.y=FALSE, sort=FALSE) lindrift.slopeO<-(coef(lm(lindriftO$d18O.lin ~ lindriftO$row))[[2]]) lindrift.interO<-(coef(lm(lindriftO$d18O.lin ~ lindriftO$row))[[1]]) #drift check lindriftO$d18O.lindrift<- lindriftO$d18O.lin + (O.acc - (lindrift.slopeO * lindriftO$row + lindrift.interO)) (lindriftO.mean<-mean(lindriftO$d18O.drift)) (lindriftO.sd<-sd(lindriftO$d18O.drift)) O.lindrift<-ggplot(lindriftO, aes(x=row, y=d18O.lin)) + geom_smooth(method=lm, colour="black") + annotate("text", x = min(lindriftO$row), y = max(lindriftO$d18O.lin + 0.01), label = lm_eqn(lindriftO$row, lindriftO$d18O.lin), size = 4, hjust=0, vjust=0, parse=TRUE, colour="black") + geom_point(shape=21, fill="black", size=2) + geom_point(aes(x=row, y=d18O.lindrift), fill="red", shape=22, size=2) + geom_hline(aes(yintercept=O.acc), size=.5) + geom_hline(yintercept = lindriftO.mean + lindriftO.sd, colour="red", linetype="dashed") + geom_hline(yintercept = lindriftO.mean - lindriftO.sd, colour="red", linetype="dashed") + geom_hline(yintercept = lindriftO.mean + 2*lindriftO.sd, colour="red", linetype=3) + geom_hline(yintercept = lindriftO.mean - 2*lindriftO.sd, colour="red", linetype=3) + annotate("text", y = lindriftO.mean +0.01, x = min(lindriftO$row), label = paste0("mean: ", sprintf("%.2f", lindriftO.mean), " \U00B1 ", sprintf("%.2f", lindriftO.sd, 2), " \U2030 (1 sd)"), size = 4, hjust=0, vjust=0, parse=FALSE) + theme_bw() O.lindrift.mass<-ggplot(stds, aes(x=mass, y=area44)) + stat_smooth(method="lm") + geom_point(data=lindriftO, aes(x=mass, y=area44), shape=21, fill="red", size=3) + theme_bw() multiplot(O.lindrift, O.lindrift.mass, cols=2)
apply drift correction to all of the linearirty corrected data, check with monitoring standards
data$d18O.lindrift <- data$d18O.lin + (O.acc - (lindrift.slopeO * data$row + lindrift.interO)) stds$d18O.lindrift <- stds$d18O.lin + (O.acc - (lindrift.slopeO * stds$row + lindrift.interO)) lindriftO.mon<-subset(stds, Identifier1==mon.std) lindriftO.dis<-subset(stds, Identifier1==dis.std) (lindriftO.mon.mean<-mean(lindriftO.mon$d18O.lindrift)) (lindriftO.mon.sd<-sd(lindriftO.mon$d18O.lindrift)) O.stds.table$lindriftO.mean <- cbind(c(lindriftO.mean, lindriftO.mon.mean, mean(lindriftO.dis$d18O.lindrift))) O.stds.table$lindriftO.sd <- cbind(c(lindriftO.sd, lindriftO.mon.sd, sd(lindriftO.dis$d18O.lindrift))) O.mon.drift<-ggplot(lindriftO.mon, aes(x=area44, y=d18O.lindrift)) + geom_point(shape=21, fill="red") + geom_hline(yintercept = lindriftO.mon.mean, colour="red") + geom_hline(yintercept = lindriftO.mon.mean + lindriftO.mon.sd, colour="red", linetype="dashed") + geom_hline(yintercept = lindriftO.mon.mean - lindriftO.mon.sd, colour="red", linetype="dashed") + geom_hline(yintercept = lindriftO.mon.mean + 2*lindriftO.mon.sd, colour="red", linetype=3) + geom_hline(yintercept = lindriftO.mon.mean - 2*lindriftO.mon.sd, colour="red", linetype=3) + annotate("text", y = lindriftO.mon.mean +0.01, x = min(lindriftO.mon$area44), label = paste0("mean: ", sprintf("%.2f", lindriftO.mon.mean), " \U00B1 ", sprintf("%.2f", lindriftO.mon.sd, 2), " \U2030 (1 sd)"), size = 4, hjust=0, vjust=0, parse=FALSE, colour="red") O.mon.drift.mass<-ggplot (stds, aes(x=mass, y=area44)) + stat_smooth(method="lm") + geom_point(data=lindriftO.mon, aes(x=mass, y=area44), shape=21, fill="red", size = 2) + theme_bw() multiplot(O.mon.drift, O.mon.drift.mass, cols=2)
d18O discrimination correction - First, need to pick which correction scheme you prefer, then update this with the correct data columns
# replace the character string here with the correction column you want to use col_for_disc <- "lindrift" # options to substitute in here are: "offset" or "drift" or "lin" or "lindrift" # resulting formulas col_in_O.stds.table <- paste0(col_for_disc, "O.mean") col_in_data <- paste0("d18O.", col_for_disc) col_for_disc_mean_eq <- list(col_for_disc = lazyeval::interp(~var, var = as.name(col_in_O.stds.table))) correction_eq <- list(d18O.disc = lazyeval::interp(~disc.slopeO * var + disc.interO, var = as.name(col_in_data))) # safety checks if (!col_in_O.stds.table %in% names(O.stds.table)) stop("this column does not exist in O.stds.table: ", col_in_O.stds.table, call. = FALSE) if (!col_in_data %in% names(data)) stop("this column does not exist in data or stds table: ", col_in_data, call. = FALSE) # regression m <- lm(O.acc ~ col_for_disc, data = mutate_(O.stds.table, .dots = col_for_disc_mean_eq)) disc.slopeO<-(coef(m)[[2]]) disc.interO<-(coef(m)[[1]]) R2 <- summary(m)$r.squared # apply correction data <- mutate_(data, .dots = correction_eq) stds <- mutate_(stds, .dots = correction_eq) O.disc.all <- ggplot(O.stds.table, aes_string(x=col_in_O.stds.table, y="O.acc")) + geom_smooth(method="lm", color = "blue") + geom_point(data=data, aes_string(x=col_in_data, y="d18O.disc"), shape=23, fill="red", size = 2) + geom_point(shape=21, fill="blue", size = 4) + geom_text( x = min(O.stds.table[[col_in_O.stds.table]]), y = max(O.stds.table$O.acc), label = str_interp("O.acc = $[.2f]{slope} ${var} + $[.2f]{intercept} (R2: $[.4f]{R2})", list(slope = disc.slopeO, intercept = disc.interO, var = col_for_disc, R2 = R2)), size = 4, hjust=0, vjust=0, colour="blue") + labs(x = col_for_disc) #O.disc.all
discO.mon<-subset(stds, Identifier1==mon.std) discO.dis<-subset(stds, Identifier1==dis.std) discO.corr<-subset(stds, Identifier1==corr.std) (discO.mon.mean<-mean(discO.mon$d18O.disc)) (discO.mon.sd<-sd(discO.mon$d18O.disc)) O.stds.table$discO.mean <- c(mean(discO.corr$d18O.disc), discO.mon.mean, mean(discO.dis$d18O.disc)) O.stds.table$discO.sd <- c(sd(discO.corr$d18O.disc), discO.mon.sd, sd(discO.dis$d18O.disc))
save data to spreadsheet
wb <- createWorkbook("data") wb <- add_ws_with_data(wb, "offsetC", offsetC) wb <- add_ws_with_data(wb, "driftC", driftC) wb <- add_ws_with_data(wb, "linC", linC) wb <- add_ws_with_data(wb, "lindriftC", lindriftC) wb <- add_ws_with_data(wb, "C Stds means", C.stds.table) wb <- add_ws_with_data(wb, "offsetO", offsetO) wb <- add_ws_with_data(wb, "driftO", driftO) wb <- add_ws_with_data(wb, "linO", linO) wb <- add_ws_with_data(wb, "lindriftO", lindriftO) wb <- add_ws_with_data(wb, "O Stds means", O.stds.table) wb <- add_ws_with_data(wb, "all stds used", stds) wb <- add_ws_with_data(wb, "culled data", culled.data) wb <- add_ws_with_data(wb, "all data", data) saveWorkbook(wb, paste0(session, "_corrected_data.xlsx"), overwrite = TRUE)
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.