#Put whatever you normally put in a setup chunk.
#I usually at least include:
################################################################################
#Load packages
################################################################################
if(!require(microcystinchla)){
  devtools::install_github("usepa/microcystinchla",
                           auth_token = getOption("Github_Access"))
}
if(!require(viridis)){
  install.packages("viridis") 
}
if(!require(autocrop)){
  devtools::install_github("jhollist/autocrop") 
}
library("microcystinchla")
library("dplyr")
library("magrittr")
library("pander")
library("knitr")
library("e1071")
library("autocrop")
#opts_chunk$set(dev = 'jpg', fig.width=6, fig.height=5)

# Restore previously cached objects, if a cache directory is present.
# lazyLoad() takes the database path WITHOUT its extension, so the ".rdb"
# suffix is stripped from every database file found. The pattern is escaped
# and anchored ("\\.rdb$") so that only a trailing, literal ".rdb" is matched;
# an unescaped ".rdb" would treat "." as "any character" and could match or
# strip text elsewhere in the path.
# NOTE(review): the *_cp objects used further down are not created in the
# visible code (their condprob() calls are commented out) -- presumably they
# come from this cache; confirm.
if (file.exists("manuscript_cache")) {
  for (i in sub("\\.rdb$", "",
                list.files("manuscript_cache/latex", pattern = "\\.rdb$",
                           full.names = TRUE))) {
    lazyLoad(i)
  }
}

# Table Captions from @DeanK on http://stackoverflow.com/questions/15258233/
# using-table-caption-on-r-markdown-file-using-knitr-to-use-in-pandoc-to-convert-t
# Figure captions are handled by LaTeX

# Chunk hook for the `tab.cap` option: nothing is emitted ahead of the chunk;
# once the chunk has run, a blank line followed by ":<caption>" is appended.
knit_hooks$set(tab.cap = function(before, options, envir) {
  if (before) {
    return(NULL)
  }
  paste0("\n\n:", options$tab.cap)
})

# Output hook: chunks that carry a `tab.cap` option have their output passed
# through untouched; every other chunk goes through the hook that was
# registered before this one.
default_output_hook <- knit_hooks$get("output")
knit_hooks$set(output = function(x, options) {
  if (is.null(options$tab.cap)) {
    return(default_output_hook(x, options))
  }
  x
})
#All analysis in here, that way all bits of the paper have access to the final objects
#Place tables and figures and numerical results where they need to go.



################################################################################
# Data Prep
################################################################################
# Location of the 2007 NLA recreational condition estimates (CSV); it is
# handed to get_nla() to build `nla_dat`.
data_url <- "https://www2.epa.gov/sites/production/files/2014-10/nla2007_recreational_conditionestimates_20091123.csv"

# Add log10-transformed chlorophyll a and microcystin columns alongside the
# raw CHLA and MCYST_TL_UGL values.
nla_dat <- get_nla(data_url) %>%
  mutate(
    log_chla = log10(CHLA),
    log_micro = log10(MCYST_TL_UGL)
  )

################################################################################
# Conditional Probability
################################################################################
# Microcystin advisory concentrations, the organisation issuing each one, and
# a short description of each advisory. The three vectors are parallel:
# element k of each describes the same advisory level.
mc_guides <- c(0.3, 1, 1.6, 2, 4, 10, 20)
mc_names <- c("USEPA", "WHO", "USEPA", rep("WHO", 4))
mc_type <- c(
  "Child Drinking Water Advisory",
  "Drinking Water Advisory",
  "Adult Drinking Water",
  paste0(
    "Recreational: ",
    c("Low", "Moderate", "High", "Very High"),
    " Prob. of Effect"
  )
)

#wgt <- nla_dat$WGT_NLA
# Every row of nla_dat gets a weight of 1; the alternative based on the
# WGT_NLA column is kept above, disabled.
wgt <- rep(1, times = nrow(nla_dat))

#set.seed(19720923)
#epa_child_cp <- condprob(xX = nla_dat$log_chla,xY = nla_dat$log_micro, 
#                     xImpair = log10(mc_guides[1]), ProbComp = "gt", Exceed = "gte",
#                     ci = TRUE, R = 10000, xW=wgt)

#mc_chla[1] <- get_chla(epa_child_cp,0.5)
#set.seed(19720923)
#who_drink_cp <- condprob(xX = nla_dat$log_chla,xY = nla_dat$log_micro, 
#                     xImpair = log10(mc_guides[2]), ProbComp = "gt", Exceed = "gte",
#                     ci = TRUE, R = 10000, xW=wgt)

#mc_chla[2] <- get_chla(who_drink_cp,0.5)
#set.seed(19720923)
#epa_adult_cp <- condprob(xX = nla_dat$log_chla,xY = nla_dat$log_micro, 
#                     xImpair = log10(mc_guides[3]), ProbComp = "gt", Exceed = "gte",
#                     ci = TRUE, R = 10000, xW=wgt)

#mc_chla[3] <- get_chla(epa_adult_cp,0.5)
#set.seed(19720923)
#who_rec_low1_cp <- condprob(xX = nla_dat$log_chla,xY = nla_dat$log_micro, 
#                     xImpair = log10(mc_guides[4]), ProbComp = "gt", Exceed = "gte",
#                     ci = TRUE, R = 10000, xW=wgt)

#mc_chla[4] <- get_chla(who_rec_low1_cp,0.5)
# Conditional probabilities (0.1 to 0.9 in steps of 0.1) at which chlorophyll a
# values are looked up. Row 5 of `mc_chla` therefore corresponds to 0.5.
cps <- seq(0.1, 0.9, 0.1)

# Look up the chlorophyll a value for every probability in `probs`.
#   cp    : a conditional probability object accepted by get_chla()
#   probs : numeric vector of conditional probabilities
# Returns a numeric vector the same length as `probs`. Only the first element
# of each get_chla() result is kept. Values above 0.1 are rounded to whole
# numbers; values at or below 0.1 are left as returned so they do not
# collapse to 0.
# Stops with an explicit message if any lookup yields NA (previously this
# surfaced as an opaque "missing value where TRUE/FALSE needed" error).
chla_at_probs <- function(cp, probs) {
  chla <- vapply(probs, function(p) get_chla(cp, p)[1], numeric(1))
  if (anyNA(chla)) {
    stop("get_chla() returned NA for at least one conditional probability",
         call. = FALSE)
  }
  above <- chla > 0.1
  chla[above] <- round(chla[above])
  chla
}

# NOTE(review): the *_cp objects are not created in the visible code; their
# condprob() calls are commented out above -- presumably they are restored
# from the knitr cache. Confirm.
epa_child <- chla_at_probs(epa_child_cp, cps)
who_drink <- chla_at_probs(who_drink_cp, cps)
epa_adult <- chla_at_probs(epa_adult_cp, cps)
who_rec <- chla_at_probs(who_rec_low1_cp, cps)

# One row per conditional probability in `cps`, one column per advisory.
mc_chla <- cbind(epa_child, who_drink,
                 epa_adult, who_rec)

#Agreement Tables
# Each table cross-tabulates "chlorophyll a at or above the 50% conditional
# probability threshold" (row 5 of mc_chla) against "microcystin at or above
# the advisory level". The first argument to table() is the chlorophyll a
# test, the second the microcystin test.
# NOTE(review): row/column meaning assumes conmat_to_df() keeps the table's
# orientation -- confirm against the package.

# Append a row-total column and a column-total row to a 2 x 2 agreement table
# and apply the display labels used in the manuscript tables.
#   conmat : 2 x 2 data frame as returned by conmat_to_df()
# Returns the 3 x 3 labelled data frame.
# Fails fast if the table is not 2 x 2 (e.g. no lake falls on one side of a
# cutoff), because the labels below would otherwise be attached to the wrong
# cells.
finish_conmat <- function(conmat) {
  stopifnot(nrow(conmat) == 2, ncol(conmat) == 2)
  conmat$tots <- conmat[, 1] + conmat[, 2]
  conmat[3, ] <- conmat[1, ] + conmat[2, ]
  names(conmat) <- c("Not Exceed", "Exceed", "Row Totals")
  row.names(conmat) <- c("Not Exceed", "Exceed", "Column Totals")
  conmat
}

child_conmat <- finish_conmat(
  conmat_to_df(table(chla = nla_dat$CHLA >= mc_chla[5, 1],
                     mcsyt = nla_dat$MCYST_TL_UGL >= 0.3))
)

who_drink_conmat <- finish_conmat(
  conmat_to_df(table(nla_dat$CHLA >= mc_chla[5, 2],
                     nla_dat$MCYST_TL_UGL >= 1))
)

adult_conmat <- finish_conmat(
  conmat_to_df(table(nla_dat$CHLA >= mc_chla[5, 3],
                     nla_dat$MCYST_TL_UGL >= 1.6))
)

who_rec_conmat <- finish_conmat(
  conmat_to_df(table(nla_dat$CHLA >= mc_chla[5, 4],
                     nla_dat$MCYST_TL_UGL >= 2))
)



# The four agreement tables, keyed by the advisory they belong to. Only the
# inner 2 x 2 cells (rows 1:2, columns 1:2) hold counts; row 3 and column 3
# are totals.
conmats_by_advisory <- list(
  epa_child = child_conmat,
  who_drink = who_drink_conmat,
  epa_adult = adult_conmat,
  who_rec = who_rec_conmat
)

# Total accuracy: share of observations on the diagonal of the 2 x 2 table
# (cells [1, 1] and [2, 2]), rounded to two decimals.
total_accuracy <- lapply(conmats_by_advisory, function(cm) {
  round((cm[1, 1] + cm[2, 2]) / sum(cm[1:2, 1:2]), 2)
})

# False-negative proportion: cell [1, 2] divided by all observations in the
# 2 x 2 table, rounded to two decimals.
fn_percent <- lapply(conmats_by_advisory, function(cm) {
  round(cm[1, 2] / sum(cm[1:2, 1:2]), 2)
})

\singlespace

\vspace{2mm}\hrule

Cyanobacteria harmful algal blooms (cHABs) are associated with a wide range of adverse health effects that stem mostly from the presence of cyanotoxins. To help protect against these impacts, several health advisory levels have been set for some toxins. In particular, one of the more common toxins, microcystin, has several advisory levels set for drinking water and recreational use. However, compared to other water quality measures, field measurements of microcystin are not commonly available due to cost and the advanced understanding required to interpret results. Addressing these issues will take time and resources. Thus, there is utility in finding indicators of microcystin that are already widely available, can be estimated quickly and in situ, and used as a first defense against high levels of microcystin. Chlorophyll a is commonly measured, can be estimated in situ, and has been shown to be positively associated with microcystin. In this paper, we use this association to provide estimates of chlorophyll a concentrations that are indicative of a higher probability of exceeding select health advisory concentrations for microcystin. Using the 2007 National Lakes Assessment and a conditional probability approach, we identify chlorophyll a concentrations that are more likely than not to be associated with an exceedance of a microcystin health advisory level. We look at the recent US EPA health advisories for drinking water as well as the World Health Organization levels for drinking water and recreational use and identify a range of chlorophyll a thresholds. A 50% chance of exceeding one of the specific advisory microcystin concentrations of 0.3, 1, 1.6, and 2 $\mu$g/L is associated with chlorophyll a concentration thresholds of `r mc_chla[5,1]`, `r mc_chla[5,2]`, `r mc_chla[5,3]`, and `r mc_chla[5,4]` $\mu$g/L, respectively.
When managing for these various microcystin levels, exceeding these reported chlorophyll a concentrations should be a trigger for further testing and possible management action.

\vspace{3mm}\hrule \doublespace

Introduction

Over the last decade, numerous events and legislative activities have raised the public awareness of harmful algal blooms [@jetoo2015toledo; @rinta2009lake; @HABHRCA2014]. In response the US Environmental Protection Agency (USEPA) has recently released suggested microcystin (one of the more common toxins) concentrations that would trigger health advisories [@mcelhiney2005detection; @zurawell2005hepatotoxic; @usepa2015drinking]. Additionally, the World Health Organization (WHO) has microcystin advisory levels for drinking water and for a range of recreational risk levels [@world2003cyanobacterial; @chorus1999toxic]. While these levels and associated advisories are likely to help mitigate the impacts from harmful algal blooms, they are not without complications.

One of these complications is that they rely on available measurements of microcystin. While laboratory testing (e.g., chromatography) remains the gold standard for quantifying microcystin concentrations in water samples, several field test kits have been developed. Even though field tests provide a much needed means for rapid assessment, they are not yet widely used and are moderately expensive (approximately $150-$200 depending on specific kit) with a limited shelf life (typically one year) [@james2011environmental; @aranda2015evaluation]. Additionally, each technique requires nuanced understanding of the detection method (e.g., limit of detection, specific microcystin variants being measured, and sampling protocol).

Fortunately, cyanobacteria and microcystin-LR have been shown to be associated with several other, more commonly measured and well understood components of water quality that are readily assessed in the field [e.g. @yuan2015deriving]. For instance, there are small or hand held fluorometers that measure chlorophyll a. Additionally, chlorophyll a is a very commonly measured component of water quality that is also known to be positively associated with microcystin-LR concentrations [@pip2014microcystin; @yuan2014managing]. Recently, Yuan et al. [-@yuan2014managing] explored these associations in detail and controlled for other related variables. In their analysis they find that total nitrogen and chlorophyll a show the strongest association with microcystin. Furthermore, they identify chlorophyll a and total nitrogen concentrations that are associated with exceeding 1 $\mu$g/L of microcystin. These findings suggest that chlorophyll a concentrations could also track the new USEPA microcystin health advisory levels for drinking water. Identifying this association would provide an important tool for water resource managers to help manage the threat to public health posed by cHABs and would be especially useful in the absence of measured microcystin concentrations.

In fact, this is a similar tack to that taken by the World Health Organization who, in addition to advisory levels for microcystin, have also proposed related advisory levels for cyanobacteria abundance and chlorophyll a [@world2003cyanobacterial; @chorus1999toxic]. The chlorophyll a concentrations proposed by the WHO are for low (< 10 $\mu$g/L), moderate (between 10 and 50 $\mu$g/L), high (between 50 and 5000 $\mu$g/L), and very high risk (>5000 $\mu$g/L) [@chorus1999toxic]. While these advisories have proven to be useful tools, they do suffer from being coarse and broad, and have been found to overestimate actual risk [@loftin2016cyanotoxins].

In this paper we build on these past efforts and utilize the National Lakes Assessment (NLA) data and identify chlorophyll a concentrations that are associated with higher probabilities of exceeding several microcystin health advisory concentrations [@usepa2009national; @usepa2015drinking; @chorus1999toxic]. We build on past studies by exploring associations with the newly announced advisory levels and by also applying a different method, conditional probability analysis. Utilizing different methods strengthens the evidence for suggested chlorophyll a levels that are associated with increased risk of exceeding the health advisory levels as those levels are not predicated on a single analytical method. So that others may repeat or adjust this analysis, the data, code, and this manuscript are freely available via https://github.com/USEPA/microcystinchla.

Methods

Data

We used the 2007 NLA chlorophyll a and microcystin-LR concentration data [@usepa2009national, NLA]. These data represent a snapshot of water quality from the summer of 2007 for the conterminous United States and were collected as part of an ongoing probabilistic monitoring program [@usepa2009national]. Water quality data, including chlorophyll a and microcystin-LR, were obtained via an integrated sample taken from the surface of the lake down to 2 meters. Samples were taken at the same time from the index site (e.g. near the centroid of the lake) and these provide the source for both chlorophyll a and microcystin-LR [@usepa2009national; @nla_fieldops].

For our analysis we only used samples that were part of the probability sampling design (i.e. no reference samples) and from the first visit to the lake (e.g. some lakes were sampled multiple times). The detection limit for microcystin-LR was 0.05 $\mu$g/L. Approximately `r round(sum(nla_dat$MCYST_TL_UGL==0.05)/nrow(nla_dat),2)*100`% of lakes reported microcystin-LR at the detection limit. For this analysis we retained these values as removing them would erroneously reduce the confidence intervals around the conditional probabilities. Data on chlorophyll a and microcystin-LR concentrations are available for `r nrow(na.omit(data.frame(nla_dat$CHLA,nla_dat$MCYST_TL_UGL)))` lakes.

Analytical Methods

We used a conditional probability analysis (CPA) approach to explore associations between chlorophyll a concentrations and World Health Organization (WHO) and USEPA microcystin health advisory levels [@paul2011probability]. Many health advisory levels have been suggested (Table \ref{tab:microcystin_levels}), but lakes with higher microcystin-LR concentrations in the NLA were rare. Only r round(100*(length(nla_dat$MCYST_TL_UGL[nla_dat$MCYST_TL_UGL>=10])/nrow(nla_dat)),2)% of lakes sampled had a concentration greater than 10 $\mu$g/L. Thus, for this analysis we focused on the microcystin concentrations that are better represented in the NLA data. These were the USEPA children's (i.e. bottle fed infants to pre-school age children) drinking water advisory level of 0.3 $\mu$g/L (USEPA Child), the WHO drinking water advisory level of 1 $\mu$g/L (WHO Drinking), the USEPA adult (i.e. beyond pre-school aged individuals) drinking water advisory level of 1.6 $\mu$g/L (USEPA Adult), and the WHO recreational, low probability of effect advisory level of 2 $\mu$g/L (WHO Recreational).

Conditional probability analysis provides information about the probability of observing one event given another event has also occurred. For this analysis, we used CPA to examine how the conditional probability of exceeding one of the health advisories changes as chlorophyll a increases in a lake. We expect to find higher chlorophyll a concentrations to be associated with higher probabilities of exceeding the microcystin health advisory levels. We also calculated bootstrapped 95% confidence intervals (CI) using 10,000 bootstrapped samples. Thus, to identify chlorophyll a concentrations of concern we identified the value of the upper 95% CI across a range of conditional probabilities of exceeding each health advisory level. Using the upper confidence limit to identify a threshold is justified as it ensures that a given threshold is unlikely to miss a microcystin exceedance.

As both microcystin-LR and chlorophyll a values were highly right skewed, a log base 10 transformation was used. Additional details of the specific implementation are available at https://github.com/USEPA/microcystinchla. A more detailed discussion of CPA is beyond the scope of this paper, but see Paul et al. [-@paul2005development] and Hollister et al. [-@hollister2008cprob] for greater detail. All analyses were conducted using R version 3.2.3 and code and data from this analysis are freely available as an R package at https://github.com/USEPA/microcystinchla.

Lastly, we assessed the ability of these chlorophyll a thresholds to predict microcystin exceedance. We used error matrices and calculated total accuracy as well as the proportion of false negatives. Total accuracy is the total number of correct predictions divided by total observations. The proportion of false negatives is the total number of lakes that were predicted to not exceed the microcystin guidelines but actually did, divided by the total number of observations.

Results

In the 2007 NLA, microcystin-LR concentrations ranged from `r min(nla_dat$MCYST_TL_UGL,na.rm=T)` to `r max(nla_dat$MCYST_TL_UGL, na.rm=T)` $\mu$g/L. Microcystin-LR concentrations of 0.05 $\mu$g/L represent the detection limits. Any value greater than that indicates the presence of microcystin-LR. Of those lakes with microcystin-LR, the median concentration was `r round(median(nla_dat$MCYST_TL_UGL[nla_dat$MCYST_TL_UGL>0.05],na.rm=T), 2)` $\mu$g/L and the mean was `r round(mean(nla_dat$MCYST_TL_UGL[nla_dat$MCYST_TL_UGL>0.05],na.rm=T), 2)` $\mu$g/L. Of all lakes sampled, `r round(100*(length(nla_dat$MCYST_TL_UGL[nla_dat$MCYST_TL_UGL>=0.3])/length(nla_dat$MCYST_TL_UGL)),1)`% of lakes exceeded the USEPA Child level, `r round(100*(length(nla_dat$MCYST_TL_UGL[nla_dat$MCYST_TL_UGL>=1.6])/length(nla_dat$MCYST_TL_UGL)),1)`% of lakes exceeded the USEPA Adult level, `r round(100*(length(nla_dat$MCYST_TL_UGL[nla_dat$MCYST_TL_UGL>=1])/length(nla_dat$MCYST_TL_UGL)),1)`% of lakes exceeded the WHO Drinking level, and `r round(100*(length(nla_dat$MCYST_TL_UGL[nla_dat$MCYST_TL_UGL>=2])/length(nla_dat$MCYST_TL_UGL)),1)`% of lakes exceeded the WHO Recreational level. Chlorophyll a ranged from `r min(nla_dat$CHLA,na.rm=T)` to `r max(nla_dat$CHLA, na.rm=T)` $\mu$g/L and this captures the range of trophic states from oligotrophic to hypereutrophic. All lakes had detectable levels of chlorophyll a. The median concentration was `r median(nla_dat$CHLA,na.rm=T)` $\mu$g/L and the mean was `r round(mean(nla_dat$CHLA,na.rm=T),2)` $\mu$g/L. The association between chlorophyll a and the upper confidence interval across a range of conditional probability values is shown in Table \ref{tab:mc_chla_table}.
Specific chlorophyll a concentrations that are associated with greater than even odds of exceeding the advisory levels were `r mc_chla[5,1]`, `r mc_chla[5,2]`, `r mc_chla[5,3]`, and `r mc_chla[5,4]` $\mu$g/L for 0.3, 1.0, 1.6, and 2.0 $\mu$g/L advisory levels, respectively (Table \ref{tab:mc_chla_table} & Figure \ref{fig:multi_cp_plot}).

# Figure 1: four conditional probability panels (A-D) on a two-column layout,
# written to a JPEG and then cropped in place with autocrop().
fig1_file <- "hollister_microcystin_fig1.jpg"

jpeg(filename = fig1_file, width = 7.82, height = 6.8, units = "in", res = 450)

panel_a <- plot_cp(epa_child_cp,
                   x = "Log 10 Chlorophyll",
                   y = "Conditional Probability",
                   title = "A.")
panel_b <- plot_cp(who_drink_cp,
                   x = "Log 10 Chlorophyll",
                   y = "Conditional Probability",
                   title = "B.")
panel_c <- plot_cp(epa_adult_cp,
                   x = "Log 10 Chlorophyll",
                   y = "Conditional Probability",
                   title = "C.")
panel_d <- plot_cp(who_rec_low1_cp,
                   x = "Log 10 Chlorophyll",
                   y = "Conditional Probability",
                   title = "D.")
multiplot(panel_a, panel_b, panel_c, panel_d, cols = 2)

# The return values are captured in `x` so they are not printed in the
# knitted document.
x <- dev.off()
x <- autocrop(fig1_file,
              border = 25,
              outfile = fig1_file,
              width = 6.1,
              res = 450)

The chlorophyll a cutoffs may be used to predict whether or not a lake exceeds the microcystin health advisories. Doing so allows us to compare the accuracy of the prediction as well as evaluate false negatives. Total accuracy of the four cutoffs in predicting microcystin exceedances was `r total_accuracy$epa_child*100`% for the USEPA children's drinking water advisory, `r total_accuracy$who_drink*100`% for the WHO drinking water advisory, `r total_accuracy$epa_adult*100`% for the USEPA adult drinking water advisory, and `r total_accuracy$who_rec*100`% for the WHO recreational advisory (Tables \ref{tab:child_conmat_table}, \ref{tab:who_drink_conmat_table}, \ref{tab:adult_conmat_table}, & \ref{tab:who_rec_conmat_table}). However, total accuracy is only one part of the prediction performance with which we are concerned.

When using the chlorophyll a cutoffs as an indicator of microcystin exceedances, the error that should be avoided is predicting that no exceedance has occurred when in fact it has. In other words, we would like to avoid Type II errors and minimize the proportion of false negatives. For the four chlorophyll a cut-offs we had a proportion of false negatives of `r fn_percent$epa_child*100`%, `r fn_percent$who_drink*100`%, `r fn_percent$epa_adult*100`%, and `r fn_percent$who_rec*100`% for the USEPA children's, the WHO drinking water, the USEPA adult, and the WHO recreational advisories, respectively. In each case we missed less than 10% of the lakes that in fact exceeded the microcystin advisory. While this method performs well with regard to the false negative percentage, it is possible that this is an artifact of the NLA dataset and testing with additional data would allow us to confirm this result.

Discussion

The log-log association between microcystin-LR and chlorophyll a indicates that, in general, higher concentrations of microcystin-LR almost always co-occur with higher concentrations of chlorophyll a yet the inverse is not true (Figure \ref{fig:chla_micro_scatter}). Higher chlorophyll a is not necessarily predictive of higher microcystin-LR concentrations; however, chlorophyll a may be predictive of the probability of exceeding a certain threshold.

# Figure 2: scatter plot of log10 microcystin-LR against log10 chlorophyll a,
# saved as a JPEG and then cropped in place with autocrop().
fig2_file <- "hollister_microcystin_fig2.jpg"

chla_micro_scatter <- plot_scatter(
  nla_dat,
  xvar = "log_chla",
  yvar = "log_micro",
  pt_col = "black",
  x = expression(paste('Log10 Chl ', italic("a"),' (',mu,'g/L)')),
  y = expression(paste('Log10 microcystin-LR',' (',mu,'g/L)'))
)

# No width is given, so ggsave() falls back to its default for that dimension.
ggplot2::ggsave(fig2_file,
                chla_micro_scatter,
                dpi = 450,
                height = 6.8,
                units = "in")

# The return value is captured in `x` so it is not printed.
x <- autocrop(fig2_file,
              border = 25,
              outfile = fig2_file,
              width = 6.1,
              res = 450)

Indeed, the probability of exceeding each of the four tested health advisory levels increased as a function of chlorophyll a concentration (Figure \ref{fig:multi_cp_plot}). We used this association to identify chlorophyll a concentrations that were associated with a range of probabilities of exceeding a given health advisory level (Table \ref{tab:mc_chla_table}). For the purposes of this discussion we focus on a conditional probability of 50% or greater (i.e., greater than even odds to exceed a health advisory level). The 50% conditional probability chlorophyll a thresholds represent `r round(100*(length(nla_dat$CHLA[nla_dat$CHLA>=mc_chla[5,1]])/length(nla_dat$CHLA)),1)`%, `r round(100*(length(nla_dat$CHLA[nla_dat$CHLA>=mc_chla[5,2]])/length(nla_dat$CHLA)),1)`%, `r round(100*(length(nla_dat$CHLA[nla_dat$CHLA>=mc_chla[5,3]])/length(nla_dat$CHLA)),1)`%, and `r round(100*(length(nla_dat$CHLA[nla_dat$CHLA>=mc_chla[5,4]])/length(nla_dat$CHLA)),1)`% of sample lakes for the USEPA Child, the WHO Drinking, the USEPA Adult, and the WHO recreational levels, respectively.

There are numerous possible uses for the chlorophyll a and microcystin advisory cut-off values. First, in the absence of microcystin-LR measurements, exceedance of the chlorophyll a concentrations could be a trigger for further actions. Given that there is uncertainty around these chlorophyll a cutoffs, the best case scenario would be to monitor for chlorophyll a and in the event of exceeding a target concentration take water samples and have those samples tested for microcystin-LR.

A second potential use is to identify past bloom events from historical data. As harmful algal blooms are made up of many species and have various mechanisms responsible for adverse impacts (e.g., toxins, hypoxia, odors), there is no single definition of a bloom. For cHABs, one approach has been to utilize phycocyanin to screen for or identify bloom events [@miller2013spatiotemporal; @ahn2007alternative; @marion2012vivo]. This is a useful approach, but phycocyanin is not always available, thus limiting its utility especially for examining historical data. Using our chlorophyll a cutoffs provides a value that is also associated with microcystin-LR and can be used to classify lakes, from past surveys, as having bloomed.

The values we propose are national and may miss regional variation in water quality, including chlorophyll a and microcystin-LR [@beaver2014land; @wagner2011landscape]. A set of regional conditional probabilities would be interesting; however, limiting the analysis to the data available per region would make interpretation difficult. The sample size for each of the regional conditional probabilities would be reduced and the number of lakes in each region that exceed the microcystin values would also be reduced. Thus, our confidence in the conditional probabilities would be less (i.e. greatly increased confidence intervals) and the relationships less pronounced as we have fewer lakes on which to base the probabilities. Thus, this dataset is best for making national scale recommendations.

There are two other limitations with the 2007 National Lakes Assessment dataset. First, it represents a single sample from a lake and does not capture temporal dynamics. Second, validation of the predictions with the 2007 data alone would be challenging as the data would need to be subset and this would only serve to increase the uncertainty of our conditional probabilities, reducing our ability to validate the presence of microcystin-LR. The 2012 National Lakes Assessment would be ideal for this task. However, as of this writing, the 2012 National Lakes Assessment data are not public. When these data are released, a validation of this approach can be completed.

Lastly, using chlorophyll a is not meant as a replacement for testing of microcystin-LR or other toxins. It should be used when other, direct measurements of cyanotoxins are not available. In those cases, which are likely to be common at least in the near future, using a more ubiquitous measurement such as chlorophyll a will provide a reasonable proxy for the probability of exceeding a microcystin health advisory level and provide better protection against adverse effects in both drinking and recreational use cases.

Acknowledgements

We would like to thank Anne Kuhn, Bryan Milstead, John Kiddon, Joe LiVolsi, Tim Gleason, Wayne Munns, and Leslie D'Anglada for constructive reviews of this paper. Special thanks to Jason Marion, Alan Wilson, and Zofia Taranu for reviews of the submitted manuscript. This paper has not been subjected to Agency review. Therefore, it does not necessarily reflect the views of the Agency. Mention of trade names or commercial products does not constitute endorsement or recommendation for use. This contribution is identified by the tracking number ORD-015143 of the Atlantic Ecology Division, Office of Research and Development, National Health and Environmental Effects Research Laboratory, US Environmental Protection Agency.

\newpage

USEPA/Microcystinchla documentation built on May 9, 2019, 5:23 p.m.