# Path to water quality database and precipitation spreadsheet base_path <- '\\\\psf/Dropbox/MysticDB/' db_path <- file.path(base_path, 'MysticDB_20150227.accdb') precip_path <- file.path(base_path, 'Processed/Precip/LoganPrecip.xlsx') sample_date <- "2014-11-10" # use YYYY-MM-DD format monitors <- "Andrew Hrycyna" # use single string for multiple (e.g. "Andy Hrycyna, Patrick Herron") # rename specific LocationIDs to match MyRWA and Municipalities # example: replaces "Blanchard Road" with "CAMD03OF0000" # rename_locations <- c("Blanchard Road"="CAMD03OF0000") rename_locations <- c()
library(myrwaR) library(RODBC) library(knitr) library(ggplot2) library(reshape2) library(scales) library(dplyr) library(lubridate) library(gridExtra) library(xtable) library(stringr) library(tidyr) library(dataRetrieval) theme_set(theme_bw()) options(stringsAsFactors=FALSE) options("xtable.comment"=FALSE) knitr::opts_chunk$set(echo=FALSE, message=FALSE, warning=FALSE)
# check db and precip paths if (!file.exists(db_path)) { stop(paste0('Could not find database: ', db_path)) } if (!file.exists(precip_path)) { stop(paste0('Could not find precipitation file: ', precip_path)) } # parse sample date sample_date <- as.Date(sample_date)
# load all hotspot and municipal data from database wq <- myrwaR::load_wq(path=db_path, projects=c('HOTSPOT', 'MUNI'), sample_types=NULL, exclude_flags=FALSE) %>% mutate(Qualifier=ifelse(is.na(Qualifier), "", Qualifier), Date=as.Date(Datetime, unit='day')) %>% filter(Date <= sample_date) # assign flag for samples on current sample date wq$Current <- wq$Date==sample_date stopifnot(sum(wq$Current)>0) # check there is at least one sample for sample_date # get visits ch <- myrwaR::db_connect(path = db_path) visits <- ch %>% myrwaR::db_table(table_name = 'Visit') %>% mutate(Date=as.Date(Datetime)) %>% filter(ProjectID=="HOTSPOT", Date==sample_date) # get locations locations <- ch %>% myrwaR::db_table(table_name = 'Location') %>% rename(LocationID=ID) %>% filter(LocationID %in% visits$LocationID) %>% select(LocationID, MunicipalityID, Latitude, Longitude, WaterBodyID, LocationDescription) close(ch) # fix WaterBodyID issue by trimming extra spaces (e.g. "Aberjona River " -> "Aberjona River") wq$WaterBodyID <- str_trim(wq$WaterBodyID) locations$WaterBodyID <- str_trim(locations$WaterBodyID) # revalue LocationID wq$LocationID <- plyr::revalue(wq$LocationID, rename_locations) visits$LocationID <- plyr::revalue(visits$LocationID, rename_locations) locations$LocationID <- plyr::revalue(locations$LocationID, rename_locations) # get visits with no flows visits_noflow <- filter(visits, HasFlow==0) locations <- arrange(locations, LocationID) locations <- left_join(locations, select(visits, LocationID, HasFlow)) locations_hasflow <- locations[which(locations$HasFlow==1), 'LocationID']
# check that LocationID is not duplicated in locations table stopifnot(sum(duplicated(locations$LocationID))==0) # get list of unique waterbodies and municipalities waterbodies <- sort(unique(locations$WaterBodyID)) municipalities <- sort(unique(locations$MunicipalityID))
# load logan airport precipitation data from xlsx file precip <- myrwaR::load_precip_from_xls(path=precip_path, as.type='dataframe') # append hourly precip data from USGS (Muddy River) gage # if sample date is after end of logan dataset if (sample_date > as.Date(max(precip$Datetime))) { startDate <- format(with_tz(max(precip$Datetime)+hours(1), tz="America/New_York"), "%Y-%m-%dT%H:%M:%S") endDate <- format(sample_date+days(1), "%Y-%m-%d") precip_usgs <- readNWISuv(siteNumber="01104683", # muddy river parameterCd="00045", # precip startDate=startDate, endDate=endDate, tz="America/New_York") %>% select(Datetime=dateTime, Precip=X_00045_00011) %>% mutate(Datetime=with_tz(Datetime, tzone="EST")) %>% mutate(Datetime=floor_date(Datetime, unit='hour')) %>% group_by(Datetime) %>% summarise(Precip=sum(Precip)) precip <- rbind(precip, precip_usgs) }
# add weather classification (dry/wet) wq <- myrwaR::append_weather(wq=wq, precip=precip, period=48, precip.threshold=0.25, precip.name="Precip")
# get the weather class and 48 hour precip for sample date from logan precip sample_weather <- unique(as.character(wq[which(wq$Current), 'Weather'])) # if the individual samples have different 48-hour precip, use max sample_precip_48 <- max(unique(wq[which(wq$Current), 'Precip.48'])) # if some samples are Dry and others Wet, call the the overall event a Wet event if (length(sample_weather)>1 && 'Wet' %in% sample_weather) { sample_weather <- 'Wet' } stopifnot(length(sample_weather)==1) stopifnot(!is.na(sample_weather)) stopifnot(length(sample_precip_48)==1) stopifnot(!is.na(sample_precip_48)) # create title string for precipitation on sampling data precip_record <- paste0(sample_weather, " (", format(sample_precip_48, nsmall=1), "\" 48 hr prior to sampling)")
# find type of bacteria measurement bacteria_type <- intersect(unique(subset(wq, Current)$CharacteristicID), c("ECOLI", "ENT")) # check that samples only include one of ECOLI or ENT, not both or neither stopifnot(length(bacteria_type)==1) # data frame containing standards and labels for bacteria bac_standards <- data.frame(CharacteristicID=c('ECOLI', 'ENT'), BoatingStandard=c(1260, 350), SwimmingStandard=c(235, 104), BacteriaLabel=c('E. coli', 'Enterococcus')) # create bacteria label for plots and tables bac_label <- filter(bac_standards, CharacteristicID==bacteria_type)$BacteriaLabel if (bac_label=='Enterococcus') bac_label <- 'Entero.' # add BoatingStandard, SwimmingStandard and BacteriaLabel columns to wq data frame # and assign exceedence levels (low/med/high) wq <- left_join(wq, bac_standards) %>% mutate(ExceedanceLevel=ifelse(ResultValue>=BoatingStandard, 'high', ifelse(ResultValue>=SwimmingStandard, 'med', 'low')), ExceedanceLevel=ordered(ExceedanceLevel, levels=c('low', 'med', 'high')))
p.bar <- filter(wq, Current, CharacteristicID==bacteria_type, ProjectID=="HOTSPOT") %>% ggplot() + geom_bar(aes(x=LocationID, y=ResultValue, fill=ExceedanceLevel, width=0.65), stat="identity") + geom_hline(aes(yintercept=BoatingStandard), data=filter(bac_standards, CharacteristicID==bacteria_type), size=1, color='grey30') + geom_hline(aes(yintercept=SwimmingStandard), data=filter(bac_standards, CharacteristicID==bacteria_type), color='grey30') + geom_text(aes(x=1, y=BoatingStandard, label=paste0('Boating Standard: ', BoatingStandard, ' #/100ml')), data=filter(bac_standards, CharacteristicID==bacteria_type), hjust=0, vjust=1.1, alpha=0.8, size=3) + geom_text(aes(x=1, y=SwimmingStandard, label=paste0('Swimming Standard: ', SwimmingStandard, ' #/100ml')), data=filter(bac_standards, CharacteristicID==bacteria_type), hjust=0, vjust=1.1, alpha=0.8, size=3) + geom_text(aes(label=paste(Qualifier, str_trim(prettyNum(ResultValue, big.mark=",", scientific=F))), x=LocationID, y=ResultValue), position=position_dodge(width = 0.8), vjust=-.6, size =3.5) + labs(y=paste0(filter(bac_standards, CharacteristicID==bacteria_type)$BacteriaLabel, ' #/100ml'), x='Site ID') + scale_y_log10(breaks=10^seq(0, 7), labels=comma) + scale_fill_manual(values=c('high'="#DA635D", 'med'="#FF9933", 'low'="#6699CC"), drop=FALSE) + theme(axis.text.x=element_text(angle=45, hjust=1, vjust=NULL), plot.title=element_text(size = rel(1.2), hjust = 0)) + guides(fill=FALSE)
tblDataCharacteristics <- c(bacteria_type, "SURF_ANIONIC", "NH3") tblData <- filter(wq, LocationID %in% locations_hasflow) %>% select(Current, VisitID, LocationID, CharacteristicID, Datetime, Qualifier, ResultValue, WaterBodyID, Weather, ProjectID, Comment) %>% arrange(VisitID, LocationID, Datetime, WaterBodyID, Comment) %>% filter(CharacteristicID %in% tblDataCharacteristics) tblData <- mutate(tblData, ResultValue=ifelse(CharacteristicID==bacteria_type, prettyNum(ResultValue, digits=3, big.mark=",", scientific=F), prettyNum(ResultValue, nsmall=2, small.interval=2, scientific=F))) tblData <- mutate(tblData, ResultValue=ifelse(ResultValue=="NA", "", paste(Qualifier, str_trim(ResultValue)))) %>% select(-Qualifier) missing_characteristics <- setdiff(tblDataCharacteristics, unique(tblData$CharacteristicID)) tblData <- spread(tblData, CharacteristicID, ResultValue) if (length(missing_characteristics) > 0) { for (char in missing_characteristics) { tblData[, char] <- NA } } tblData <- arrange(tblData, LocationID) %>% mutate(Date=format(Datetime, "%Y-%m-%d"), Time=format(Datetime, "%H:%M"))
tblDataCurrent <- filter(tblData, Current, ProjectID=="HOTSPOT") %>% select_("LocationID", bacteria_type, "SURF_ANIONIC", "NH3", "WaterBodyID", "Comment") if (nrow(visits_noflow) > 0) { locations_noflow <- filter(locations, HasFlow==0) data_noflow <- select(locations_noflow, LocationID, WaterBodyID) %>% mutate(LocationID=paste0(LocationID, "*"), SURF_ANIONIC=NA, NH3=NA, Comment=NA) data_noflow[, bacteria_type] <- NA tblDataCurrent <- rbind(tblDataCurrent, data_noflow) %>% arrange(as.character(LocationID)) } xtblDataCurrent <- xtable(tblDataCurrent, caption=paste0("Data collected at outfalls during Hotspot sampling event on ", format(sample_date, '%B %d, %Y')), align=c("l","l", ">{\\raggedleft}p{0.5in}", rep(">{\\raggedleft}p{0.42in}", times=2), "l","p{2in}"), digits = c(0,0, 0,2,2, 0,0), include.rownames=FALSE, include.colnames=TRUE) colnames(xtblDataCurrent) <- c("Site ID", paste0(bac_label, ' (#/100ml)'), "Surfact. (ppt)", "NH3 (mg/l)", "Water Body", "Comment")
xtblLocations <- mutate(locations, LocationID=ifelse(HasFlow==1, LocationID, paste0(LocationID, "*"))) %>% select(LocationID, MunicipalityID, WaterBodyID, Latitude, Longitude, LocationDescription) %>% arrange(LocationID) %>% xtable(caption=paste0("Locations visited during Hotspot sampling event on ", format(sample_date, '%B %d, %Y')), align=c("l","l","l","l", "r","r","p{2in}"), digits = c(0, 0, 0, 0, 4, 4, 0), include.rownames=FALSE, include.colnames=T) # rename columns colnames(xtblLocations) <- plyr::revalue(colnames(xtblLocations), c("LocationID"="Site ID", "MunicipalityID"="Municipality", "WaterBodyID"="Waterbody", "Latitude"="Latitude", "Longitude"="Longitude", "LocationDescription"="Location Description"))
wq.historical <- filter(wq, CharacteristicID==bacteria_type, LocationID %in% locations_hasflow) wq.historical.count <- group_by(wq.historical, LocationID) %>% summarize(N=n()) wq.historical <- left_join(wq.historical, wq.historical.count) p.box <- filter(wq.historical, N>=5) %>% ggplot(aes(LocationID, ResultValue)) + geom_boxplot(fill='grey90') + geom_point(mapping=aes(color=Weather), data=wq.historical, size=4) + geom_point(data=filter(wq.historical, Current), pch=1, size=4, color='red') + geom_hline(aes(yintercept=BoatingStandard), data=filter(bac_standards, CharacteristicID==bacteria_type), color='grey30', size=1) + geom_hline(aes(yintercept=SwimmingStandard), data=filter(bac_standards, CharacteristicID==bacteria_type), color='grey30') + geom_text(aes(x=1, y=BoatingStandard, label=paste0('Boating Standard: ', BoatingStandard, ' #/100ml')), data=filter(bac_standards, CharacteristicID==bacteria_type), hjust=0, vjust=1.1, alpha=0.8, size=3) + geom_text(aes(x=1, y=SwimmingStandard, label=paste0('Swimming Standard: ', SwimmingStandard, ' #/100ml')), data=filter(bac_standards, CharacteristicID==bacteria_type), hjust=0, vjust=1.1, alpha=0.8, size=3) + labs(y=paste0(filter(bac_standards, CharacteristicID==bacteria_type)$BacteriaLabel, ' #/100ml'), x='Site ID') + scale_y_log10(breaks=10^seq(0, 7), labels=comma) + scale_color_manual('Weather', values=c('Dry'='cadetblue2', 'Wet'='deepskyblue4'), drop=FALSE) + theme(axis.text.x=element_text(angle=45, hjust=1, vjust=NULL)) # get ymax for placing sample counts ymax <- 10^ggplot_build(p.box)$panel$ranges[[1]]$y.range[2] p.box <- p.box + geom_text(mapping=aes(x=LocationID, y=ymax, label=paste0('(',N,')')), data=wq.historical.count, size=4)
tbl_historical <- function(loc) { tblDataHistorical <- filter(tblData, LocationID==loc) %>% arrange(desc(Date)) %>% select_("Date", bacteria_type, "SURF_ANIONIC", "NH3", "Weather", "ProjectID") %>% mutate(ProjectID=plyr::revalue(ProjectID, c('HOTSPOT'='Hotspot', 'MUNI'='Municipal'))) xtblDataHistorical <- xtable(tblDataHistorical, caption=paste("Historical Data for ", loc), align=c("l","l","r","r","r","r","l"), digits = c(0,0,0,2,2,0,0), include.rownames=FALSE, include.colnames=TRUE ) colnames(xtblDataHistorical) <- c("Date", paste0(bac_label, ' (#/100ml)'), "Surfact. (ppt)", "NH3 (mg/l)", "Weather", "Project") # create shade for rows with wet weather rws <- which(tblDataHistorical$Weather=="Wet")-1 shade <- rep('\\rowcolor[gray]{0.9}', length(rws)) print(xtblDataHistorical, latex.environments=NULL, table.placement='H', include.rownames=FALSE, caption.placement="top", floating=TRUE, hline.after=c(0), NA.string='', add.to.row=list(pos=as.list(rws), command=shade)) }
\textbf{Hotspot Sampling Date}: r format(sample_date, "%B %d, %Y")
\textbf{Municipalities}: r paste(municipalities, collapse=', ')
\textbf{Precipitation Record}: r precip_record
\textbf{Report Date}: r rmarkdown::metadata$report_date
\textbf{Technical Report}: r rmarkdown::metadata$report_number
The Mystic River Watershed Association conducted dry-weather monitoring in the municipalities of Somerville and Melrose on r format(sample_date, "%B %d, %Y")
. The main goal of the monitoring event was to evaluate conditions at stormwater outfalls and other sites that have not been sampled extensively in the past few years by MyRWA and other sites of interest. Water quality samples were taken in accordance with the MyRWA MA-DEP approved QAPP along with accurate notes.
Testing revealed that six of ten tested sites had E. coli values that exceeded MassDEP Water Quality Standards for Class B water bodies for swimming. The highest E. coli values were found at Ell Pond, at outfall MEL003, where E. coli levels were 6910 MPN E. coli / 100 ml, a value which exceeds the standard for boating as well.
MassDEP Water Quality Standards for Class B water bodies are 1260 E. coli / 100 ml. for boating and 235 E. coli / 100 ml for swimming.
This report was prepared by:
Mystic River Watershed Association 20 Academy St. Suite 306
Arlington, MA 02476
781-316-3438
www.mysticriver.org
Principal Investigators: Patrick Herron and Andrew Hrycyna
Water Quality Monitors: r monitors
Lab Analysis: EPA Region 1 - Chelmsford
This report describes monitoring data collected by MyRWA under its MassDEP-approved Quality Assurance Project Plan.
This project was undertaken in connection with settlement of an enforcement action, United States v. Sterling Suffolk Racecourse LLC, taken on behalf of the U.S. Environmental Protection Agency under the Clean Water Act.
EPA Region 1 Laboratory in Chelmsford, MA conducted all laboratory sample analyses. These services were provided in-kind to this project.
This report should be cited in the literature in the following manner:
r rmarkdown::metadata$title
--Technical Reportr rmarkdown::metadata$report_number
. Mystic River Watershed Association, Arlington, MA.
On r format(sample_date, "%B %d, %Y")
, MyRWA conducted a dry-weather sampling event at Alewife Brook, Ell Pond and the Malden Canal. During this sampling event, MyRWA collected ten water samples and tested them for bacteria levels to identify potential sources of contamination that could affect water quality in the surface water bodies.
Elevated E. coli levels adversely impacting water quality were observed at six of 10 sites surveyed, where measured values exceeded MassDEP Water Quality Standards for swimming safety in Class B water bodies. In addition, at one site near Ell Pond, E. coli levels exceeded the standard for safe boating as well.
Results suggest that two sites should be prioritized, and others followed up on:
\clearpage
\begin{figure}[h]
\caption{Bacteria Levels Measured on r format(sample_date, '%B %d, %Y')
}
print(p.bar)
\end{figure}
print(xtblDataCurrent, floating=TRUE, latex.environments=NULL, table.placement='H', include.rownames=FALSE, caption.placement="top", hline.after=c(0), NA.string='') if (nrow(visits_noflow) > 0) { cat("\\textit{* Location visited but not sampled due to no flow}") }
print(xtblLocations, latex.environments=NULL, table.placement='H', include.rownames=FALSE, caption.placement="top", floating=TRUE, hline.after=c(0), NA.string='') if (nrow(visits_noflow) > 0) { cat("\\textit{* Location visited but not sampled due to no flow}") }
\clearpage
\begin{figure}[ht]
\caption{Distribution of historical bacteria data at outfalls sampled during Hotspot event on r format(sample_date, '%B %d, %Y')
}
p.box
\end{figure}
Boxplots show the median (center blank line), inter-quartile range (25th-75th percentiles, top/bottom of box), and range of extreme values (vertical lines). Values at the top of the chart indicate the number of samples for each outfall. Points with red outline were sampled during the Hotspot event on r format(sample_date, '%B %d, %Y')
. Box-and-whiskers only shown for sites with at least 5 samples.
\clearpage
for (loc in locations_hasflow) { tbl_historical(loc) }
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.