In walkerjeffd/myrwaR: Tools for the Mystic River Watershed Association

# Path to water quality database and precipitation spreadsheet
base_path <- '\\\\psf/Dropbox/MysticDB/'
db_path <- file.path(base_path, 'MysticDB_20150227.accdb')
precip_path <- file.path(base_path, 'Processed/Precip/LoganPrecip.xlsx')

sample_date <- "2014-11-10"  # use YYYY-MM-DD format
monitors <- "Andrew Hrycyna" # use single string for multiple (e.g. "Andy Hrycyna, Patrick Herron")

# rename specific LocationIDs to match MyRWA and Municipalities
# example: replaces "Blanchard Road" with "CAMD03OF0000"
# rename_locations <- c("Blanchard Road"="CAMD03OF0000")
rename_locations <- c()

library(myrwaR)
library(RODBC)
library(knitr)
library(ggplot2)
library(reshape2)
library(scales)
library(dplyr)
library(lubridate)
library(gridExtra)
library(xtable)
library(stringr)
library(tidyr)
library(dataRetrieval)

theme_set(theme_bw())

options(stringsAsFactors=FALSE)
options("xtable.comment"=FALSE)
knitr::opts_chunk$set(echo=FALSE, message=FALSE, warning=FALSE)

# check db and precip paths 
if (!file.exists(db_path)) {
  stop(paste0('Could not find database: ', db_path))
}
if (!file.exists(precip_path)) {
  stop(paste0('Could not find precipitation file: ', precip_path))
}

# parse sample date
sample_date <- as.Date(sample_date)

# load all hotspot and municipal data from database
wq <- myrwaR::load_wq(path=db_path,
                      projects=c('HOTSPOT', 'MUNI'),
                      sample_types=NULL,
                      exclude_flags=FALSE) %>%
  mutate(Qualifier=ifelse(is.na(Qualifier), "", Qualifier),
         Date=as.Date(Datetime, unit='day')) %>%
  filter(Date <= sample_date)

# assign flag for samples on current sample date
wq$Current <- wq$Date==sample_date
stopifnot(sum(wq$Current)>0) # check there is at least one sample for sample_date

# get visits
ch <- myrwaR::db_connect(path = db_path)
visits <- ch %>%
  myrwaR::db_table(table_name = 'Visit') %>%
  mutate(Date=as.Date(Datetime)) %>%
  filter(ProjectID=="HOTSPOT", Date==sample_date)

# get locations
locations <- ch %>%
  myrwaR::db_table(table_name = 'Location') %>%
  rename(LocationID=ID) %>%
  filter(LocationID %in% visits$LocationID) %>%
  select(LocationID, MunicipalityID, Latitude, Longitude, WaterBodyID, LocationDescription)
close(ch)

# fix WaterBodyID issue by trimming extra spaces (e.g. "Aberjona River " -> "Aberjona River")
wq$WaterBodyID <- str_trim(wq$WaterBodyID)
locations$WaterBodyID <- str_trim(locations$WaterBodyID)

# revalue LocationID
wq$LocationID <- plyr::revalue(wq$LocationID, rename_locations)
visits$LocationID <- plyr::revalue(visits$LocationID, rename_locations)
locations$LocationID <- plyr::revalue(locations$LocationID, rename_locations)

# get visits with no flows
visits_noflow <- filter(visits, HasFlow==0)

locations <- arrange(locations, LocationID)
locations <- left_join(locations, select(visits, LocationID, HasFlow))

locations_hasflow <- locations[which(locations$HasFlow==1), 'LocationID']

# check that LocationID is not duplicated in locations table
stopifnot(sum(duplicated(locations$LocationID))==0) 

# get list of unique waterbodies and municipalities
waterbodies <- sort(unique(locations$WaterBodyID))
municipalities <- sort(unique(locations$MunicipalityID))

# load logan airport precipitation data from xlsx file
precip <- myrwaR::load_precip_from_xls(path=precip_path, as.type='dataframe')

# append hourly precip data from USGS (Muddy River) gage
# if sample date is after end of logan dataset
if (sample_date > as.Date(max(precip$Datetime))) {
  startDate <- format(with_tz(max(precip$Datetime)+hours(1), tz="America/New_York"), 
                      "%Y-%m-%dT%H:%M:%S")
  endDate <- format(sample_date+days(1), "%Y-%m-%d")

  precip_usgs <- readNWISuv(siteNumber="01104683",  # muddy river
                            parameterCd="00045",    # precip 
                            startDate=startDate, 
                            endDate=endDate, 
                            tz="America/New_York") %>%
    select(Datetime=dateTime, Precip=X_00045_00011) %>%
    mutate(Datetime=with_tz(Datetime, tzone="EST")) %>%
    mutate(Datetime=floor_date(Datetime, unit='hour')) %>%
    group_by(Datetime) %>%
    summarise(Precip=sum(Precip))
  precip <- rbind(precip, precip_usgs)
}

# add weather classification (dry/wet)
wq <- myrwaR::append_weather(wq=wq, precip=precip, 
                             period=48, precip.threshold=0.25, 
                             precip.name="Precip")

# get the weather class and 48 hour precip for sample date from logan precip
sample_weather <- unique(as.character(wq[which(wq$Current), 'Weather']))

# if the individual samples have different 48-hour precip, use max
sample_precip_48 <- max(unique(wq[which(wq$Current), 'Precip.48']))

# if some samples are Dry and others Wet, call the the overall event a Wet event
if (length(sample_weather)>1 && 'Wet' %in% sample_weather) {
  sample_weather <- 'Wet'
}

stopifnot(length(sample_weather)==1)
stopifnot(!is.na(sample_weather))
stopifnot(length(sample_precip_48)==1)
stopifnot(!is.na(sample_precip_48))

# create title string for precipitation on sampling data
precip_record <- paste0(sample_weather, " (", format(sample_precip_48, nsmall=1), 
                        "\" 48 hr prior to sampling)")

# find type of bacteria measurement
bacteria_type <- intersect(unique(subset(wq, Current)$CharacteristicID), c("ECOLI", "ENT"))

# check that samples only include one of ECOLI or ENT, not both or neither
stopifnot(length(bacteria_type)==1) 

# data frame containing standards and labels for bacteria
bac_standards <- data.frame(CharacteristicID=c('ECOLI', 'ENT'),
                            BoatingStandard=c(1260, 350),
                            SwimmingStandard=c(235, 104),
                            BacteriaLabel=c('E. coli', 'Enterococcus'))

# create bacteria label for plots and tables
bac_label <- filter(bac_standards, CharacteristicID==bacteria_type)$BacteriaLabel
if (bac_label=='Enterococcus') bac_label <- 'Entero.'

# add BoatingStandard, SwimmingStandard and BacteriaLabel columns to wq data frame
# and assign exceedence levels (low/med/high)
wq <- left_join(wq, bac_standards) %>%
  mutate(ExceedanceLevel=ifelse(ResultValue>=BoatingStandard,
                                'high',
                                ifelse(ResultValue>=SwimmingStandard,
                                       'med', 'low')),
         ExceedanceLevel=ordered(ExceedanceLevel, levels=c('low', 'med', 'high')))

p.bar <- filter(wq, Current, CharacteristicID==bacteria_type, ProjectID=="HOTSPOT") %>%
  ggplot() +
  geom_bar(aes(x=LocationID, y=ResultValue, fill=ExceedanceLevel, width=0.65), stat="identity") +
  geom_hline(aes(yintercept=BoatingStandard), 
             data=filter(bac_standards, CharacteristicID==bacteria_type),
             size=1, color='grey30') +
  geom_hline(aes(yintercept=SwimmingStandard), 
             data=filter(bac_standards, CharacteristicID==bacteria_type),
             color='grey30') +
  geom_text(aes(x=1, y=BoatingStandard,
                label=paste0('Boating Standard: ', BoatingStandard, ' #/100ml')), 
            data=filter(bac_standards, CharacteristicID==bacteria_type), 
            hjust=0, vjust=1.1, alpha=0.8, size=3) +
  geom_text(aes(x=1, y=SwimmingStandard,
                label=paste0('Swimming Standard: ', SwimmingStandard, ' #/100ml')), 
            data=filter(bac_standards, CharacteristicID==bacteria_type), 
            hjust=0, vjust=1.1, alpha=0.8, size=3) +
  geom_text(aes(label=paste(Qualifier, 
                            str_trim(prettyNum(ResultValue, big.mark=",", scientific=F))),
                x=LocationID, y=ResultValue),
            position=position_dodge(width = 0.8), 
            vjust=-.6, size =3.5) +
  labs(y=paste0(filter(bac_standards, CharacteristicID==bacteria_type)$BacteriaLabel, ' #/100ml'),
       x='Site ID') +
  scale_y_log10(breaks=10^seq(0, 7), labels=comma) +
  scale_fill_manual(values=c('high'="#DA635D", 'med'="#FF9933", 'low'="#6699CC"), drop=FALSE) +
  theme(axis.text.x=element_text(angle=45, hjust=1, vjust=NULL),
        plot.title=element_text(size = rel(1.2), hjust = 0)) +
  guides(fill=FALSE)

tblDataCharacteristics <- c(bacteria_type, "SURF_ANIONIC", "NH3")

tblData <- filter(wq, LocationID %in% locations_hasflow) %>%
  select(Current, VisitID, LocationID, CharacteristicID, 
         Datetime, Qualifier, ResultValue, WaterBodyID, Weather, ProjectID, Comment) %>%
  arrange(VisitID, LocationID, Datetime, WaterBodyID, Comment) %>%
  filter(CharacteristicID %in% tblDataCharacteristics)

tblData <- mutate(tblData,
                  ResultValue=ifelse(CharacteristicID==bacteria_type,
                                     prettyNum(ResultValue, digits=3, big.mark=",", scientific=F),
                                     prettyNum(ResultValue, nsmall=2, small.interval=2, scientific=F)))

tblData <- mutate(tblData,
                  ResultValue=ifelse(ResultValue=="NA", "", 
                                     paste(Qualifier, str_trim(ResultValue)))) %>%
  select(-Qualifier)

missing_characteristics <- setdiff(tblDataCharacteristics, unique(tblData$CharacteristicID))

tblData <- spread(tblData, CharacteristicID, ResultValue)

if (length(missing_characteristics) > 0) {
  for (char in missing_characteristics) {
    tblData[, char] <- NA
  }
}

tblData <- arrange(tblData, LocationID) %>%
  mutate(Date=format(Datetime, "%Y-%m-%d"),
         Time=format(Datetime, "%H:%M"))

tblDataCurrent <- filter(tblData, Current, ProjectID=="HOTSPOT") %>%
  select_("LocationID", bacteria_type, "SURF_ANIONIC", "NH3", "WaterBodyID", "Comment")

if (nrow(visits_noflow) > 0) {
  locations_noflow <- filter(locations, HasFlow==0)
  data_noflow <- select(locations_noflow, LocationID, WaterBodyID) %>%
    mutate(LocationID=paste0(LocationID, "*"),
           SURF_ANIONIC=NA,
           NH3=NA,
           Comment=NA)
  data_noflow[, bacteria_type] <- NA
  tblDataCurrent <- rbind(tblDataCurrent, data_noflow) %>%
    arrange(as.character(LocationID))
}

xtblDataCurrent <- xtable(tblDataCurrent, 
  caption=paste0("Data collected at outfalls during Hotspot sampling event on ", 
                 format(sample_date, '%B %d, %Y')),
  align=c("l","l",
         ">{\\raggedleft}p{0.5in}",
         rep(">{\\raggedleft}p{0.42in}", times=2),
         "l","p{2in}"),
  digits = c(0,0,
            0,2,2,
            0,0),
  include.rownames=FALSE, 
  include.colnames=TRUE)

colnames(xtblDataCurrent) <- c("Site ID", paste0(bac_label, ' (#/100ml)'),
                               "Surfact. (ppt)", "NH3 (mg/l)", 
                               "Water Body", "Comment")

xtblLocations <- mutate(locations,
                        LocationID=ifelse(HasFlow==1, LocationID, paste0(LocationID, "*"))) %>%
  select(LocationID, MunicipalityID, WaterBodyID, Latitude, Longitude, LocationDescription) %>%
  arrange(LocationID) %>%
  xtable(caption=paste0("Locations visited during Hotspot sampling event on ", 
                        format(sample_date, '%B %d, %Y')),
         align=c("l","l","l","l",
                 "r","r","p{2in}"),
         digits = c(0, 0, 0, 0, 4, 4, 0),
         include.rownames=FALSE, include.colnames=T)

# rename columns
colnames(xtblLocations) <- plyr::revalue(colnames(xtblLocations),
                                         c("LocationID"="Site ID",
                                           "MunicipalityID"="Municipality",
                                           "WaterBodyID"="Waterbody",
                                           "Latitude"="Latitude",
                                           "Longitude"="Longitude",
                                           "LocationDescription"="Location Description"))

wq.historical <- filter(wq, 
                        CharacteristicID==bacteria_type, 
                        LocationID %in% locations_hasflow)
wq.historical.count <- group_by(wq.historical, LocationID) %>%
  summarize(N=n())

wq.historical <- left_join(wq.historical, wq.historical.count)

p.box <- filter(wq.historical, N>=5) %>%
  ggplot(aes(LocationID, ResultValue)) +
  geom_boxplot(fill='grey90') +
  geom_point(mapping=aes(color=Weather), data=wq.historical, size=4) +
  geom_point(data=filter(wq.historical, Current), pch=1, size=4, color='red') +
  geom_hline(aes(yintercept=BoatingStandard), 
             data=filter(bac_standards, CharacteristicID==bacteria_type),
             color='grey30', size=1) +
  geom_hline(aes(yintercept=SwimmingStandard), 
             data=filter(bac_standards, CharacteristicID==bacteria_type),
             color='grey30') +
  geom_text(aes(x=1, y=BoatingStandard,
                label=paste0('Boating Standard: ', BoatingStandard, ' #/100ml')), 
            data=filter(bac_standards, CharacteristicID==bacteria_type), 
            hjust=0, vjust=1.1, alpha=0.8, size=3) +
  geom_text(aes(x=1, y=SwimmingStandard,
                label=paste0('Swimming Standard: ', SwimmingStandard, ' #/100ml')), 
            data=filter(bac_standards, CharacteristicID==bacteria_type), 
            hjust=0, vjust=1.1, alpha=0.8, size=3) +
  labs(y=paste0(filter(bac_standards, CharacteristicID==bacteria_type)$BacteriaLabel, ' #/100ml'),
       x='Site ID') +
  scale_y_log10(breaks=10^seq(0, 7), labels=comma) +
  scale_color_manual('Weather', values=c('Dry'='cadetblue2', 'Wet'='deepskyblue4'), drop=FALSE) +
  theme(axis.text.x=element_text(angle=45, hjust=1, vjust=NULL))

# get ymax for placing sample counts
ymax <- 10^ggplot_build(p.box)$panel$ranges[[1]]$y.range[2]

p.box <- p.box + 
  geom_text(mapping=aes(x=LocationID, y=ymax, label=paste0('(',N,')')), 
            data=wq.historical.count, size=4)

tbl_historical <- function(loc) {
  tblDataHistorical <- filter(tblData, LocationID==loc) %>%
    arrange(desc(Date)) %>%
    select_("Date", bacteria_type, "SURF_ANIONIC", "NH3", "Weather", "ProjectID") %>%
    mutate(ProjectID=plyr::revalue(ProjectID, c('HOTSPOT'='Hotspot', 'MUNI'='Municipal')))

  xtblDataHistorical <- xtable(tblDataHistorical, 
                               caption=paste("Historical Data for ", loc),
                               align=c("l","l","r","r","r","r","l"),
                               digits = c(0,0,0,2,2,0,0),
                               include.rownames=FALSE, 
                               include.colnames=TRUE )

  colnames(xtblDataHistorical) <- c("Date", paste0(bac_label, ' (#/100ml)'),
                          "Surfact. (ppt)", "NH3 (mg/l)", 
                          "Weather", "Project")

  # create shade for rows with wet weather
  rws <- which(tblDataHistorical$Weather=="Wet")-1
  shade <- rep('\\rowcolor[gray]{0.9}', length(rws))

  print(xtblDataHistorical, 
        latex.environments=NULL,
        table.placement='H',
        include.rownames=FALSE, 
        caption.placement="top",
        floating=TRUE,
        hline.after=c(0),
        NA.string='',
        add.to.row=list(pos=as.list(rws), command=shade))
}

\textbf{Hotspot Sampling Date}: r format(sample_date, "%B %d, %Y")

\textbf{Municipalities}: r paste(municipalities, collapse=', ')

\textbf{Precipitation Record}: r precip_record

\textbf{Report Date}: r rmarkdown::metadata$report_date

\textbf{Technical Report}: r rmarkdown::metadata$report_number

Executive Summary

The Mystic River Watershed Association conducted dry-weather monitoring in the municipalities of Somerville and Melrose on r format(sample_date, "%B %d, %Y"). The main goal of the monitoring event was to evaluate conditions at stormwater outfalls and other sites that have not been sampled extensively in the past few years by MyRWA and other sites of interest. Water quality samples were taken in accordance with the MyRWA MA-DEP approved QAPP along with accurate notes.

Testing revealed that six of ten tested sites had E. coli values that exceeded MassDEP Water Quality Standards for Class B water bodies for swimming. The highest E. coli values were found at Ell Pond, at outfall MEL003, where E. coli levels were 6910 MPN E. coli / 100 ml, a value which exceeds the standard for boating as well.

MassDEP Water Quality Standards for Class B water bodies are 1260 E. coli / 100 ml. for boating and 235 E. coli / 100 ml for swimming.

Citation

This report was prepared by:

Mystic River Watershed Association 20 Academy St. Suite 306
Arlington, MA 02476
781-316-3438
www.mysticriver.org

Principal Investigators: Patrick Herron and Andrew Hrycyna

Water Quality Monitors: r monitors

Lab Analysis: EPA Region 1 - Chelmsford

This report describes monitoring data collected by MyRWA under its MassDEP-approved Quality Assurance Project Plan.

This project was undertaken in connection with settlement of an enforcement action, United States v. Sterling Suffolk Racecourse LLC, taken on behalf of the U.S. Environmental Protection Agency under the Clean Water Act.

EPA Region 1 Laboratory in Chelmsford, MA conducted all laboratory sample analyses. These services were provided in-kind to this project.

This report should be cited in the literature in the following manner:

r rmarkdown::metadata$title--Technical Report r rmarkdown::metadata$report_number. Mystic River Watershed Association, Arlington, MA.

Water Quality Results

On r format(sample_date, "%B %d, %Y"), MyRWA conducted a dry-weather sampling event at Alewife Brook, Ell Pond and the Malden Canal. During this sampling event, MyRWA collected ten water samples and tested them for bacteria levels to identify potential sources of contamination that could affect water quality in the surface water bodies.

Elevated E. coli levels adversely impacting water quality were observed at six of 10 sites surveyed, where measured values exceeded MassDEP Water Quality Standards for swimming safety in Class B water bodies. In addition, at one site near Ell Pond, E. coli levels exceeded the standard for safe boating as well.

Conclusions and Action Items

Results suggest that two sites should be prioritized, and others followed up on:

The two sites near Ell Pond, MEL003 and MEL002 showed the highest E. coli values, exceeding or approaching the standard for boating safety. Both show elevated levels over the past several years in dry weather, indicating persistent problems. Additional testing should be done to determine the cause of this contamination.
Prioritize other outfalls whose values exceeded water quality standard, as per table and maps. Note that two stream samples from Malden Canal showed moderately high levels of bacteria.

\clearpage

\begin{figure}[h] \caption{Bacteria Levels Measured on r format(sample_date, '%B %d, %Y')}

print(p.bar)

\end{figure}

print(xtblDataCurrent,
      floating=TRUE,
      latex.environments=NULL,
      table.placement='H',
      include.rownames=FALSE, 
      caption.placement="top", 
      hline.after=c(0),
      NA.string='')
if (nrow(visits_noflow) > 0) {
  cat("\\textit{* Location visited but not sampled due to no flow}")
}

print(xtblLocations, 
      latex.environments=NULL,
      table.placement='H',
      include.rownames=FALSE, 
      caption.placement="top",
      floating=TRUE,
      hline.after=c(0),
      NA.string='')
if (nrow(visits_noflow) > 0) {
  cat("\\textit{* Location visited but not sampled due to no flow}")
}

\clearpage

Appendix 2: Historical Data

\begin{figure}[ht] \caption{Distribution of historical bacteria data at outfalls sampled during Hotspot event on r format(sample_date, '%B %d, %Y')}

p.box

\end{figure}

Boxplots show the median (center blank line), inter-quartile range (25th-75th percentiles, top/bottom of box), and range of extreme values (vertical lines). Values at the top of the chart indicate the number of samples for each outfall. Points with red outline were sampled during the Hotspot event on r format(sample_date, '%B %d, %Y'). Box-and-whiskers only shown for sites with at least 5 samples.

\clearpage

for (loc in locations_hasflow) {
  tbl_historical(loc)
}

walkerjeffd/myrwaR documentation built on May 3, 2019, 10:46 p.m.

rdrr.io home R language documentation Run R code online

CRAN packages Bioconductor packages R-Forge packages GitHub packages

Note that we can't provide technical support on individual packages. You should contact the package authors for that.

walkerjeffd/myrwaR
Tools for the Mystic River Watershed Association

In walkerjeffd/myrwaR: Tools for the Mystic River Watershed Association

Executive Summary

Citation

Water Quality Results

Conclusions and Action Items

Appendix 2: Historical Data

R Package Documentation

Browse R Packages

We want your feedback!

walkerjeffd/myrwaR Tools for the Mystic River Watershed Association

In walkerjeffd/myrwaR: Tools for the Mystic River Watershed Association

Executive Summary

Citation

Water Quality Results

Conclusions and Action Items

Appendix 2: Historical Data

R Package Documentation

Browse R Packages

We want your feedback!

walkerjeffd/myrwaR
Tools for the Mystic River Watershed Association