#' Auto-validate USEPA WQP stations
#'
#' Performs auto-validation on previously queried WQP stations combined with the existing master site list.
#' Auto-validates any new sites (i.e. those not currently in the master site list) and appends them into existing master site list flagged for acceptance, rejection, or review.
#' Checks for any new site types in new data. A warning message and a list of new site types is printed if new site types are encountered.
#' Also re-auto-validates any sites in the master site list that have previously only undergone auto-validation (to account for any changes in auto validation process) and checks existing master site list for changes in AUs, selected site types, or property boundaries.
#'
#' @param sites_object Sites object queried from WQP to be reviewed.
#' @param master_site_file Full path and filename of master site list as generated by autoValidateWQPsites function and manual site review application. Also contains a sheet of waterbody types and their associated IR_FLAGs (.xlsx).
#' @return Exports a new, undated master site list to the location & filename provided by the user.
#' @import sp
#' @import lwgeom
#' @import sf
#' @import wqTools
#' @importFrom plyr rbind.fill
#' @export
autoValidateWQPsites=function(sites_object,master_site_file,correct_longitude=FALSE){
#########
###TESTING SETUP
#library(sp)
#library(sf)
#sites_object=read.csv("C:/Users/jvander/Documents/R/irTools-test-16/01-raw-data/sites-2019-04-04.csv")
#master_site_file="P:/WQ/Integrated Report/IR_2020/2020-IR-assessments/spatial_files/IR_master_site_file-including-manual-edits.xlsx"
#waterbody_type_file = "C:/Users/jvander/Documents/R/irTools-test-16/00-lookup-tables/waterbody_type_domain_table.csv"
#correct_longitude=FALSE
########
# Polygon intersection function
intpoly <- function(polygon, sites_object, sites){
isect=suppressMessages({suppressWarnings({st_intersection(sites, st_difference(st_make_valid(polygon)))})})
st_geometry(isect)=NULL
check=dim(sites_object)[1]
sites_object=merge(sites_object,isect,all.x=TRUE)
if(dim(sites_object)[1]!=check){
stop("Spatial join and merge causing duplicated values.")
}
return(sites_object)
}
print("Reading in sites_file and master_sites_file and checking for new waterbody types...")
# Read in master site table and waterbody type domain tables
waterbody_table = as.data.frame(readxl::read_xlsx(master_site_file, sheet = "waterbodyTypeTable"))
master_site = as.data.frame(readxl::read_xlsx(master_site_file, sheet = "masterSiteTable"))
master_site = dplyr::na_if(master_site, "NA")
# Read in WQP station and results data
stn = sites_object
stn[stn==""]=NA #Make sure all blanks are NA
stn=unique(stn)
# Turn any factors to character vectors for merging w/ master sites
i <- sapply(stn, is.factor)
stn[i] <- lapply(stn[i], as.character)
# Ensure waterbody type table does not contain any unreviewed waterbody types
if(any(is.na(waterbody_table$MonitoringLocationTypeName)|waterbody_table$MonitoringLocationTypeName=="")){
stop("Monitoring location type table missing IR_FLAG data. Fill out table before proceeding.")
}
# Remove past InData column
waterbody_table=waterbody_table[,!names(waterbody_table)%in%"InData"]
# Add waterbody types from sites file to domain table
stn_site_types=data.frame(unique(stn$MonitoringLocationTypeName))
colnames(stn_site_types)="MonitoringLocationTypeName"
stn_site_types$InData="Y"
waterbody_merge <- merge(waterbody_table,stn_site_types, all=TRUE)
# Prompt user to fill out domain table before moving on to autovalidation.
if(dim(waterbody_merge)[1]>dim(waterbody_table)[1]){
write.csv(waterbody_merge,"waterbody_type_table_new.csv",row.names = FALSE)
new <- dim(waterbody_merge)[1]-dim(waterbody_table)[1]
stop(paste(new,"new monitoring location types detected and added to domain table. Update IR_FLAG column in domain table before proceeding with autovalidation tool."))}else{print("No new monitoring location types detected.")
}
site_type_keep <- subset(waterbody_merge$MonitoringLocationTypeName, waterbody_merge$IR_FLAG=="ACCEPT")
# Ensure no duplicates in master site file which could cause erroneous duplication during merges.
master_site=unique(master_site)
orig_master <- dim(master_site) # original count of the number of records in master_site
print(paste(orig_master[1],"total master site records in file."))
ms_dim=dim(master_site)[1]
if(dim(master_site)[1]>0){
master_site[master_site==""]=NA
master_site[master_site=="NA"]=NA} #Make sure all blanks are NA
if(length(master_site$ValidationType[is.na(master_site$ValidationType)==TRUE])>0){
stop("NA's detected in ValidationType column of master_site_file. Correct NA's before re-running autovalidateWQPsites.")
}
suppressWarnings({class(stn$HorizontalAccuracyMeasure.MeasureValue)="numeric"}) #Non-numeric values introduced to this column were causing appearance of duplicates in master_site_file
suppressWarnings({class(master_site$HorizontalAccuracyMeasure.MeasureValue)="numeric"})
# Identify any new sites (all review columns == NA) and move to new data frame (stn_new)
stn2=merge(stn, master_site[,c('UID','OrganizationIdentifier','OrganizationFormalName','ProviderName','MonitoringLocationIdentifier','MonitoringLocationName','MonitoringLocationTypeName')], all.x=TRUE)
stn_new=stn2[is.na(stn2$UID),]
dim(stn_new)
print(paste(dim(stn_new)[1],"sites found in sites_file not present in master_site_file."))
if(dim(stn_new)[1]==0){
readline(prompt="Press [enter] to continue with master_site autovalidation or [esc] to end function.")
}else{readline(prompt="Press [enter] to continue.")}
rm(stn2)
#Assign UID to new sites
if(dim(master_site)[1]>0&dim(stn_new)[1]>0){
stn_new$UID=seq(1:dim(stn_new)[1])+max(master_site$UID)
}else{
if(dim(stn_new)[1]>0){
stn_new$UID=seq(1:dim(stn_new)[1])}
}
# Read in polygons
au_poly=wqTools::au_poly
au_poly=au_poly[,c("ASSESS_ID","AU_NAME","AU_Type")]
bu_poly=wqTools::bu_poly
bu_poly=bu_poly[,!names(bu_poly) %in% 'Status']
names(bu_poly)[names(bu_poly)=='bu_class']='BEN_CLASS'
ss_poly=wqTools::ss_poly
ss_poly=ss_poly[,"ss_descrp"]
names(ss_poly)[names(ss_poly)=='ss_descrp']='ss_R317Descrp'
ut_poly=wqTools::ut_poly
ut_poly=ut_poly[,"jurisdict"]
#suppressWarnings({ut_poly=st_buffer(ut_poly, 0)})
gsl_poly=subset(au_poly, AU_NAME=='Gilbert Bay' | AU_NAME=='Gunnison Bay' | AU_NAME=='Farmington Bay' | AU_NAME=='Bear River Bay')
gsl_poly$gsl='y'
wmu_poly=wqTools::wmu_poly
##################################
####Master site reviews
if(dim(master_site)[1]>0){
#Kick master sites that were previously accepted or merged, but are now at a non-assessed site type etc. to AUTO validation (convert review type to AUTO)
#(where they will be rejected, doing this here keeps them out of later master site spatial count based checks)
print("Performing master site reviews...")
#Site type
table(master_site$ValidationType)
master_site$ValidationType[!master_site$MonitoringLocationTypeName %in% site_type_keep]="AUTO"
table(master_site$ValidationType)
# ID master sites where fundamental attributes have changed (MonitoringLocationTypeName, Lat/Long has been updated in WQP, send them to auto review
stn2=stn[,c('OrganizationIdentifier','OrganizationFormalName','ProviderName','MonitoringLocationIdentifier','MonitoringLocationName','MonitoringLocationTypeName', 'LatitudeMeasure', 'LongitudeMeasure')]
names(stn2)=c('OrganizationIdentifier','OrganizationFormalName','ProviderName','MonitoringLocationIdentifier','MonitoringLocationName','type', 'lat', 'long')
mst2=merge(master_site, stn2, all.x=T)
mst2=subset(mst2,
(!is.na(type) & MonitoringLocationTypeName!=type) |
(!is.na(LatitudeMeasure) & round(LatitudeMeasure, 5)!=round(lat, 5)) |
(!is.na(LongitudeMeasure) & round(LongitudeMeasure, 5)!=round(long, 5))
)
master_site$ValidationType[master_site$UID %in% mst2$UID]="AUTO"
rm(mst2, stn2)
table(master_site$ValidationType)
#Remove ASSESS_ID, & AU_NAME cols from master_site before re-assigning (this updates master list in case polygons change)
master_site=master_site[,!names(master_site)%in%c("ASSESS_ID","AU_NAME","AU_Type","BEN_CLASS","R317Descrp","ss_R317Descrp","Water_Type", "Mgmt_Unit")]
class(master_site$IR_FLAG)
dim(master_site)
#Generate spatial sites object
sites=master_site
sites$lat=ifelse(is.na(sites$IR_Lat), sites$LatitudeMeasure, sites$IR_Lat)
sites$long=ifelse(is.na(sites$IR_Long), sites$LongitudeMeasure, sites$IR_Long)
sites=sites[,c('UID','lat','long')]
coordinates(sites)=c("long","lat")
proj4string(sites)=CRS("+init=epsg:4326")
sites=st_as_sf(sites, remove = FALSE)
#Intersect sites w/ Utah poly, AU poly, BU poly, and SS poly
master_site <- intpoly(ut_poly,master_site, sites)
master_site <- intpoly(au_poly,master_site, sites)
master_site <- intpoly(bu_poly,master_site, sites)
master_site <- intpoly(ss_poly,master_site, sites)
master_site <- intpoly(gsl_poly,master_site, sites)
master_site <- intpoly(wmu_poly,master_site, sites)
rm(sites)
#Send to AUTO review by is.na(STATE_NAME)
table(master_site$ValidationType)
master_site$ValidationType[is.na(master_site$jurisdict)]="AUTO"
table(master_site$ValidationType)
master_site=master_site[,!names(master_site) %in% "jurisdict"]
#Send to AUTO review by is.na(AU)
table(master_site$ValidationType)
master_site$ValidationType[is.na(master_site$ASSESS_ID)]="AUTO"
table(master_site$ValidationType)
#Send to AUTO review if GSL
table(master_site$ValidationType)
master_site$ValidationType[master_site$gsl=='y']="AUTO"
table(master_site$ValidationType)
master_site=master_site[,!names(master_site) %in% "gsl"]
#Send to AUTO where MonitoringLocationTypeName is a canal type & AU_Type!="Canal"
master_site$ValidationType[
(master_site$MonitoringLocationTypeName=="Stream: Canal" & master_site$AU_Type != "Canal")|
(master_site$MonitoringLocationTypeName=="Stream: Ditch" & master_site$AU_Type != "Canal")|
(master_site$MonitoringLocationTypeName=="Canal Transport" & master_site$AU_Type != "Canal")|
(master_site$MonitoringLocationTypeName=="Canal Drainage" & master_site$AU_Type != "Canal")|
(master_site$MonitoringLocationTypeName=="Canal Irrigation" & master_site$AU_Type != "Canal")
]="AUTO"
table(master_site$ValidationType)
#Send to AUTO where MonitoringLocationTypeName is a stream or spring type & AU_Type!="River/Stream" or "Canal"
master_site$ValidationType[
(master_site$MonitoringLocationTypeName=="Stream" & master_site$AU_Type != "River/Stream")&
(master_site$MonitoringLocationTypeName=="Stream" & master_site$AU_Type != "Canal") |
(master_site$MonitoringLocationTypeName=="River/Stream" & master_site$AU_Type != "River/Stream")&
(master_site$MonitoringLocationTypeName=="River/Stream" & master_site$AU_Type != "Canal")|
(master_site$MonitoringLocationTypeName=="River/Stream Intermittent" & master_site$AU_Type != "River/Stream")&
(master_site$MonitoringLocationTypeName=="River/Stream Intermittent" & master_site$AU_Type != "Canal")|
(master_site$MonitoringLocationTypeName=="River/Stream Perennial" & master_site$AU_Type != "River/Stream")&
(master_site$MonitoringLocationTypeName=="River/Stream Perennial" & master_site$AU_Type != "Canal")|
(master_site$MonitoringLocationTypeName=="Spring" & master_site$AU_Type != "River/Stream")&
(master_site$MonitoringLocationTypeName=="Spring" & master_site$AU_Type != "Canal")
]="AUTO"
table(master_site$ValidationType)
#Send to AUTO where MonitoringLocationTypeName is a lake type & AU_Type!="Reservoir/Lake"
master_site$ValidationType[
(master_site$MonitoringLocationTypeName=="Lake, Reservoir, Impoundment" & master_site$AU_Type != "Reservoir/Lake")|
(master_site$MonitoringLocationTypeName=="Lake" & master_site$AU_Type != "Reservoir/Lake")|
(master_site$MonitoringLocationTypeName=="Reservoir" & master_site$AU_Type != "Reservoir/Lake")
]="AUTO"
table(master_site$ValidationType)
mast_autval <- length(master_site$ValidationType[master_site$ValidationType=="AUTO"])
mast_man <- length(master_site$ValidationType[master_site$ValidationType=="MANUAL"])
new_autval <- length(stn_new[,1])
if(mast_autval+mast_man!=orig_master[1]){
stop("Dimensions of master_site have changed following master site reviews. Function/file check needed.")
}
#Send master sites that have only undergone AUTO validation or were flagged for AUTO review above to stn_new (i.e. re-auto review those sites to account for any changes in automated review process)
stn_new=plyr::rbind.fill(stn_new,master_site[master_site$ValidationType=="AUTO",c(names(stn_new), 'IR_Lat', 'IR_Long', 'Note')])
dim(stn_new)
table(stn_new$HorizontalCoordinateReferenceSystemDatumName)
#Remove stn_new sites from master_site
dim(master_site)
master_site=master_site[!master_site$UID%in%stn_new$UID,]
dim(master_site)
dim(stn_new)
print(paste(mast_autval,"master site(s) and", new_autval,"new site(s) sent to AUTO review.",mast_man,"master site(s) have undergone previous manual review and have not been flagged for further review."))
readline(prompt="Press [enter] to continue.")
}else{
mast_autval <- length(master_site$ValidationType[master_site$ValidationType=="AUTO"])
mast_man <- length(master_site$ValidationType[master_site$ValidationType=="MANUAL"])
new_autval <- dim(stn_new)[1]
}
######################################
######################################
print("Performing attribute based site checks...")
#Stop execution if there are no new sites
if(dim(stn_new)[1]==0){stop("No new sites identified. This is suspicious. Double check inputs. Coding modification may be required.",call.=FALSE)}
#Correct positive longitudes (if correct_longitude==TRUE) (JV note, moved to apply to all stations that will undergo auto review - stn_new)
if(correct_longitude==TRUE){
stn_new$LongitudeMeasure[stn_new$LongitudeMeasure>0]<- -stn_new$LongitudeMeasure[stn_new$LongitudeMeasure>0]
}
# Create IR specific columns, all values filled w/ "REVIEW"
stn_new[,c("IR_MLID","IR_MLNAME","IR_FLAG","IR_COMMENT")] = "REVIEW"
##Auto review new sites & master sites re-flagged to AUTO...
rej_reasons_att=data.frame(matrix(nrow=0,ncol=2))
# If [MonitoringLocationDescriptionText] contains "Duplicate","Replicate","Dummy","replaced","Blank","QA", or "QC", reject as QAQC
reason_n = ifelse(grepl("Duplicate",stn_new$MonitoringLocationDescriptionText) | grepl("Replicate",stn_new$MonitoringLocationDescriptionText) | grepl("Dummy",stn_new$MonitoringLocationDescriptionText) |
grepl("replaced",stn_new$MonitoringLocationDescriptionText) | grepl("Blank",stn_new$MonitoringLocationDescriptionText) | grepl("QA",stn_new$MonitoringLocationDescriptionText) |
grepl("QC",stn_new$MonitoringLocationDescriptionText),"Attributes indicate dup, rep, blank, dummy, or QAQC site",NA)
if(length(reason_n)>0){rej_reasons_att=rbind(rej_reasons_att,na.omit(cbind(stn_new$MonitoringLocationIdentifier,reason_n)))}
# If [OrganizationIdentifier] is test or demo, reject site.
reason_n=ifelse(stn_new$OrganizationIdentifier%in%c("OST_SHPD_TEST","DEMOTEST"),"Organization identifier indicates test/demo",NA)
if(length(reason_n)>0){rej_reasons_att=rbind(rej_reasons_att,na.omit(cbind(stn_new$MonitoringLocationIdentifier,reason_n)))}
# # Reject datums that != NAD27, NAD83, or WGS84 or is UNKWN
# reason_n=ifelse(!stn_new$HorizontalCoordinateReferenceSystemDatumName %in% c('NAD27','NAD83','WGS84'),"Horizontal datum not NAD27, NAD83, or WGS84 and/or is unknown",NA)
# if(length(reason_n)>0){rej_reasons_att=rbind(rej_reasons_att,na.omit(cbind(stn_new$MonitoringLocationIdentifier,reason_n)))}
#Reject sites with ConstructionDateText populated
reason_n=ifelse(!is.na(stn_new$ConstructionDateText),"Construction date text populated",NA)
if(length(reason_n)>0){rej_reasons_att=rbind(rej_reasons_att,na.omit(cbind(stn_new$MonitoringLocationIdentifier,reason_n)))}
#Reject sites with WellDepthMeasure.MeasureValue populated
reason_n=ifelse(!is.na(stn_new$WellDepthMeasure.MeasureValue),"Well depth measure populated",NA)
if(length(reason_n)>0){rej_reasons_att=rbind(rej_reasons_att,na.omit(cbind(stn_new$MonitoringLocationIdentifier,reason_n)))}
#Reject sites with WellDepthMeasure.MeasureUnitCode populated
reason_n=ifelse(!is.na(stn_new$WellDepthMeasure.MeasureUnitCode),"Well depth measure unit code populated",NA)
if(length(reason_n)>0){rej_reasons_att=rbind(rej_reasons_att,na.omit(cbind(stn_new$MonitoringLocationIdentifier,reason_n)))}
#Reject sites with WellHoleDepthMeasure.MeasureValue populated
reason_n=ifelse(!is.na(stn_new$WellHoleDepthMeasure.MeasureValue),"Well hole depth measure populated",NA)
if(length(reason_n)>0){rej_reasons_att=rbind(rej_reasons_att,na.omit(cbind(stn_new$MonitoringLocationIdentifier,reason_n)))}
#Reject sites with WellHoleDepthMeasure.MeasureUnitCode populated
reason_n=ifelse(!is.na(stn_new$WellHoleDepthMeasure.MeasureUnitCode),"Well hole depth measure unit code populated",NA)
if(length(reason_n)>0){rej_reasons_att=rbind(rej_reasons_att,na.omit(cbind(stn_new$MonitoringLocationIdentifier,reason_n)))}
#Reject sites with AquiferName populated
reason_n=ifelse(!is.na(stn_new$AquiferName),"Aquifer name populated: associated with unassessed wells",NA)
if(length(reason_n)>0){rej_reasons_att=rbind(rej_reasons_att,na.omit(cbind(stn_new$MonitoringLocationIdentifier,reason_n)))}
#Reject sites with FormationTypeText populated
reason_n=ifelse(!is.na(stn_new$FormationTypeText),"Formation type populated: associated with unassessed wells",NA)
if(length(reason_n)>0){rej_reasons_att=rbind(rej_reasons_att,na.omit(cbind(stn_new$MonitoringLocationIdentifier,reason_n)))}
#Reject sites with AquiferTypeName populated
reason_n=ifelse(!is.na(stn_new$AquiferTypeName),"Aquifer type name populated: associated with unassessed wells",NA)
if(length(reason_n)>0){rej_reasons_att=rbind(rej_reasons_att,na.omit(cbind(stn_new$MonitoringLocationIdentifier,reason_n)))}
#Reject sites where MonitoringLocationTypeName !%in% site_type_keep argument
reason_n=ifelse(!stn_new$MonitoringLocationTypeName%in%site_type_keep,"Non-assessed site type",NA)
if(length(reason_n)>0){rej_reasons_att=rbind(rej_reasons_att,na.omit(cbind(stn_new$MonitoringLocationIdentifier,reason_n)))}
names(rej_reasons_att)=c("MonitoringLocationIdentifier","Reason")
rej_reasons_att$ReasonType="Attribute based"
rej_reasons_att$FLAG="REJECT"
head(rej_reasons_att)
print("Attribute based site rejection reason count:")
print(table(rej_reasons_att$Reason))
readline(prompt="Press [enter] to continue.")
#Set stn_new IR_FLAG and reason for attribute based site rejections
stn_new$IR_FLAG[stn_new$MonitoringLocationIdentifier %in% rej_reasons_att$MonitoringLocationIdentifier]="REJECT"
table(stn_new$IR_FLAG)
####################
###Spatial checks###
####################
print("Performing spatial site checks...")
#Generate spatial sites object
sites=stn_new
sites$lat=ifelse(is.na(sites$IR_Lat), sites$LatitudeMeasure, sites$IR_Lat)
sites$long=ifelse(is.na(sites$IR_Long), sites$LongitudeMeasure, sites$IR_Long)
sites=sites[,c('OrganizationIdentifier','OrganizationFormalName','MonitoringLocationIdentifier','MonitoringLocationName','MonitoringLocationTypeName','ProviderName',"LatitudeMeasure","LongitudeMeasure",'lat','long')]
coordinates(sites)=c("long","lat")
proj4string(sites)=CRS("+init=epsg:4326")
sites=st_as_sf(sites, remove = FALSE)
#Intersect sites w/ Utah poly, AU poly, BU poly, SS poly, and GSL poly
stn_new <- intpoly(ut_poly,stn_new, sites)
stn_new <- intpoly(au_poly,stn_new, sites)
stn_new <- intpoly(bu_poly,stn_new, sites)
stn_new <- intpoly(ss_poly,stn_new, sites)
stn_new <- intpoly(gsl_poly,stn_new, sites)
stn_new <- intpoly(wmu_poly,stn_new, sites)
rm(sites)
###Spatial rejections
rej_reasons_spat=data.frame(matrix(nrow=0,ncol=2))
#Reject by is.na(AU)
reason_n=ifelse(is.na(stn_new$ASSESS_ID),"Undefined AU",NA)
if(length(reason_n)>0){rej_reasons_spat=rbind(rej_reasons_spat,na.omit(cbind(stn_new$MonitoringLocationIdentifier,reason_n)))}
#Reject by is.na(STATE_NAME)
reason_n=ifelse(is.na(stn_new$jurisdict),"Non-jurisdictional: out of state or within tribal boundaries",NA)
if(length(reason_n)>0){rej_reasons_spat=rbind(rej_reasons_spat,na.omit(cbind(stn_new$MonitoringLocationIdentifier,reason_n)))}
#Reject by GSL poly
reason_n=ifelse(stn_new$gsl=='y',"GSL assessed through separate program",NA)
if(length(reason_n)>0){rej_reasons_spat=rbind(rej_reasons_spat,na.omit(cbind(stn_new$MonitoringLocationIdentifier,reason_n)))}
#Remove unneeded spatial join columns
stn_new=stn_new[,!names(stn_new)%in%c("jurisdict","gsl")]
#Reject where MonitoringLocationTypeName is a canal type & AU_Type!="Canal"
reason_n=ifelse(
(stn_new$MonitoringLocationTypeName=="Stream: Canal" & stn_new$AU_Type != "Canal")|
(stn_new$MonitoringLocationTypeName=="Stream: Ditch" & stn_new$AU_Type != "Canal")|
(stn_new$MonitoringLocationTypeName=="Canal Transport" & stn_new$AU_Type != "Canal")|
(stn_new$MonitoringLocationTypeName=="Canal Drainage" & stn_new$AU_Type != "Canal")|
(stn_new$MonitoringLocationTypeName=="Canal Irrigation" & stn_new$AU_Type != "Canal")
,"Non-assessed canal or ditch",NA)
if(length(reason_n)>0){rej_reasons_spat=rbind(rej_reasons_spat,na.omit(cbind(stn_new$MonitoringLocationIdentifier,reason_n)))}
#Reject where MonitoringLocationTypeName is a stream or spring type & AU_Type!="River/Stream"
reason_n=ifelse(
(stn_new$MonitoringLocationTypeName=="Stream" & stn_new$AU_Type != "River/Stream")|
(stn_new$MonitoringLocationTypeName=="River/Stream" & stn_new$AU_Type != "River/Stream")|
(stn_new$MonitoringLocationTypeName=="River/Stream Intermittent" & stn_new$AU_Type != "River/Stream")|
(stn_new$MonitoringLocationTypeName=="River/Stream Perennial" & stn_new$AU_Type != "River/Stream")|
(stn_new$MonitoringLocationTypeName=="Spring" & stn_new$AU_Type != "River/Stream")
,"Stream or spring site type in non-River/Stream AU",NA)
rej_reasons_spat=rbind(rej_reasons_spat,na.omit(cbind(stn_new$MonitoringLocationIdentifier,reason_n)))
names(rej_reasons_spat)=c("MonitoringLocationIdentifier","Reason")
rej_reasons_spat$ReasonType="Spatial"
rej_reasons_spat$FLAG="REJECT"
head(rej_reasons_spat)
print("Spatial site rejection reason count:")
print(table(rej_reasons_spat$Reason))
readline(prompt="Press [enter] to continue.")
#Set stn_new IR_FLAG and reason for spatial site rejections
stn_new$IR_FLAG[stn_new$MonitoringLocationIdentifier %in% rej_reasons_spat$MonitoringLocationIdentifier]="REJECT"
table(stn_new$IR_FLAG)
##########
##Calculate full distance matrix, lat/long, mlid, and site 100m counts.
##For new sites and master flagged for re-AUTO (stn_new), apply lat/long, 100 m site count logic to determine review status
##For previously manually reviewed master sites, check if MLID_Count, Lat_Count, Long_Count, or sites100m_count have increased, flag these for review
print("Performing 100m/duplicate MLID/duplicate lat-long checks...")
#rbind new auto-validated sites back to master file and calculate distances on all non-rejected sites in master file (this way if polygons change, it is automatically accounted for in master file)
spatial_check_data=rbind(master_site[,names(stn_new)],stn_new)
dim(spatial_check_data)
#Splitting off rejected sites prior to other spatial analyses (including all previously accepted/merged sites allows calc of distances including previously reviewed sites.)
accept_review=spatial_check_data[spatial_check_data$IR_FLAG!="REJECT",]
rejected=spatial_check_data[spatial_check_data$IR_FLAG=="REJECT",]
class(accept_review$IR_FLAG)
table(accept_review$IR_FLAG)
sum(table(accept_review$IR_FLAG))
#Count MLIDs, add as column to accept_review, MLID_Count>1 means duplicated MLID
MLID_Count=as.vector(table(accept_review$MonitoringLocationIdentifier)[accept_review$MonitoringLocationIdentifier])
accept_review$MLID_Count=MLID_Count
#Count Latitudes, add as column to accept_review, Lat_Count>1 means duplicated lat
Lat_Count=as.vector(table(accept_review$LatitudeMeasure))[as.factor(accept_review$LatitudeMeasure)]
accept_review$Lat_Count=Lat_Count
#Count Longitudes, add as column to accept_review, Long_Count>1 means duplicated long
Long_Count=as.vector(table(accept_review$LongitudeMeasure))[as.factor(accept_review$LongitudeMeasure)]
accept_review$Long_Count=Long_Count
#Identify non-rejected sites w/in 100 m (0.1 km)
accept_review$lat=ifelse(is.na(accept_review$IR_Lat), accept_review$LatitudeMeasure, accept_review$IR_Lat)
accept_review$long=ifelse(is.na(accept_review$IR_Long), accept_review$LongitudeMeasure, accept_review$IR_Long)
distmat=spDists(cbind(accept_review$long,accept_review$lat),longlat=TRUE)
row.names(distmat)=accept_review$MonitoringLocationIdentifier
colnames(distmat)=accept_review$MonitoringLocationIdentifier
accept_review=accept_review[,!names(accept_review) %in% c('lat','long')]
sum(table(accept_review$IR_FLAG))
countSites100m=function(data){
count=sum(data>0&data<=0.1)
#names(data[data>0&data<=0.1])
return(count)
}
sites100m_count=apply(distmat,1,FUN='countSites100m')
accept_review$sites100m_count=sites100m_count
rejected$sites100m_count=NA
rejected$MLID_Count=NA
rejected$Lat_Count=NA
rejected$Long_Count=NA
#Re-appending rejected data
spatial_check_data=rbind(accept_review,rejected)
table(spatial_check_data$IR_FLAG)
sum(table(spatial_check_data$IR_FLAG))
rm(accept_review)
rm(rejected)
#Join spatial checks to stn_new (drop spatial count columns then merge)
dim(stn_new)
stn_new=stn_new[,!names(stn_new)%in%c("ASSESS_ID","AU_NAME","AU_Type","BEN_CLASS","R317Descrp","ss_R317Descrp","OWNERSHIP","ValidationType","MLID_Count","Lat_Count","Long_Count","sites100m_count")]
stn_new=merge(stn_new,spatial_check_data,all.x=T)
dim(stn_new)
stn_new$ValidationType="AUTO"
#Spatial review flags & reasons (Apply to stn_new only)
#Populate stn_new$MLID & lat/long for new sites w/ no duplicate MLIDS, lats, longs, and 0 other sites w/in 100m (IR_FLAG=="REVIEW" for all non-rejected new sites at this point)
stn_new$IR_MLID = ifelse(stn_new$IR_FLAG=="REVIEW"&stn_new$MLID_Count==1&stn_new$Lat_Count==1&stn_new$Long_Count==1&stn_new$sites100m_count==0,as.vector(stn_new$MonitoringLocationIdentifier),"REVIEW")
stn_new$IR_MLNAME = ifelse(stn_new$IR_FLAG=="REVIEW"&stn_new$MLID_Count==1&stn_new$Lat_Count==1&stn_new$Long_Count==1&stn_new$sites100m_count==0,as.vector(stn_new$MonitoringLocationName),NA)
stn_new$IR_Lat = ifelse(stn_new$IR_FLAG=="REVIEW"&stn_new$MLID_Count==1&stn_new$Lat_Count==1&stn_new$Long_Count==1&stn_new$sites100m_count==0,stn_new$LatitudeMeasure,NA)
stn_new$IR_Long = ifelse(stn_new$IR_FLAG=="REVIEW"&stn_new$MLID_Count==1&stn_new$Lat_Count==1&stn_new$Long_Count==1&stn_new$sites100m_count==0,stn_new$LongitudeMeasure,NA)
#Populate rejected MLID, lat, and long w/ REJECT
stn_new$IR_MLID = ifelse(stn_new$IR_FLAG=="REJECT","REJECT",as.vector(stn_new$IR_MLID))
stn_new$IR_MLNAME = ifelse(stn_new$IR_FLAG=="REJECT","REJECT",as.vector(stn_new$IR_MLNAME))
stn_new$IR_Lat = ifelse(stn_new$IR_FLAG=="REJECT",NA,stn_new$IR_Lat)
stn_new$IR_Long = ifelse(stn_new$IR_FLAG=="REJECT",NA,stn_new$IR_Long)
#Review reasons
review_reasons=data.frame(matrix(nrow=0,ncol=2))
#MLID, lat/long, and site 100 m counts
reason_n=ifelse(stn_new$MLID_Count>1,"Duplicated MLID",NA)
if(length(reason_n)>0){review_reasons=rbind(review_reasons,na.omit(cbind(stn_new$MonitoringLocationIdentifier,reason_n)))}
reason_n=ifelse(stn_new$Lat_Count>1 | stn_new$Long_Count>1,"Duplicated lat or long",NA)
if(length(reason_n)>0){review_reasons=rbind(review_reasons,na.omit(cbind(stn_new$MonitoringLocationIdentifier,reason_n)))}
reason_n=ifelse(stn_new$sites100m_count>=1,"One or more sites w/in 100 m",NA)
if(length(reason_n)>0){review_reasons=rbind(review_reasons,na.omit(cbind(stn_new$MonitoringLocationIdentifier,reason_n)))}
#MonitoringLocationTypeName is a stream or spring type & AU_Type=="Canal"
reason_n=ifelse(
(stn_new$MonitoringLocationTypeName=="Stream" & stn_new$AU_Type == "Canal")|
(stn_new$MonitoringLocationTypeName=="River/Stream" & stn_new$AU_Type == "Canal")|
(stn_new$MonitoringLocationTypeName=="River/Stream Intermittent" & stn_new$AU_Type == "Canal")|
(stn_new$MonitoringLocationTypeName=="River/Stream Perennial" & stn_new$AU_Type == "Canal")|
(stn_new$MonitoringLocationTypeName=="Spring" & stn_new$AU_Type == "Canal")
,"Stream or spring site type in canal AU type",NA)
if(length(reason_n)>0){review_reasons=rbind(review_reasons,na.omit(cbind(stn_new$MonitoringLocationIdentifier,reason_n)))}
table(review_reasons$reason_n)
#MonitoringLocationTypeName is a lake type & AU_Type!="Reservoir/Lake"
reason_n=ifelse(
(stn_new$MonitoringLocationTypeName=="Lake, Reservoir, Impoundment" & stn_new$AU_Type != "Reservoir/Lake")|
(stn_new$MonitoringLocationTypeName=="Lake" & stn_new$AU_Type != "Reservoir/Lake")|
(stn_new$MonitoringLocationTypeName=="Reservoir" & stn_new$AU_Type != "Reservoir/Lake")
,"MLID type is lake/reservoir, but AU_Type is not - potential new AU needed",NA)
if(length(reason_n)>0){review_reasons=rbind(review_reasons,na.omit(cbind(stn_new$MonitoringLocationIdentifier,reason_n)))}
table(review_reasons$reason_n)
#Join spatial checks to master_site
spatial_check_data=spatial_check_data[,names(spatial_check_data) %in% c("UID","MLID_Count","Lat_Count","Long_Count","sites100m_count")]
names(spatial_check_data)=c("UID","MLID_Count2","Lat_Count2","Long_Count2","sites100m_count2")
master_site=merge(master_site,spatial_check_data,all.x=T)
#Check if MLID_Count, Lat_Count, Long_Count, or sites100m_count have increased, flag these for review
reason_n=ifelse(master_site$MLID_Count2>master_site$MLID_Count,"Master site MLID count has increased", NA)
if(length(reason_n)>0){review_reasons=rbind(review_reasons,na.omit(cbind(master_site$MonitoringLocationIdentifier,reason_n)))}
table(review_reasons$reason_n)
reason_n=ifelse(master_site$Lat_Count2>master_site$Lat_Count,"Master site lat count has increased", NA)
if(length(reason_n)>0){review_reasons=rbind(review_reasons,na.omit(cbind(master_site$MonitoringLocationIdentifier,reason_n)))}
table(review_reasons$reason_n)
reason_n=ifelse(master_site$Long_Count2>master_site$Long_Count,"Master site long count has increased", NA)
if(length(reason_n)>0){review_reasons=rbind(review_reasons,na.omit(cbind(master_site$MonitoringLocationIdentifier,reason_n)))}
table(review_reasons$reason_n)
reason_n=ifelse(master_site$sites100m_count2>master_site$sites100m_count,"Master site sites w/in 100 m count has increased", NA)
if(length(reason_n)>0){review_reasons=rbind(review_reasons,na.omit(cbind(master_site$MonitoringLocationIdentifier,reason_n)))}
table(review_reasons$reason_n)
#Rename review reason columns
names(review_reasons)=c("MonitoringLocationIdentifier","Reason")
review_reasons$ReasonType="Spatial"
review_reasons$FLAG="REVIEW"
print("Spatial site review reason count:")
print(table(review_reasons$Reason))
#rbind reasons together
reject_reasons=rbind(rej_reasons_att,rej_reasons_spat)
reasons_all=rbind(reject_reasons, review_reasons)
#Update master_site MLID_Count, Lat_Count, Long_Count, or sites100m
master_site$MLID_Count=master_site$MLID_Count2
master_site$Lat_Count=master_site$Lat_Count2
master_site$Long_Count=master_site$Long_Count2
master_site$sites100m_count=master_site$sites100m_count2
#Drop master_site MLID_Count2, Lat_Count2, Long_Count2, and sites100m2
master_site=master_site[,!names(master_site) %in% c("MLID_Count2","Lat_Count2","Long_Count2","sites100m_count2")]
#Populate ACCEPT for new sites w/ no duplicate MLIDS, lats, longs, and 0 other sites w/in 100m (IR_FLAG=="REVIEW" for all non-rejected new sites at this point)
stn_new=within(stn_new,{
IR_FLAG[!MonitoringLocationIdentifier %in% reasons_all$MonitoringLocationIdentifier &IR_FLAG!="REJECT" & MLID_Count==1 & Lat_Count==1 & Long_Count==1 & sites100m_count==0]<-"ACCEPT"
})
print(paste(" Site validation complete and autovalidation resulted in", table(stn_new$IR_FLAG)[1],"accepted sites,",table(stn_new$IR_FLAG)[2],"rejected sites, and",table(stn_new$IR_FLAG)[3],"sites in need of review."))
#rbind master_site & stn_new to make full list of all sites (master_new)
master_site$IR_COMMENT=as.factor(master_site$IR_COMMENT)
stn_new$IR_COMMENT=as.factor(stn_new$IR_COMMENT)
master_new=plyr::rbind.fill(master_site, stn_new)
if(any(master_new$MonitoringLocationIdentifier[master_new$IR_FLAG=="ACCEPT" & master_new$ValidationType=="AUTO"] %in% reasons_all$MonitoringLocationIdentifier)){
stop("Accepted site represented in reject/review site log. Check function code rules for unintended exceptions.")
}
###Set IR_FLAG for REJECT & REVIEW
master_new=within(master_new,{
IR_FLAG[MonitoringLocationIdentifier %in% review_reasons$MonitoringLocationIdentifier]="REVIEW"
IR_FLAG[MonitoringLocationIdentifier %in% reject_reasons$MonitoringLocationIdentifier]="REJECT"
})
#Set IR_COMMENT
master_new$IR_COMMENT=as.character(master_new$IR_COMMENT)
master_new=within(master_new,{
IR_COMMENT[IR_FLAG=="REJECT" & ValidationType=="AUTO"]="Automatically flagged for rejection"
IR_COMMENT[IR_FLAG=="REVIEW" & ValidationType=="AUTO"]="Automatically flagged for review"
IR_COMMENT[IR_FLAG=="ACCEPT" & ValidationType=="AUTO"]="Automatically accepted"
})
#Set ValidationType to AUTO for any MLID in review reasons (this is for previously manually reviewed sites that now have a review reason e.g. MLID count has increased)
master_new=within(master_new,{
ValidationType[MonitoringLocationIdentifier %in% reasons_all$MonitoringLocationIdentifier]="AUTO"
})
####Sort by UID and re-order columns before writing
master_new=master_new[order(master_new$UID),]
master_new=master_new[,names(master_site)]
levels(master_new$AU_Type)=c(levels(master_new$AU_Type),"Undefined")
master_new$AU_Type[is.na(master_new$AU_Type)]="Undefined"
newsitesadded <- dim(master_new)[1]-orig_master[1]
if(newsitesadded!=new_autval){
print("WARNING: discrepancy in number of sites added to master site list and number of new sites detected. Code/data review needed.")
}
if(dim(master_new)[2]!=orig_master[2]){
print("WARNING: discrepancy in number of columns in old and updated master site list. Code/data review needed.")
}
print(paste("Updated master site list has",dim(master_new)[1],"sites, with", newsitesadded,"new sites added to original",orig_master[1],"sites in master list."))
# Export master site file
writexl::write_xlsx(list(sites=master_new, reasons=reasons_all),
path = choose.files(caption="Save As...", filters = c("Excel Workbook (.xlsx)","*.xlsx"), multi=F), format_headers=F, col_names=T)
print("Master site file updated and review/rejection reasons file created.")
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.