#' @title Stressor List
#'
#' @description Get stressor list.
#'
#' @details Box plots of each stressor, grouped by category.
#'
#' Required objects: all specified as inputs.
#'
#' chem.info need to include DirIncStress. Valid values are 'inc' or 'dec'.
#'
#' @param TargetSiteID Site ID
#' @param site.Clusters Clusters
#' @param chem.info chem information
#' @param cluster.chem chem data cluster.
#' @param cluster.samps sample cluster.
#' @param ref.sites reference sites
#' @param site.chem Chem sites
#' @param probsHigh probabilities, high
#' @param probsLow probabilities, low
#' @param biocomm Biological community; algae or BMI. Default = "BMI".
#' @param dir_results Directory to save plots. Default = working directory and Results.
#' @param dir_sub Subdirectory for outputs from this function. Default = "SiteInfo"
#'
#' @return A jpeg in the "Results" subdirectory of the working directory with box plots.
#' Also returns a list of stressors; stressors and site.stressor.pctrank.
#'
# @importFrom pryr "%<a-%"
#'
#' @examples
#' TargetSiteID <- "SRCKN001.61"
#' dir_results <- file.path(getwd(), "Results")
#'
#' # Data getSiteInfo
#' # data, example included with package
#' data.Stations.Info <- data_Sites # need for getSiteInfo and getChemDataSubsets
#' data.SampSummary <- data_SampSummary
#' data.303d.ComID <- data_303d
#' data.bmi.metrics <- data_BMIMetrics
#' data.algae.metrics <- data_AlgMetrics
#' data.mod <- data_ReachMod
#'
#' # Cluster based on elevation category # need for getSiteInfo and getChemDataSubsets
#' elev_cat <- toupper(data.Stations.Info[data.Stations.Info[,"StationID_Master"]==TargetSiteID
#' , "ElevCategory"])
#' if(elev_cat=="HI"){
#' data.cluster <- data_Cluster_Hi
#' } else if(elev_cat=="LO") {
#' data.cluster <- data_Cluster_Lo
#' }
#'
#' # Map data
#' # San Diego
#' #flowline <- rgdal::readOGR(dsn = "data_gis/NHDv2_Flowline_Ecoreg85", layer = "NHDv2_eco85_Project")
#' #outline <- rgdal::readOGR(dsn = "data_gis/Eco85", layer = "Ecoregion85")
#' # AZ
#' map_flowline <- data_GIS_Flow_HI
#' map_flowline2 <- data_GIS_Flow_LO
#' if(elev_cat=="HI"){
#' map_flowline <- data_GIS_Flow_HI
#' } else if(elev_cat=="LO") {
#' map_flowline <- data_GIS_Flow_LO
#' }
#' map_outline <- data_GIS_AZ_Outline
#' # Project site data to USGS Albers Equal Area
#' usgs.aea <- "+proj=aea +lat_1=29.5 +lat_2=45.5 +lat_0=23
#' +lon_0=-96 +x_0=0 +y_0=0 +datum=NAD83
#' +units=m +no_defs +ellps=GRS80 +towgs84=0,0,0"
#' # projection for outline
#' my.aea <- "+proj=aea +lat_1=20 +lat_2=60 +lat_0=40 +lon_0=-96 +x_0=0 +y_0=0
#' +datum=NAD83 +units=m +no_defs +ellps=GRS80 +towgs84=0,0,0"
#' map_proj <- my.aea
#' #
#' dir_sub <- "SiteInfo"
#'
#' # Run getSiteInfo
#' list.SiteSummary <- getSiteInfo(TargetSiteID, dir_results, data.Stations.Info
#' , data.SampSummary, data.303d.ComID
#' , data.bmi.metrics, data.algae.metrics
#' , data.cluster, data.mod
#' , map_proj, map_outline, map_flowline
#' , dir_sub=dir_sub)
#'
#' # Data getChemDataSubsets
#' # data import, example
#' # data.chem.raw <- read.delim(paste(myDir.Data,"data.chem.raw.tab",sep=""),na.strings = c(""," "))
#' # data.chem.info <- read.delim(paste(myDir.Data,"data.chem.info.tab",sep=""))
#' # data, example included with package
#' site.COMID <- list.SiteSummary$COMID
#' site.Clusters <- list.SiteSummary$ClustIDs
#' data.chem.raw <- data_Chem
#' data.chem.info <- data_ChemInfo
#'
#' # Run getChemDataSubsets
#' list.data <- getChemDataSubsets(TargetSiteID, comid=site.COMID, cluster=site.Clusters
#' , data.cluster=data.cluster, data.Stations.Info=data.Stations.Info
#' , data.chem.raw=data.chem.raw, data.chem.info=data.chem.info)
#'
#' # datasets getStressorList
#' chem.info <- list.data$chem.info
#' cluster.chem <- list.data$cluster.chem
#' cluster.samps <- list.data$cluster.samps
#' ref.sites <- list.data$ref.sites
#' site.chem <- list.data$site.chem
#' dir_sub <- "CandidateCauses"
#'
#' # set cutoff for possible stressor identification
#' probsLow <- 0.10
#' probsHigh <- 0.90
#' biocomm <- "bmi"
#'
#' # Run getStressorList
#' list.stressors <- getStressorList(TargetSiteID, site.Clusters, chem.info, cluster.chem
#' , cluster.samps, ref.sites, site.chem
#' , probsHigh, probsLow, biocomm, dir_results
#' , dir_sub)
#
#' @export
getStressorList <- function(TargetSiteID, site.Clusters, chem.info, cluster.chem
, cluster.samps, ref.sites, site.chem
, probsHigh, probsLow, biocomm="bmi"
, dir_results=file.path(getwd(), "Results")
, dir_sub="CandidateCauses"
) {##FUNCTION.START
# DEBUGGING ####
boo.DEBUG <- FALSE
#
if(boo.DEBUG==TRUE){##IF.boo.DEBUG.START
g <- 1
# all other function inputs defined in example.
}##IF.boo.DEBUG.END
#
useLU <- FALSE
# QC, 20190905
# chem.info$DirIncStress to lower case
chem.info$DirIncStress <- tolower(chem.info$DirIncStress)
# check for and create (if necessary) "Results" subdirectory of working directory
# wd <- getwd()
# dir.sub <- "Results"
wd <- dirname(dir_results)
dir.sub <- basename(dir_results)
dir.sub2 <- TargetSiteID
dir.sub3 <- dir_sub
ifelse(!dir.exists(file.path(wd, dir.sub, dir.sub2))==TRUE
, dir.create(file.path(wd, dir.sub, dir.sub2))
, FALSE)
ifelse(!dir.exists(file.path(wd, dir.sub, dir.sub2, dir.sub3))==TRUE
, dir.create(file.path(wd, dir.sub, dir.sub2, dir.sub3))
, FALSE)
#
stations <- TargetSiteID
nolu.cluster <- "clust_noland"
lu.cluster <- "clust_land"
# First 2 columns are ChemSampID and StationID_Master
cluster.chem.data <- cluster.chem[3:ncol(cluster.chem)]
cluster.ref.chem <- subset(cluster.chem, cluster.chem$StationID_Master %in% ref.sites)
cluster.ref.chem.data <- cluster.ref.chem[3:ncol(cluster.ref.chem)]
chemnames <- names(cluster.chem[,3:ncol(cluster.chem.data)])
allcount <- apply(cluster.chem.data, 2, function(x) sum(!is.na(x)))
alltype <- unlist(lapply(1:ncol(cluster.chem.data), function(x) is.numeric(cluster.chem[,x])))
coolvar <- names(allcount)[allcount>2 & alltype]
groupnames <- unique(subset(chem.info, chem.info$ConvertTo %in% chemnames, select = "GroupName"))
numgps <- length(groupnames[,1])
# Plots ####
ppi <- 300
plot_H <- 4
plot_W <- 9
# Capture each plot in a list for the PDF
## https://stackoverflow.com/questions/13273611/how-to-append-a-plot-to-an-existing-pdf-file
## https://www.andrewheiss.com/blog/2016/12/08/save-base-graphics-as-pseudo-objects-in-r/
plots.g <- vector(numgps, mode="list")
# Generate 1 box plot for each group, ref sites in blue, target site in red
for (g in 1:numgps) {##FOR.g.START
gpchems <- subset(chem.info, GroupName == groupnames[g,], select = "ConvertTo")
gpcoolvar <- subset(coolvar, coolvar %in% gpchems$ConvertTo)
n <- length(gpcoolvar)
#
if(boo.DEBUG==TRUE){##IF~boo.DEBUG~START
print(paste0("Item (", g, "/", numgps, ")"))
utils::flush.console()
}##IF~boo.DEBUG~START
#
if(n>0) { ##FOR.n.START
# plot.pryr %<a-% {##pryr.START
# maintitle <- paste(groupnames[g,], "Standardized values, All sites in cluster", sep=", ")
# graphics::par(mfrow = c(1,1), mar = c(4,8,1,1))
# if (useLU == TRUE) {##IF.useLU.START
# labmain = paste(stations, ": Cluster", site.Clusters[1,lu.cluster])
# } else {
# labmain = paste(stations, ": Cluster", site.Clusters[1,nolu.cluster])
# }##IF.useLU.END
# labx = paste(maintitle, labmain, sep = "\n")
# graphics::plot(y= 1:n, x= stats::runif(n,0,1), axes = F, type="n", xlab = "", ylab ="",
# xlim = c(0,1), cex.lab = 0.8)
# graphics::title(xlab=labx, line = 1, cex.lab = 0.8)
# graphics::axis(2, at = 1:n, labels = gpcoolvar[1:n], las =1, cex.axis = 0.6)
# for(i in 1:n) {##FOR.i.START
# xvar <- cluster.chem[,gpcoolvar[i]]; dif <- diff(range(xvar, na.rm =T))
# newvar <- (xvar-min(xvar, na.rm=T))/dif
# graphics::boxplot(newvar, at = i,boxwex=0.5, horizontal =T, add =T,axes = F
# , outcex = 0.6, staplewex = 1, medlwd = 0.9, boxlwd = 0.8)
# good.ref.data <- cluster.ref.chem.data[,gpcoolvar[i]][!is.na(cluster.ref.chem[,gpcoolvar[i]])]
# if (length(good.ref.data) != 0) {##IF.length.START
# point2 <- (cluster.ref.chem.data[,gpcoolvar[i]]-min(xvar, na.rm=T))/dif
# graphics::points(point2, rep(i,length(point2)), col = "blue", pch = 15,cex=0.6, bg = 2)
# }##IF.length.END
# point1 <- (site.chem[,gpcoolvar[i]]-min(xvar, na.rm=T))/dif
# graphics::points(point1, rep(i,length(point1)), col = "red", pch = 19,cex=0.6, bg = 2)
# }##FOR.i.END
# graphics::box(bty="l")
# }##pryr.END
#
# # PDF, capture plot in list
# #lst.plots.g[[g]] <- grDevices::recordPlot()
# #plots.g[[g]] <- plot.pryr
# #assign(paste0("plot_",g),plot.pryr)
# plot.pryr
# plots.g[[g]] <- grDevices::recordPlot()
#
# # JPG, create
# grDevices::jpeg(filename = paste0("Results/",TargetSiteID,"/",TargetSiteID,
# ".boxes.", make.names(groupnames[g,]), ".jpg"), width = 4*ppi,
# height = 3*ppi, pointsize = 8, quality = 100, bg = "white",
# res = ppi)
# plot.pryr
# grDevices::dev.off()
#
# ggplot ####
## Plot, Data, Cluster
df_plot_wide <- as.data.frame(cluster.chem[,gpcoolvar])
colnames(df_plot_wide) <- gpcoolvar
# need as.data.frame and colnames for groups with only 1 parameter
df_plot_wide_min <- apply(df_plot_wide, 2, min, na.rm=TRUE)
df_plot_wide_range <- apply(df_plot_wide, 2, range, na.rm=TRUE)
df_plot_wide_diff <- apply(df_plot_wide_range, 2, diff)
#df_plot_wide_mod <- (df_plot_wide - df_plot_wide_min) / df_plot_wide_diff
df_plot_wide_valminusmin <- sweep(df_plot_wide, 2, df_plot_wide_min, FUN="-")
df_plot_wide_mod <- sweep(df_plot_wide_valminusmin, 2, df_plot_wide_diff, FUN="/")
# reshape from wide to long
df_plot_long <- reshape2::melt(df_plot_wide_mod, measure.vars=gpcoolvar, variable.name = "GrpNm")
# Remove NaN so get rid of error message?
df_plot_long <- df_plot_long[!is.na(df_plot_long$value), ]
## Plot, Data, Cluster_Ref
# QC for nrow
boo_plot_ref <- FALSE
if(nrow(cluster.ref.chem.data)>0){##IF~nrow(cluster.ref.chem.data)~START
df_plot_ref_wide <- as.data.frame(cluster.ref.chem.data[, gpcoolvar])
colnames(df_plot_ref_wide) <- gpcoolvar
df_plot_ref_wide_valminusmin <- sweep(df_plot_ref_wide, 2, df_plot_wide_min, FUN="-")
df_plot_ref_wide_mod <- sweep(df_plot_ref_wide_valminusmin, 2, df_plot_wide_diff, FUN="/")
df_plot_long_ref <- reshape2::melt(df_plot_ref_wide_mod, measure.vars=gpcoolvar, variable.name = "GrpNm")
df_plot_long_ref <- df_plot_long_ref[!is.na(df_plot_long_ref$value), ]
boo_plot_ref <- ifelse(nrow(df_plot_long_ref)>0, TRUE, FALSE)
}##IF~nrow(cluster.ref.chem.data)~END
## Plot, Data, Target Site
df_plot_targ_wide <- as.data.frame(site.chem[, gpcoolvar])
colnames(df_plot_targ_wide) <- gpcoolvar
df_plot_targ_wide_valminusmin <- sweep(df_plot_targ_wide, 2, df_plot_wide_min, FUN="-")
df_plot_targ_wide_mod <- sweep(df_plot_targ_wide_valminusmin, 2, df_plot_wide_diff, FUN="/")
df_plot_long_targ <- reshape2::melt(df_plot_targ_wide_mod, measure.vars=gpcoolvar, variable.name = "GrpNm")
df_plot_long_targ <- df_plot_long_targ[!is.na(df_plot_long_targ$value), ]
boo_plot_targ <- ifelse(nrow(site.chem)!=0, TRUE, FALSE)
## Plot, Variables, Strings
str_Group <- as.character(groupnames[g,1])
str_title <- TargetSiteID
str_subtitle <- paste0("Cluster ", site.Clusters[1,nolu.cluster], " (all sites)")
str_xlab <- "Standardized Values"
str_ylab <- str_Group
## Plot, Variables, Colors
col_sites_all <- "dark gray"
col_sites_all_ref <- "blue"
col_sites_cl <- "cyan3"
col_sites_cl_ref <- col_sites_all_ref
col_sites_targ <- "red"
col_line <- "black"
## Plot, Variables, Fill
fill_sites_all <- col_sites_all
fill_sites_all_ref <- fill_sites_all
fill_sites_cl <- col_sites_cl
fill_sites_cl_ref <- fill_sites_cl
fill_sites_targ <- col_sites_targ
## Plot, Variables, Points
pch_sites_all <- 19 # solid circle
pch_sites_all_ref <- 21 # circle outline
pch_sites_cl <- 19
pch_sites_cl_ref <- pch_sites_all_ref
pch_sites_targ <- 17 # triangle
## Plot, Variables, Sizes
cex_mod <- 3
cex_sites_all <- cex_mod*1
cex_sites_all_ref <- cex_sites_all
cex_sites_cl <- cex_mod*0.95
cex_sites_cl_ref <- cex_sites_cl
cex_sites_targ <- cex_mod*1.2
## Plot, Variables, Legend
leg_name <- "Sites"
leg_labels <- c("cluster ref", "target")
leg_shape <- c(pch_sites_cl_ref, pch_sites_targ)
leg_col <- c(col_sites_cl_ref, col_sites_targ)
leg_fill <- c(fill_sites_cl_ref, fill_sites_targ)
# ggplot, main
p_SL <- ggplot2::ggplot(data=df_plot_long) +
ggplot2::geom_boxplot(ggplot2::aes(x=GrpNm, y=value)) +
ggplot2::coord_flip() +
ggplot2::labs(title=str_title, subtitle=str_subtitle, y=str_xlab, x=str_ylab) +
ggplot2::theme(plot.title=ggplot2::element_text(hjust=0.5), plot.subtitle=ggplot2::element_text(hjust=0.5), axis.text.x = ggplot2::element_blank(), axis.ticks.x=ggplot2::element_blank())
#
# ggplot, points subsets
## Cluster, Ref
if(boo_plot_ref==TRUE){##IF~boo_plot_ref~START
p_SL <- p_SL + ggplot2::geom_jitter(data=df_plot_long_ref, width=0.1, ggplot2::aes(x=GrpNm, y=value, color="cl_ref", shape="cl_ref", fill="cl_ref"), size=1)
} else {
p_SL <- p_SL + ggplot2::geom_blank(ggplot2::aes(color="cl_ref", shape="cl_ref", fill="cl_ref"))
}##IF~boo_plot_ref~END
## Target Site
if(boo_plot_targ==TRUE){##IF~boo_plot_targ~START
p_SL <- p_SL + ggplot2::geom_jitter(data=df_plot_long_targ, width=0.1, ggplot2::aes(x=GrpNm, y=value, color="targ", shape="targ", fill="targ"), size=1)
} else {
p_SL <- p_SL + ggplot2::geom_blank(ggplot2::aes(color="targ", shape="targ", fill="targ"))
}##IF~boo_plot_targ~END
#
# ggplot, Legend
p_SL <- p_SL + ggplot2::scale_shape_manual(name=leg_name, labels=leg_labels, values=leg_shape) +
ggplot2::scale_color_manual(name=leg_name, labels=leg_labels, values=leg_col) +
ggplot2::scale_fill_manual(nam=leg_name, labels=leg_labels, values=leg_fill)
#
print(p_SL)
plots.g[[g]] <- grDevices::recordPlot()
#
fn_jpg <- file.path(wd, dir.sub, dir.sub2, dir.sub3, paste0(TargetSiteID, ".boxes.", make.names(groupnames[g,]), ".jpg"))
ggplot2::ggsave(fn_jpg, p_SL, width=plot_W, height=plot_H, units="in")
}##IF.n.END
}##FOR.g.END
# PDF ####
# Create PDF from list
fn_pdf <- file.path(wd, dir.sub, dir.sub2, dir.sub3, paste0(TargetSiteID,".boxes.ALL.pdf"))
grDevices::pdf(file=fn_pdf, width=plot_W, height=plot_H)
for (i in plots.g){##FOR.gp.START
#grDevices::replayPlot(g.plot)
if(is.null(i)==TRUE) {next}
grDevices::replayPlot(i)
}##FOR.gp.END
grDevices::dev.off()
rm(plots.g)
# Data File ####
chem.pctrank <- apply(cluster.chem[,3:ncol(cluster.chem)], 2, function(x) dplyr::percent_rank(x))
data.chem.pctrank <- as.data.frame(chem.pctrank)
data.chem.pctrank <- cbind(cluster.chem$StationID_Master,
cluster.chem$ChemSampleID,data.chem.pctrank)
colnames(data.chem.pctrank)[1] <- "StationID_Master"
colnames(data.chem.pctrank)[2] <- "ChemSampleID"
row.names(data.chem.pctrank) <- NULL
fn.pctrank <- file.path(dir_results, TargetSiteID, dir.sub3, paste0(TargetSiteID, ".chem.pctrank.", biocomm, ".txt"))
utils::write.table(data.chem.pctrank, fn.pctrank, sep="\t", col.names=TRUE, row.names = FALSE)
site.pctrank <- subset(data.chem.pctrank, StationID_Master==TargetSiteID)
stressor <- c("none")
#
if(boo.DEBUG==TRUE){##IF.boo.DEBUG.START
c <- 3
}##IF.boo.DEBUG.END
for (c in 3:ncol(site.pctrank)) {
print(c)
chemname <- colnames(site.pctrank)[c]
bad <- is.na(site.pctrank[,c])
check <- site.pctrank[,c]
good <- check[!bad]
maxSiteVal <- max(good, na.rm = TRUE)
minSiteVal <- min(good, na.rm = TRUE)
# DirIncStress ####
# (not all in chem.info)
if(chemname %in% chem.info[, "StdParamName"]){
ExpDirIncStress <- (chem.info[chem.info[, "StdParamName"] == chemname, "DirIncStress"])[1]
} else {
ExpDirIncStress <- "unk"
}
if (ExpDirIncStress == "dec") {
if (minSiteVal <= probsLow) {
stressor <- c(stressor, chemname)
}
}
if ((ExpDirIncStress == "inc") && (maxSiteVal >= probsHigh)) {
stressor <- c(stressor, chemname)
}
}##FOR~c~END
stressorlist <- stressor
# LogTransf ####
# 20190110, get log transformation code from chem.info
# define pipe
`%>%` <- dplyr::`%>%`
#x <- unique(chem.info[chem.info$StdParamName %in% stressorlist, c("StdParamName", "LogTransf")])
# need to use max (default of 1) in case of duplicates
chem.info_LogTransf <- chem.info %>%
dplyr::group_by(StdParamName) %>%
dplyr::summarise(max_LogTransf=max(LogTransf, na.rm=TRUE))
stressorlist4merge <- data.frame(StdParamName=stressorlist, Sort=1:length(stressorlist))
# merge
LogTransf_merge <- merge(stressorlist4merge, chem.info_LogTransf, all.x=TRUE)
# sort
LogTransf_merge <- LogTransf_merge[order(LogTransf_merge$Sort), ]
# NA to 0
LogTransf_merge[is.na(LogTransf_merge[,"max_LogTransf"]), "max_LogTransf"] <- 0
# create output ####
myStressors <- list(stressors = stressorlist, site.stressor.pctrank = site.pctrank
, stressors_LogTransf=LogTransf_merge$max_LogTransf)
#
return(myStressors)
} # FUN end
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.