realTimeloads.R
In realTimeloads: Analyte Flux and Load from Estimates of Concentration and Discharge

## ----include = FALSE----------------------------------------------------------
knitr::opts_chunk$set(
  collapse = TRUE,
  comment = "#>",
  out.width = "100%"
)

## ----setup--------------------------------------------------------------------
### Load library and example data ----
library(realTimeloads)
InputData <- realTimeloads::ExampleData
?realTimeloads::ExampleData

## ----Process and regress data-------------------------------------------------
### Assign list items to variables used in functions ------------------------ 
Site <- InputData$Site
ADCP <- InputData$ADCP
Echo_Intensity_Beam_1 <- InputData$Echo_Intensity
Echo_Intensity_Beam_2 <- InputData$Echo_Intensity # example code assumes backscatter is equal across beams; however, backscatter between beams will differ in practice 
Sonde <- InputData$Sonde
Height <- InputData$Height
Discharge <- InputData$Discharge
Sediment_Samples <- InputData$Sediment_Samples

### Process acoustic backscatter data  --------------------------------------
ADCPOutput <- realTimeloads::acoustic_backscatter_processing(Site,ADCP,Height,
                                                             Sonde,Echo_Intensity_Beam_1,Echo_Intensity_Beam_2)

### Compute estimate of analyte timeseries using https://doi.org/10.1029/2007WR006088 --------

# threshold (minutes) for interpolation of Surrogate to Analyte timeseries
threshold <- 30

# Surrogate "X" (e.g, acoustic backscatter and/or turbidity)
tx <- ADCPOutput$time
x <-ADCPOutput$mean_sediment_corrected_backscatter_beam_1_and_beam_2_dB

# Analyte "y" (e.g., TSS, NOx, etc)
ty <- Sediment_Samples$time
y <- Sediment_Samples$SSCpt_mg_per_liter

# interpolate surrogate onto analyte timeseries
# Randomly sample timeseries n times to simulate sampling that would occur in the field 
n <- 50
ind_calibration <-sample(1:length(ty),n,replace=FALSE)
calibration <- realTimeloads::surrogate_to_analyte_interpolation(tx,x,
                                                            ty[ind_calibration],y[ind_calibration],30)

# Specify calibration variable names for later storage in regression data
names(calibration)[grepl('surrogate',names(calibration))] <- 'SNR_dB'
names(calibration)[grepl('analyte',names(calibration))] <- 'SSCpt_mg_per_liter'

# compute bootstrap regression parameters per https://doi.org/10.1029/2007WR006088
fun_eq <- "log10(SSCpt_mg_per_liter) ~ SNR_dB"

Regression <- realTimeloads::bootstrap_regression(calibration,fun_eq)

# predict concentration timeseries with uncertainty
Surrogate <- data.frame(ADCPOutput$time,
                        ADCPOutput$mean_sediment_corrected_backscatter_beam_1_and_beam_2_dB)
colnames(Surrogate) <- c('time','SNR_dB')

Estimated_Concentration <- realTimeloads::estimate_timeseries(Surrogate,Regression)

## ----Compute load-------------------------------------------------------------
### Interpolate discharge onto surrogate time series (e.g., Backscatter t.s) ----
# All data in ExampleData are on the same time-step, however in practice this may not be the case
Q <- realTimeloads::linear_interpolation_with_time_limit(Discharge$time,
                                                         Discharge$Discharge_m_cubed_per_s,tx,threshold = 1)$x_interpolated

# compute dt (second) for load integration
dt =  c()
dt[2:length(tx)] <- as.numeric(difftime(tx[2:length(tx)],tx[1:length(tx)-1],units = "secs"))
dt[1] = median(dt,na.rm=TRUE) # assume time step 1 using median dt

# compute load (L) (kt) at each time "t"
# Estimated_Concentration$estimated_timeseries is a matrix with simulated timeseries drawn from Monte Carlo simulations on the bootstrapped regression parameters. Rows are simulations, columns are time. Methods are from https://doi.org/10.1029/2007WR006088. 
iters <- nrow(Regression$regression_parameters) # number of Monte Carlo simulations
Qsi =  matrix(NA, nrow = iters, ncol = length(x))
for (i in 1:iters) {
  Qsi[i,] <- Estimated_Concentration$estimated_timeseries[i,]*Q*dt*1e-9 # assumes concentration is mg/l and discharge is cubic meters per sec, dt is in seconds
}

## ----Compute uncertainty------------------------------------------------------
### Compute uncertainty on concentration and load timeseries ---------------
cQs = rowSums(Qsi,na.rm=TRUE) # total load for each iteration
quants <- c(0.0527, 0.1587, 0.5, 0.8414, 0.9473) # +/- 1 and 2 sigma and median (i.e., reported) estimate

Total_load_kt <- data.frame(t(quantile(cQs,quants,na.rm=TRUE)))
colnames(Total_load_kt) = c('minus_two_sigma_confidence','minus_one_sigma_confidence','median_confidence','plus_one_sigma_confidence','plus_two_sigma_confidence')

Reported_real_time_load_estimate <- Total_load_kt$median_confidence

# explicitly name timeseries with uncertainty for plotting below 
Analyte_concentration_timeseries_mg_per_liter <- Estimated_Concentration$estimated_timeseries_quantiles

Analyte_flux_timeseries_kt <- data.frame(t(apply(Qsi, 2 , quantile , probs = quants , 
                                                 na.rm = TRUE )))

colnames(Analyte_flux_timeseries_kt) = c('minus_two_sigma_confidence','minus_one_sigma_confidence','median_confidence',
                                         'plus_one_sigma_confidence','plus_two_sigma_confidence')

## ----Plot timeseries,fig.dim = c(8,6),dpi=300---------------------------------
### Compare estimate of concentration to actual ----

# actual loads (kt)
SSCxs <- Sediment_Samples$SSCxs_mg_per_liter
SSCpt <- Sediment_Samples$SSCpt_mg_per_liter
Qspt <- SSCpt*Q*dt*1e-9 # load from SSC measured at a point
Qsxs <- SSCxs*Q*dt*1e-9 # load from depth-averaged, velocity-weighted SSC

indp <- seq(from = 1, to = nrow(ADCP), by = 1) # vector for plotting subset of timeseries if desired 

plot(tx[indp],Analyte_concentration_timeseries_mg_per_liter$median_confidence[indp],
     col='red',type = "l",lwd= 2,xlab = "time (AEST)",ylab="Analyte concentration (mg/l)",
     main = "Estimated versus actual concentration",ylim = c(0,1500))
lines(tx[indp],Analyte_concentration_timeseries_mg_per_liter$minus_two_sigma_confidence[indp],
      col='grey',lty = c(5))
lines(tx[indp],Analyte_concentration_timeseries_mg_per_liter$plus_two_sigma_confidence[indp],
      col='grey',lty = c(5))
points(ty[ind_calibration],y[ind_calibration],
       col = 'black',pch = c(21))
lines(tx,Sediment_Samples$SSCpt_mg_per_liter,
      col='black',lwd= 1.5)

legend("topright",legend = c("Estimated concentration","Estimation uncertainty","Actual concentration used in regression","Actual concentration timeseries"),
       lty = c(1, 5, 0,1),col = c('red', 'grey', 'black', 'black'),pch = c(-1,-1,21,-1))

###  Compare estimate of flux to actual ----
plot(tx[indp],Analyte_flux_timeseries_kt$median_confidence[indp]/dt[indp]*1e3,
     col='red',type = "l",lwd= 2,xlab = "time (AEST)",ylab="Analyte flux (ton per second)",
     main = "Estimated versus actual flux",ylim = c(0,20))
lines(tx[indp],Analyte_flux_timeseries_kt$minus_two_sigma_confidence[indp]/dt[indp]*1e3,
      col='grey')
lines(tx[indp],Analyte_flux_timeseries_kt$plus_two_sigma_confidence[indp]/dt[indp]*1e3,
      col='grey')
points(tx[ind_calibration],Qspt[ind_calibration]/dt[ind_calibration]*1e3,col = 'black',
       pch = c(21))
lines(tx,Qspt/dt*1e3,col = 'black',lwd= 1.5)
legend("topright",legend = c("Estimated flux","Estimation uncertainty","Actual flux of data used in regression","Actual flux timeseries"),
       lty = c(1, 5, 0,1),col = c('red', 'grey', 'black', 'black'),pch = c(-1,-1,21,-1))

# check estimated load / modeled load
#mean(cQs)/sum(Qspt,na.rm=TRUE) # relative to Cpt (biased) load
#mean(cQs)/sum(Qsxs,na.rm=TRUE) # relative to Cxs (actual) load

Output <- list("time" = tx,"surrogate_timeseries_used_for_prediction_of_analyte"= df,"regression_data" = calibration,"regression_parameters_estimated_from_bootstrap_resampling" = Regression,"Analyte_concentration_timeseries_mg_per_liter"= Analyte_concentration_timeseries_mg_per_liter,"Dicharge" = Discharge,"Analyte_flux_timeseries_kt" =Analyte_flux_timeseries_kt)

## ----Compare total loads,fig.dim = c(8,8),dpi=300-----------------------------
### Compare total loads in barplot ----
# check ratios of estimated load to modeled load
#Total_load_kt$median_confidence/sum(Qspt,na.rm=TRUE) # estimated load relative to load at Cpt
#Total_load_kt$median_confidence/sum(Qsxs,na.rm=TRUE) # estimate total load relative to actual load from Cxs

loads <- c('Estimated load from SSCpt(dB)'= Total_load_kt$median_confidence,'Load from SSCpt' = sum(Qspt,na.rm=TRUE),'Load from SSCxs' = sum(Qsxs,na.rm=TRUE))

y <- c(Total_load_kt$median_confidence)
y0 <- c(Total_load_kt$plus_two_sigma_confidence)
y1 <- c(Total_load_kt$minus_two_sigma_confidence)
  
hb <- barplot(loads,main="Total load comparison",
        ylab="Total load (kt)",border=NA,ylim = c(0,8000))
arrows(x0=hb[1],y0=y0,y1=y1,angle=90,code=3,length=0.1)


## ----simulate missing data and impute data,fig.dim = c(8,8),dpi=300-----------

### Impute data ----
xo <- ADCPOutput$mean_sediment_corrected_backscatter_beam_1_and_beam_2_dB
# simulate 50% missing data
idata <- sample(which(is.finite(xo)),round(sum(is.finite(xo))*0.50),replace=FALSE)
x <- rep(NA,length(xo))
x[idata] <- xo[idata]
flow_ratio <- imputeTS::na_interpolation(Q/x)
Xreg <- cbind(Q,flow_ratio)
# random sampling of training set in impute_data() can generate warnings in decision tree algorithm 
# In impute_data(), ptrain = 1 can be set to preclude uncertainty estimation and decrease code run time. 
# ptrain=0.8 indicates that 80% of the finite data in x will be used in training regression trees and 20% of the data will be held-out for computing validation accuracy
suppressWarnings(Imputation <- impute_data(tx,x,Xreg,MC=10,ptrain=0.8))

ximputed <- Imputation$Imputed_data$x
ximputed[is.finite(x)] <- NA

plot(xo,ximputed,xlab = 'Backscatter (dB)', ylab = 'Imputed backscatter (dB)',ylim = c(min(xo,na.rm = T),max(xo,na.rm = T)),xlim = c(min(xo,na.rm = T),max(xo,na.rm = T)))
points(xo,Imputation$Imputed_data$x_at_minus_two_sigma_confidence,col='red')
points(xo,Imputation$Imputed_data$x_at_plus_two_sigma_confidence,col='blue')
lines(c(min(xo,na.rm=TRUE),max(xo,na.rm=TRUE)))
legend("topleft",legend = c("Median imputed value","Plus 2-sigma uncertainty on imputed value","Minus 2-sigma uncertainty on imputed value"), lty = c(0, 0,0),col = c('black', 'red', 'blue'),pch = c(21,21,21))

Any scripts or data that you put into this service are public.

realTimeloads documentation built on Oct. 18, 2023, 5:09 p.m.

rdrr.io home R language documentation Run R code online

CRAN packages Bioconductor packages R-Forge packages GitHub packages

Note that we can't provide technical support on individual packages. You should contact the package authors for that.

realTimeloads
Analyte Flux and Load from Estimates of Concentration and Discharge

inst/doc/realTimeloads.R
In realTimeloads: Analyte Flux and Load from Estimates of Concentration and Discharge

Try the realTimeloads package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

realTimeloads Analyte Flux and Load from Estimates of Concentration and Discharge

inst/doc/realTimeloads.R In realTimeloads: Analyte Flux and Load from Estimates of Concentration and Discharge

Try the realTimeloads package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

realTimeloads
Analyte Flux and Load from Estimates of Concentration and Discharge

inst/doc/realTimeloads.R
In realTimeloads: Analyte Flux and Load from Estimates of Concentration and Discharge