# MFE metabolism function
## Support Functions
#' Floor Mins
#' Snap each datetime to the nearest timeStep interval (e.g. if timeStep is 5, 00:07 becomes 00:05). Note that despite the name, times are rounded to the nearest grid point, not strictly floored.
#' @param dataIn A data frame
#' @param timeStep Time interval between DO measurements, minutes; defaults to 10 minutes
#' @return A POSIXct vector of snapped datetimes
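#' @examples
#' # A minimal sketch (not run); the data frame and values are hypothetical
#' \dontrun{
#' df <- data.frame(datetime = as.POSIXct(c("2019-06-01 00:07:00", "2019-06-01 00:13:00"),
#'                                        tz = "America/Chicago"),
#'                  DO = c(8.1, 8.2))
#' floorMins(df, timeStep = 5)  # snaps to 00:05 and 00:15
#' }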
#' @export
floorMins <- function(dataIn, timeStep){
#Pull out datetime column and name it x
x <- dataIn$datetime
nRows <- length(x)
#Truncate each datetime to hour; convert to class numeric
floorHour <- as.POSIXct(trunc(x[1:nRows],"hour"),tz="America/Chicago")
floorNumeric <- as.numeric(floorHour)
#Create sequence from floorNumeric to next hour by timeStep (in seconds)
seqSec <- seq(0,3600,60*timeStep)
#Create matrix where each row is floorNumeric + the elements of seqSec
matSec <- matrix(rep(seqSec,nRows),nrow=nRows,byrow=T)
matSec <- floorNumeric + matSec
#Calculate abs(time difference) between each element of x and the timeStep intervals
difs <- abs(as.numeric(x) - matSec)
#Find the minimum absolute difference in each row and select the corresponding time from matSec
whichMin <- apply(difs,1,which.min)
rowNames <- as.numeric(rownames(data.frame(whichMin)))
matIndex <- (whichMin-1)*nRows + rowNames
matSecFlat <- matrix(matSec,ncol=1)
outTime <- as.POSIXct(matSecFlat[matIndex],origin="1970-01-01",tz="America/Chicago")
#Return outTime
return(outTime)
}
#' Find Not Dup Rows
#' Function to find duplicate datetime stamps. Returns the indices of rows where the datetime is NOT a duplicate of the datetime in a previous row.
#' @param dataIn a data frame
#' @return A vector, indices of rows where the datetime is NOT a duplicate of the datetime in a previous row.
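#' @examples
#' # A minimal sketch (not run); the data frame and values are hypothetical
#' \dontrun{
#' df <- data.frame(datetime = as.POSIXct(c("2019-06-01 00:00:00", "2019-06-01 00:10:00",
#'                                          "2019-06-01 00:10:00"), tz = "America/Chicago"),
#'                  DO = c(8.1, 8.2, 8.3))
#' keep <- findNotDupRows(df)  # warns about 1 duplicate; returns c(1, 2)
#' df <- df[keep, ]
#' }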
#' @export
findNotDupRows <- function(dataIn){
#Name of the data column, used in the warning message below
dataInName<-colnames(dataIn)[2]
#Find duplicated time stamps
dups <- duplicated(dataIn$datetime)
#If no duplicated time stamps, notDupRows=all rows in dataIn
if (all(dups==FALSE)){
notDupRows <- c(1:dim(dataIn)[1])
}else{
#If at least one time stamp is duplicated, warn, and notDupRows is indexes
#of rows where datetime is not duplicate of the datetime in a previous row
notDupRows <- which(dups==FALSE)
nDups <- dim(dataIn)[1]-length(notDupRows)
print(paste("Warning:",nDups,"rows with duplicate time stamps in",dataInName,"will be removed"))
}
#Return dupRows
return(notDupRows)
}
#' Calculate Z Mix Density
#' Calculate mixed-layer depth from a density profile. Density is used instead of temperature to indicate the mixed layer (though note that objects in this function are still labeled e.g. 'temps', not 'dens').
#' @param dataIn A density profile. First column is datetime, POSIXct. Second and subsequent columns are density (kg m-3) at depth. Column headers for these columns are e.g. temp0, temp0.5, temp1.0, temp2, temp2.3
#' @param thresh Threshold density change to indicate end of mixed layer. Units (kg m-3) m-1. Defaults to 0.075 (value Jordan Read is using in their project)
#' @return A data frame. First column is datetime, second column is zMix calculated at that datetime.
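#' @examples
#' # A minimal sketch (not run); the densities are hypothetical
#' \dontrun{
#' dens <- data.frame(datetime = as.POSIXct("2019-06-15 12:00:00", tz = "America/Chicago"),
#'                    temp0 = 996.50, temp1 = 996.55, temp2 = 998.00, temp4 = 999.00)
#' calcZMixDens(dens)  # first gradient above thresh is between 1 and 2 m, so zMix = 1.5
#' }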
#' @export
calcZMixDens <- function(dataIn, thresh = 0.075){
#Useful things
nObs <- dim(dataIn)[1]
nCols <- dim(dataIn)[2]
#Extract vector of depths from column names of dataIn
depths <- as.numeric(substr(colnames(dataIn)[2:nCols],5,10))
#Set up structure to hold calculated metaDepth for each time point
metaDepthOut <- data.frame(datetime=dataIn$datetime,zMix=rep(NA,nObs))
#Calculate zMix for each time in dataIn
for (i in 1:nObs){
#If all temp data at this time point are NA, return NA for zMix at this time
if (all(is.na(dataIn[i,2:nCols]))){
metaDepthOut[i,2] <- NA
next
}
#Get vectors of depths and temps, excluding cols where temp is NA
temps <- unlist(dataIn[i,2:nCols])
DEPTHS <- depths[!(is.na(temps))]
TEMPS <- temps[!(is.na(temps))]
#Find change in depth, change in temp, and change in temp per depth
dz <- diff(DEPTHS)
dT <- diff(TEMPS)
dTdz <- dT/dz
#If no rate of change exceeds thresh, return max depth for zMix at this time
if (all(dTdz<thresh)){
metaDepthOut[i,2] <- max(depths)
next
}
#Identify first depth where rate of change exceeds thresh
whichFirstBigChange <- min(which(dTdz >= thresh))
metaDepth <- mean(c(DEPTHS[whichFirstBigChange],DEPTHS[(whichFirstBigChange+1)]))
#Save result to metaDepthOut
metaDepthOut[i,2] <- metaDepth
}
#Return metaDepthOut
return(metaDepthOut)
}
#' Fill Holes
#' Linearly interpolate values for strings (up to specified length) of missing data. Created by CTS, 31 July 2009.
#' @param dataIn A data frame with two columns, first is "datetime", second is data
#' @param maxLength Maximum length of NA string that you are willing to interpolate across. NOTE that this is given in minutes.
#' @param timeStep The time step of the data.
#' @return A data frame with same format as dataIn, but with values interpolated.
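#' @examples
#' # A minimal sketch (not run); the values are hypothetical
#' \dontrun{
#' times <- seq(as.POSIXct("2019-06-01 00:00:00", tz = "America/Chicago"),
#'              by = "10 mins", length.out = 6)
#' df <- data.frame(datetime = times, PAR = c(0.10, NA, NA, 0.40, 0.50, 0.60))
#' fillHoles(df, maxLength = 60, timeStep = 10)  # fills the two NAs with 0.20, 0.30
#' }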
#' @export
fillHoles <- function(dataIn, maxLength, timeStep){
#Number of rows in dataIn
nObs <- dim(dataIn)[1]
#Express maxLength as number of time steps instead of number of minutes
maxLength <- maxLength/timeStep
#Temporarily replace NAs in data with -99999
whichNA <- which(is.na(dataIn[,2]))
if(length(whichNA)){
dataIn[whichNA,2] <- -99999
}
#Identify strings of NA (-99999) values
rleOut <- rle(dataIn[,2])
which9 <- which(rleOut$values == -99999)
#If no NA strings in data, return dataIn
if (length(which9) == 0){
return(dataIn)
}else{
#Otherwise, continue
#Interpolate values for each string of NA values
for (i in 1:length(which9)){
#Determine start and end index of string i, and calculate length of string
if (which9[i]==1){
stringStart <- 1
} else{
stringStart <- 1 + sum(rleOut$lengths[1:(which9[i]-1)])
}
stringEnd <- sum(rleOut$lengths[1:which9[i]])
stringLength <- stringEnd-stringStart+1
#Skip to next i if:
# -length of string exceeds maxLength,
# -stringStart is the first obs in dataIn
# -stringEnd is the last obs in dataIn
if (stringLength > maxLength | stringStart==1 | stringEnd==nObs) next else{
#Linearly interpolate missing values
interp <- approx(x=c(dataIn[stringStart-1,"datetime"],dataIn[stringEnd+1,"datetime"]),
y=c(dataIn[stringStart-1,2],dataIn[stringEnd+1,2]),
xout=dataIn[stringStart:stringEnd,"datetime"],
method="linear")
dataIn[stringStart:stringEnd,2] <- interp$y
}
}
#Replace any remaining -99999 with NA
dataIn[which(dataIn[,2]==-99999),2] <- NA
#Return result
return(dataIn)
}
}
#' Metab Predix Version 4
#' Return DOHat, residuals, atmFlux, and NLL given par estimates. Created by CTS, 18 August 2009.
#' @param parsIn A vector of parameters; see `metabLoss_v4`.
#' @param dataIn A data frame; see `metabLoss_v4` for the required columns.
#' @return A list with elements `atmFlux`, `DOHat`, `res`, and `NLL`
#' @export
metabPredix_v4 <- function (parsIn, dataIn) {
## Inputs
#Unpack parameters and exponentiate to force them positive
iota <- exp(parsIn[1])
rho <- exp(parsIn[2])
DOInit <- exp(parsIn[3])
#Label useful things
nObs <- dim(dataIn)[1]
kO2 <- dataIn$kO2
DOSat <- dataIn$DOSat
zMix <- dataIn$zMix
irr <- dataIn$irr
DOObs <- dataIn$DOObs
fluxDummy <- dataIn$fluxDummy
##
#Calculate predictions and residuals
#Set up output
DOHat <- rep(NA,nObs)
atmFlux <- rep(NA,nObs)
#Initialize DOHat
DOHat[1] <- DOInit
#Calculate atmFlux and predicted DO for each time point
#Fluxes out of lake have negative sign
for (i in 1:(nObs-1)) {
atmFlux[i] <- fluxDummy[i] * -kO2[i] * (DOHat[i] - DOSat[i]) / zMix[i]
DOHat[i+1] <- DOHat[i] + iota*irr[i] - rho + atmFlux[i]
}
#Compare observed and predicted DO; calculate residuals and NLL
#Exclude from calculation any cases where DOObs=NA
if (any(is.na(DOObs)))
{
NAObs <- which(is.na(DOObs))
res <- DOObs[-NAObs] - DOHat[-NAObs]
} else
{
res <- DOObs - DOHat
}
nRes <- length(res)
SSE <- sum(res^2)
sigma2 <- SSE/nRes
NLL <- 0.5*((SSE/sigma2) + nRes*log(2*pi*sigma2))
#Set up output structure
dataOut <- list(atmFlux=atmFlux,DOHat=DOHat,res=res,NLL=NLL)
#Return dataOut
return(dataOut)
}
#' Metab Loss, Version 4
#' Estimating Metabolism from DO data. This function calculates the likelihoods; use in conjunction with an optimization routine to minimize NLL. Use this for one day's data at a time. Created by CTS, 29 January 2009. Version 4 created August 2009.
#' @param dataIn A data frame including the following columns: DOObs, DO (mg/L); DOSat, DO at saturation (mg/L); irr, irradiance (PAR, mmol m-2 s-1; note that GLEON data may actually be in umol and should be checked); kO2, piston velocity for O2 (m timeStep-1); zMix, depth of the mixed layer (m); fluxDummy, 0 if zMix is above the DO sensor depth (shuts off atmospheric flux), 1 otherwise.
#' @param parsIn A vector of parameters, given in log units. First element is `iota`, primary productivity per unit PAR ((mg L-1 timeStep-1) / (mmol m-2 s-1)). Second element is `rho`, nighttime respiration (mg L-1 timeStep-1). Third element is `DOInit`, the initial DOHat value (mg L-1). timeStep is the number of minutes between observations.
#' @return NLL, the negative log-likelihood (a scalar)
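#' @examples
#' # A minimal sketch (not run) of fitting one day of data with optim();
#' # `dayData` is a hypothetical data frame with the columns described above,
#' # at a 10-minute time step
#' \dontrun{
#' parGuess <- log(c(3*10/1440, 0.5*10/1440, dayData$DOObs[1]))
#' fit <- optim(parGuess, metabLoss_v4, dataIn = dayData)
#' exp(fit$par[1:2]) * (1440/10)  # iota and rho in day^-1 units
#' predix <- metabPredix_v4(fit$par, dayData)  # fitted DO, residuals, fluxes
#' }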
#' @export
metabLoss_v4 <- function (parsIn, dataIn) {
##Inputs
#Unpack parameters and exponentiate to force positive
iota <- exp(parsIn[1])
rho <- exp(parsIn[2])
DOInit <- exp(parsIn[3])
#Label useful things
nObs <- dim(dataIn)[1]
kO2 <- dataIn$kO2
DOSat <- dataIn$DOSat
zMix <- dataIn$zMix
irr <- dataIn$irr
DOObs <- dataIn$DOObs
fluxDummy <- dataIn$fluxDummy
##
#Calculate predictions and residuals
#Set up output
DOHat <- rep(NA,nObs)
atmFlux <- rep(NA,nObs)
#Initialize DOHat
DOHat[1] <- DOInit
#Calculate atmFlux and predicted DO for each time point
#Fluxes out of lake have negative sign
for (i in 1:(nObs-1)) {
atmFlux[i] <- fluxDummy[i] * -kO2[i] * (DOHat[i] - DOSat[i]) / zMix[i]
DOHat[i+1] <- DOHat[i] + iota*irr[i] - rho + atmFlux[i]
}
#Compare observed and predicted DO; calculate residuals and NLL
#Exclude from calculation any cases where DOObs=NA
if (any(is.na(DOObs))){
NAObs <- which(is.na(DOObs))
res <- DOObs[-NAObs] - DOHat[-NAObs]
} else{
res <- DOObs - DOHat
}
nRes <- length(res)
SSE <- sum(res^2)
sigma2 <- SSE/nRes
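#NLL is the Gaussian negative log-likelihood with sigma2 profiled out (sigma2 = SSE/nRes),
# so minimizing NLL here is equivalent to minimizing SSE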
NLL <- 0.5*((SSE/sigma2) + nRes*log(2*pi*sigma2))
#Return NLL
return(NLL)
}
#' Bootstrap Metabolism
#' Bootstrapping function for metabolism model; JAZ 2014-11-3; modified from Winslow and GLEON fellows
#' @param parGuess Log-transformed maximum likelihood estimates of the parameters from an initial fit of the model.
#' @param dataTemp A data frame of one day's input data, with the same columns as for `metabLoss_v4`
#' @param n Number of bootstrap iterations; default is 1000.
#' @param ar1.resids Logical; whether or not to maintain the ar1 component of the residuals.
#' @param timeStep Time step of the data, minutes
#' @return A data frame with one row per bootstrap iteration, containing iota, rho, DOInit, convergence code, nll, and GPP
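#' @examples
#' # A minimal sketch (not run); `parGuess` and `dayData` are hypothetical and
#' # would come from an initial fit as in the metabLoss_v4 example
#' \dontrun{
#' boot <- bootstrap.metab(parGuess, dayData, n = 100, ar1.resids = FALSE, timeStep = 10)
#' sd(boot$iota); sd(boot$rho); sd(boot$GPP)  # bootstrap SDs of the estimates
#' }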
#' @export
bootstrap.metab <- function(parGuess, dataTemp, n=1000, ar1.resids=FALSE, timeStep){
n.obs = length(dataTemp$DOObs)
doHat = metabPredix_v4(parGuess,dataTemp)$DOHat
resids = dataTemp$DOObs - doHat
#If we are maintaining the ar1 component of the residuals,
# we must estimate ar1 coeff and the ar1 residual standard deviation
if(ar1.resids){
ar1.lm = lm(resids[2:n.obs] ~ resids[1:(n.obs-1)] - 1) #AR(1): regress each residual on the previous one
ar1.coeff = ar1.lm$coefficients
ar1.sd = sd(ar1.lm$residuals)
}
#Pre-allocate the result data frame
result <- data.frame(boot.iter = 1:n,
iota = rep(NA,n),
rho = rep(NA,n),
DOInit = rep(NA,n),
convergence = rep(NA,n),
nll = rep(NA,n),
GPP = rep(NA,n))
for(i in 1:n){
#Randomize the residuals using one of two methods
if(ar1.resids){ #residual randomization keeping the ar1 data structure
simRes = rep(NA, n.obs)
simRes[1] = sample(resids[!is.na(resids)],1)
for(j in 2:n.obs){
simRes[j] = ar1.coeff*simRes[j-1] + rnorm(n=1, sd=ar1.sd)
}
}else{ #Raw residual randomization
#Permute the non-NA residuals without replacement, leaving NAs in place
#(sampling length(resids) values would error when resids contains NAs)
simRes = rep(NA, n.obs)
nonNA = which(!is.na(resids))
simRes[nonNA] = sample(resids[nonNA], length(nonNA), replace=FALSE)
}
doSim = doHat + simRes
#Run optim again with new simulated DO signal
dataBoot<-dataTemp
dataBoot$DOObs<-doSim
optimTemp <- optim(parGuess,metabLoss_v4,dataIn=dataBoot)
result[i,2:3] <- exp(optimTemp$par[1:2])*(1440/timeStep) #iota and rho to units of mg O2 L-1 day-1
result[i,4] <- exp(optimTemp$par[3]) #initial DO estimate
result[i,5] <- optimTemp$convergence #did model converge or not (0 = converged, nonzero = not)
result[i,6] <- optimTemp$value #value of nll
result[i,7] <- (result[i,2]/(60*60*24))*sum(dataTemp$irr)*timeStep*60 #GPP in units of mg O2 L-1 d-1
}
return(result)
}
## Main Function
#' mfeMetab
#' Functionality for generating lake metabolism estimates from MFE database. Putting Solomon metabolism code into a function and adding some options for different K models. Also allows for pulling input data directly from MFE sensor database. WARNING! This function makes some modifications to your R workspace, which isn't best practice for R package functions. I don't have time to fix it now. It sets the system time zone, and it also saves your workspace to an .Rdata file. Use caution. -KG
#' SEJ with code from CTS, JAZ, CRO, and CJT
#' @param lakeID Lake to estimate metabolism for
#' @param minDate First date to estimate metabolism for
#' @param maxDate Last date to estimate metabolism for
#' @param outName Prefix used to label output files, e.g. 'Acton2008'. Character. No default.
#' @param dirDump Directory where outputs should be dumped, e.g. 'C:/GLEON/Acton/Results'. No default value.
#' @param maxZMix Backup zMix if temp profile data are unavailable; defaults to 8, but pick a value that makes sense for your lake.
#' @param k Model for piston velocity. Options are "cole&caraco", a numeric constant k600 (m day-1), or "read" (not yet implemented).
#' @param fluxDummyToggle Logical; if TRUE, atmospheric exchange is shut off during microstratification (when zMix is at or above the sensor depth).
#' @param bootstrap `yes` or `no`. Warning! Will take a while to run if `yes`. Defaults to `no`.
#' @param lat Latitude of lake, decimal degrees, north positive; defaults to UNDERC (46.16)
#' @param elev Elevation of lake surface, m above sea level; defaults to UNDERC (535)
#' @param windHeight Height above lake surface at which wind speed is measured, m; defaults to UNDERC (2)
#' @param timeStep Time interval between DO measurements, minutes; defaults to 10 minutes
#' @param sensorDepth Depth of DO sensor, m; defaults to UNDERC (0.7)
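#' @examples
#' # A minimal sketch (not run); the lakeID, dates, and paths are hypothetical,
#' # and the database access functions (e.g. sensordbTable) must already be sourced
#' \dontrun{
#' out <- mfeMetab(lakeID = "EL", minDate = "2019-06-01", maxDate = "2019-08-31",
#'                 outName = "EL2019", dirDump = "C:/metabResults")
#' head(out$optimOut)
#' head(out$GPPFit)
#' }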
#' @export
mfeMetab <- function(lakeID, minDate, maxDate, outName, dirDump, maxZMix = 8,
k = "cole&caraco", fluxDummyToggle = TRUE,
bootstrap = 'no', lat = 46.16, elev = 535, windHeight = 2,
timeStep = 10, sensorDepth = 0.7){
library(LakeMetabolizer)
#For troubleshooting:
#maxZMix=8;k="cole&caraco";fluxDummyToggle=TRUE;bootstrap='no';lat=46.16;elev=535;windHeight=2;timeStep=10;sensorDepth=0.7
# Set environment tz variable to CDT
Sys.setenv(tz = "America/Chicago")
########################################
# Read and organize data from database - assumes database functions have been sourced.
# DO
if(lakeID%in%c("EL","WL")){
rawDOmd <- sensordbTable("DO_CORR", lakeID = lakeID, minDate = minDate, maxDate = maxDate)
rawDOysi <- sensordbTable("YSI_CORR", lakeID=lakeID, minDate = minDate, maxDate = maxDate)
rawDO <- rbind(rawDOmd[,c("dateTime","cleanedDO_mg_L")], rawDOysi[,c("dateTime","cleanedDO_mg_L")])
dataDO <- aggregate(x = as.numeric(rawDO$cleanedDO_mg_L), by = list(rawDO$dateTime), FUN = mean,
na.rm = TRUE)
colnames(dataDO) <- c("datetime", "DO")
}else{
rawDO<-sensordbTable("DO_CORR", lakeID = lakeID, minDate = minDate, maxDate = maxDate)
dataDO<-rawDO[,c("dateTime","cleanedDO_mg_L")]
colnames(dataDO)<-c("datetime","DO")
}
#Water temp at depth of DO sensor
if(lakeID%in%c("EL","WL")){
rawSensorTemp<-rbind(rawDOmd[,c("dateTime","cleanedTemp_C")],rawDOysi[,c("dateTime","cleanedTemp_C")])
dataSensorTemp<-aggregate(x=as.numeric(rawSensorTemp$cleanedTemp_C),by=list(rawSensorTemp$dateTime),
FUN=mean,na.rm=TRUE)
colnames(dataSensorTemp)<-c("datetime","TEMP")
}else{
dataSensorTemp<-rawDO[,c("dateTime","cleanedTemp_C")]
colnames(dataSensorTemp)<-c("datetime","TEMP")
}
#Temp profile
tempChain<-sensordbTable("HOBO_TCHAIN_CORR",lakeID=lakeID,minDate=minDate,maxDate=maxDate)
tempChain2<-tempChain[,c("dateTime","depth_m","cleanedTemp_C")]
colnames(tempChain2)=c('datetime','depth_m','temp')
dataTempProfile<-reshape(tempChain2,timevar="depth_m",idvar="datetime",direction="wide",sep="")
metDataEL<-sensordbTable("HOBO_METSTATION_CORR",lakeID="EL",minDate=minDate,maxDate=maxDate)
metDataWL<-sensordbTable("HOBO_METSTATION_CORR",lakeID="WL",minDate=minDate,maxDate=maxDate)
metDataFE<-sensordbTable("HOBO_METSTATION_CORR",lakeID="FE",minDate=minDate,maxDate=maxDate)
metData=rbind(metDataEL,metDataWL, metDataFE)
#PAR
#Divide PAR by 1000 to convert from measured units (umol m-2 s-1) to model units (mmol m-2 s-1)
#made choice to average PAR readings from WL & EL/FE
dataPAR<-aggregate(x=as.numeric(metData$cleanedPAR_uE_m2_s),by=list(metData$dateTime),
FUN=mean,na.rm=TRUE)
colnames(dataPAR)=c("datetime","PAR")
dataPAR$PAR <- dataPAR$PAR/1000
#Wind speed
dataWind<-aggregate(x=as.numeric(metData$cleanedWindSpeed_m_s),by=list(metData$dateTime),
FUN=mean,na.rm=TRUE)
colnames(dataWind)=c("datetime","WS")
##
#Display some info about time grain of measurements
#Print first five time readings for each variable
firstFiveTimes <- data.frame(DO=dataDO$datetime[1:5],PAR=dataPAR$datetime[1:5],windSpeed=dataWind$datetime[1:5],sensorTemp=dataSensorTemp$datetime[1:5],tempProfile=dataTempProfile$datetime[1:5])
print('First five time readings for each variable'); print(firstFiveTimes)
#Calculate first differences of time readings, display unique values
cat('\n','First differences of datetime', '\n')
difTimesDO <- diff(dataDO$datetime); print(table(difTimesDO))
difTimesPAR <- diff(dataPAR$datetime); print(table(difTimesPAR))
difTimesWindSpeed <- diff(dataWind$datetime); print(table(difTimesWindSpeed))
difTimesSensorTemp <- diff(dataSensorTemp$datetime); print(table(difTimesSensorTemp))
difTimesTempProfile <- diff(dataTempProfile$datetime); print(table(difTimesTempProfile))
##
#Remove rows with duplicate datetime stamps (and warn)
notDupRows <- findNotDupRows(dataDO)
dataDO <- dataDO[notDupRows,]
notDupRows <- findNotDupRows(dataPAR)
dataPAR <- dataPAR[notDupRows,]
notDupRows <- findNotDupRows(dataWind)
dataWind <- dataWind[notDupRows,]
notDupRows <- findNotDupRows(dataSensorTemp)
dataSensorTemp <- dataSensorTemp[notDupRows,]
notDupRows <- findNotDupRows(dataTempProfile)
dataTempProfile <- dataTempProfile[notDupRows,]
dataDO$datetime <- floorMins(dataDO,timeStep=timeStep)
dataPAR$datetime <- floorMins(dataPAR,timeStep=timeStep)
dataWind$datetime <- floorMins(dataWind,timeStep=timeStep)
dataSensorTemp$datetime <- floorMins(dataSensorTemp,timeStep=timeStep)
dataTempProfile$datetime <- floorMins(dataTempProfile,timeStep=timeStep)
#Repeat check for dup rows in case any introduced during floorMins
notDupRows <- findNotDupRows(dataDO)
dataDO <- dataDO[notDupRows,]
notDupRows <- findNotDupRows(dataPAR)
dataPAR <- dataPAR[notDupRows,]
notDupRows <- findNotDupRows(dataWind)
dataWind <- dataWind[notDupRows,]
notDupRows <- findNotDupRows(dataSensorTemp)
dataSensorTemp <- dataSensorTemp[notDupRows,]
notDupRows <- findNotDupRows(dataTempProfile)
dataTempProfile <- dataTempProfile[notDupRows,]
#Find the latest first time point and the earliest last time point of all the data
startTime <- max(min(dataDO$datetime),min(dataPAR$datetime),min(dataWind$datetime),min(dataSensorTemp$datetime),min(dataTempProfile$datetime))
endTime <- min(max(dataDO$datetime),max(dataPAR$datetime),max(dataWind$datetime),max(dataSensorTemp$datetime),max(dataTempProfile$datetime))
#Data.frame with one column "datetime" which is sequence of times at time interval of timeStep, from startTime to endTime
completeTimes <- data.frame(datetime=seq(startTime,endTime,paste(timeStep,"mins")))
#Merge all of input data.frames with completeTimes, so that they now all extend from startTime to endTime by timeStep
dataDO <- merge(completeTimes,dataDO,by="datetime",all.x=T)
dataPAR <- merge(completeTimes,dataPAR,by="datetime",all.x=T)
dataWind <- merge(completeTimes,dataWind,by="datetime",all.x=T)
dataSensorTemp <- merge(completeTimes,dataSensorTemp,by="datetime",all.x=T)
dataTempProfile <- merge(completeTimes,dataTempProfile,by="datetime",all.x=T)
########################################
#Calculate sunrise, sunset
#Days of year for which to calculate sunrise and sunset
daysVec <- seq.POSIXt(trunc(startTime,"day"),trunc(endTime,"day"),"1 day")
day <- as.numeric(format(daysVec,format="%j"))
#Factors to convert degrees to radians and vice versa
degToRad <- 2*pi/360
radToDeg <- 180/pi
#Day angle "gamma" (radians). Iqbal 1983 Eq. 1.2.2
dayAngle <- 2*pi*(day-1)/365
#Declination of the sun "delta" (radians). Iqbal 1983 Eq. 1.3.1
dec <- 0.006918 - 0.399912*cos(dayAngle) + 0.070257*sin(dayAngle) - 0.006758*cos(2*dayAngle) + 0.000907*sin(2*dayAngle) - 0.002697*cos(3*dayAngle) + 0.00148*sin(3*dayAngle)
#Sunrise hour angle "omega" (degrees). Iqbal 1983 Eq. 1.5.4
latRad <- lat*degToRad
sunriseHourAngle <- acos(-tan(latRad)*tan(dec))*radToDeg
#Sunrise and sunset times (decimal hours, relative to solar time) Iqbal 1983 Ex. 1.5.1 - +5 is to put it in America/Chicago
sunrise <- 12 - sunriseHourAngle/15+5
sunset <- 12 + sunriseHourAngle/15+5
# As number of seconds from midnight
sunrise <- sunrise/24*86400
sunset <- sunset/24*86400
# As number of seconds from beginning of year
sunrise <- sunrise+(day-1)*86400
sunset <- sunset+(day-1)*86400
# Convert to POSIXct and round to nearest minute
yr <- format(daysVec,format="%Y")
origin <- paste(yr,"01","01",sep="-")
sunrise <- round(as.POSIXct(sunrise,origin=origin,tz="America/Chicago"),"mins")
sunset <- round(as.POSIXct(sunset,origin=origin,tz="America/Chicago"),"mins")
# One final note: the metabolism code does not take daylight savings time into
# account. All the datasets Chris was getting for GLEON were in standard time,
# not daylight savings time. If you're using CDT times with the pelagic data,
# check the sunrise and sunset times the code calculates; they'll probably be
# offset by one hour, which would affect the GPP and R results. If the data
# you're inputting are exclusively in daylight savings time (like our data from
# the summer), shift sunrise and sunset forward one hour, as done below.
# WARNING: the next two lines are ACTIVE and assume input data in daylight
# savings time; comment them out if your data are in standard time.
sunrise <- sunrise + 3600
sunset <- sunset + 3600
#Create data.frame with sunrise, sunset times for each day
sun <- data.frame(day=daysVec,sunrise,sunset)
########################################
#Re-trim data sets so that they start at or after first sunrise, and end at last time before last sunrise
# i.e. lop off partial day at end
#Trim
startTrim <- min(sun$sunrise)
endTrim <- max(sun$sunrise)
dataDO <- dataDO[dataDO$datetime >= startTrim & dataDO$datetime < endTrim,]
dataPAR <- dataPAR[dataPAR$datetime >= startTrim & dataPAR$datetime < endTrim,]
dataWind<- dataWind[dataWind$datetime >= startTrim & dataWind$datetime < endTrim,]
dataSensorTemp <- dataSensorTemp[dataSensorTemp$datetime >= startTrim & dataSensorTemp$datetime < endTrim,]
dataTempProfile <- dataTempProfile[dataTempProfile$datetime >= startTrim & dataTempProfile$datetime < endTrim,]
completeTimes <- data.frame(datetime=completeTimes[completeTimes$datetime >= startTrim & completeTimes$datetime < endTrim,],stringsAsFactors=FALSE)
#(Useful later) Vector giving which solar day each time in completeTimes belongs to
solarDaysBreaks <- unique(sun$sunrise[sun$sunrise <= endTrim])
solarDaysVec <- cut.POSIXt(completeTimes$datetime,breaks=solarDaysBreaks)
########################################
#Fill gaps in data
##
#DO - do not fill gaps
##
#PAR - linearly interpolate gaps up to 60 min long
dataPAR <- fillHoles(dataPAR,maxLength=60,timeStep=timeStep)
##
#sensorTemp - linearly interpolate gaps up to 200 min long
dataSensorTemp <- fillHoles(dataSensorTemp,maxLength=200,timeStep=timeStep)
##
#windSpeed - fill with daily average as long as at least 80% of data are available
#Loop over days
for (i in 1:length(unique(solarDaysVec))){
cat(i, " ")
#Extract data between sunrise on day i and sunrise on day i+1
timeSlice <- c(sun$sunrise[i], sun$sunrise[i+1])
dataTemp <- dataWind[dataWind$datetime>=timeSlice[1] & dataWind$datetime<timeSlice[2],]
#Determine total number of observations, and number that are NA
nTot <- length(dataTemp$WS)
nNA <- length(which(is.na(dataTemp$WS)))
#If >20% of obs are NA, skip to next i
if(nrow(dataTemp)>0){
if (nNA/nTot > 0.20){
next
}else{
#Calculate mean windSpeed and sub in for NA values
meanSpeed <- mean(dataTemp$WS,na.rm=T)
naRows <- as.numeric(row.names(dataTemp[is.na(dataTemp$WS),]))
dataWind$WS[naRows] <- meanSpeed
}
}else{
next
}
}
##
#tempProfile - linearly interpolate gaps; the maxLength setting below controls how long a gap may be filled
nCols <- dim(dataTempProfile)[2]
#Loop over the columns of dataTempProfile
for(i in 2:nCols){
dataTemp <- dataTempProfile[,c(1,i)]
dataTemp[,2]=as.numeric(dataTemp[,2])
#***** set maxLength to enable long interpolations or not
dataTempFilled <- fillHoles(dataTemp,maxLength=1000000,timeStep=timeStep)
dataTempProfile[,i] <- dataTempFilled[,2]
}
########################################
#Calculate zMix and fluxDummy
#If temperature measured at only one depth, use maxZMix as zMix at every time
if(ncol(dataTempProfile) <= 2){
dataZMix <- data.frame(datetime=dataTempProfile$datetime,
zMix=rep(maxZMix,length(dataTempProfile$datetime)))
}else{
#Otherwise calculate zMix from data
#Convert tempProfile data to density
#Density of water (kg m-3) as function of temp from McCutcheon (1999)
#Note there is a different formula if salinity is appreciable; formula below ignores that
dataDensProfile <- dataTempProfile
dataDensProfile[,-1] <- 1000*(1-((dataDensProfile[,-1]+288.9414)/(508929.2*(dataDensProfile[,-1]+68.12963)))*(dataDensProfile[,-1]-3.9863)^2)
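#Illustrative check: temp = 20 C gives ~998.2 kg m-3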
#Calc zMix
dataZMix <- calcZMixDens(dataDensProfile)
}
#Plot zMix
maxDepth <- max(as.numeric(substr(colnames(dataTempProfile)[2:nCols],5,10)))
pdf(file=file.path(dirDump,paste(outName,'zMix.pdf')))
plot(zMix~datetime,data=dataZMix,ylim=c(maxDepth,0))
dev.off()
#if using fluxDummy, then...
if(fluxDummyToggle){
#Identify when to shut off atmospheric flux
# If zMix > sensorDepth, then sensor is in mixed layer and fluxDummy = 1
# If zMix <= sensorDepth, then there is stratification at or above sensor and fluxDummy = 0 -- shut off atmosphere at this time step
fluxDummy <- as.numeric(dataZMix$zMix>sensorDepth)
}else{
fluxDummy<-rep(1,nrow(dataTempProfile))
}
########################################
#Merge data for convenience
#Merge
data1 <- merge(dataDO,dataPAR,by="datetime",all=T)
data1 <- merge(data1,dataWind,by="datetime",all=T)
data1 <- merge(data1,dataSensorTemp,by="datetime",all=T)
data1 <- merge(data1,dataZMix,by="datetime",all=T)
########################################
#Report on lengths of NA strings in data
#For each variable in data1, find the length of each run of NA in the data
#Have to temporarily sub in -99999 for NA to get rle() to work as desired
data1Temp <- as.matrix(data1[,2:6])
whichNA <- which(is.na(data1Temp))
data1Temp[whichNA] <- -99999
cat('\n','Lengths of NA strings after fillHoles etc.','\n')
#DO
rleOut <- rle(data1Temp[,"DO"])
whichNA <- which(rleOut$values==-99999)
print('Lengths of DO NA strings')
print(sort(rleOut$lengths[whichNA]))
#PAR
rleOut <- rle(data1Temp[,"PAR"])
whichNA <- which(rleOut$values==-99999)
print('Lengths of PAR NA strings')
print(sort(rleOut$lengths[whichNA]))
#windSpeed
rleOut <- rle(data1Temp[,"WS"])
whichNA <- which(rleOut$values==-99999)
print('Lengths of windSpeed NA strings')
print(sort(rleOut$lengths[whichNA]))
#sensorTemp
rleOut <- rle(data1Temp[,"TEMP"])
whichNA <- which(rleOut$values==-99999)
print('Lengths of sensorTemp NA strings')
print(sort(rleOut$lengths[whichNA]))
#zMix
rleOut <- rle(data1Temp[,"zMix"])
whichNA <- which(rleOut$values==-99999)
print('Lengths of zMix NA strings')
print(sort(rleOut$lengths[whichNA]))
rm(data1Temp)
########################################
#Calculate DOSat and kO2 at each time step
##
#Calculate average atmospheric pressure at elevation of lake
#Using the 'barometric formula' - see e.g. U.S. Standard Atmosphere 1976 or
# Jacobs 1999 Atmospheric Chemistry, Eqn 2.9
#Values of Rstar, g0, M are according to US Standard Atmosphere 1976; could use SI instead
#Constants
Pb <- 101325 #static pressure, pascals
Tb <- 288.15 #standard temp, K
Lb <- -0.0065 #standard temp lapse rate, K m-1
h <- elev #elevation above sea level, m
hb <- 0 #elevation at bottom of atmospheric layer 0, m (note layer 0 extends to 11000 masl)
Rstar <- 8.31432 #universal gas constant, N m mol-1 K-1 (equiv to J K-1 mol-1) SI: 8.314472
g0 <- 9.80665 #acceleration of gravity, m s-2
M <- 0.0289644 #molar mass of Earth's air, kg mol-1
#Pressure, in Pa (pascals)
P <- Pb * (Tb/(Tb+Lb*(h-hb)))^(g0*M/(Rstar*Lb))
# In mmHg
atmPres <- P*0.00750061683
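#Illustrative check: elev = 535 m gives P ~ 95.1 kPa, i.e. atmPres ~ 713 mmHg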
##
#Calculate DO saturation
#Use eqn from Weiss 1970 Deep Sea Res. 17:721-735; simplified since salinity=0
# ln DO = A1 + A2 100/T + A3 ln T/100 + A4 T/100
#Convert sensorTemp to Kelvin
sensorTempK <- dataSensorTemp[,2] + 273.15
#Weiss equation
A1 <- -173.4292; A2 <- 249.6339; A3 <- 143.3483; A4 <- -21.8492
DOSat <- exp(((A1 + (A2*100/sensorTempK) + A3*log(sensorTempK/100) + A4*(sensorTempK/100))))
#Correction for local average atmospheric pressure
u <- 10^(8.10765 - (1750.286/(235+dataSensorTemp[,2])))
DOSat <- (DOSat*((atmPres-u)/(760-u))) #ml/L
DOSat <- DOSat/1000 #L/L
#Convert using standard temperature and pressure.
#Similar to calculating saturation DO at STP in ml/L, converting to mg/L (at STP),
#and then doing the above temperature and pressure conversions.
R <- 0.082057 #L atm K-1 mol-1
O2molWt <- 15.999*2
convFactor <- O2molWt*(1/R)*(1/273.15)*(760/760) #g/L
DOSat <- DOSat*convFactor*1000 #mg/L
##
#Calculate kO2 depending on what model is specified
if(k=="cole&caraco"){
wp <- 0.15 #exponent of wind profile power relationship, Smith 1985 Plant, Cell & Environment 8:387-398
wind10 <- (10/windHeight)^wp * dataWind[,2]
k600 <- 2.07 + 0.215*wind10^1.7 #k600 in cm hr-1 per Cole and Caraco 1998;
k600 <- k600*24/100 #k600 in m day-1
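#Illustrative check: wind10 = 3 m s-1 gives k600 = 2.07 + 0.215*3^1.7 ~ 3.46 cm hr-1 ~ 0.83 m day-1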
schmidt <- 1800.6 - 120.1*dataSensorTemp[,2] + 3.7818*dataSensorTemp[,2]^2 - 0.047608*dataSensorTemp[,2]^3
kO2 <- k600*(schmidt/600)^-0.5 #Jahne et al. 1987; exponent could also be -0.67
kO2 <- kO2*(timeStep/1440) #change kO2 to units of m/(timeStep*min)
}else if(k=="read"){
#### need to work on this sometime -> look at "metabFUnc_Mesocosm_ReadKModel.R
}else if(is.numeric(k)){
k600=rep(k,nrow(data1))
schmidt <- 1800.6 - 120.1*dataSensorTemp[,2] + 3.7818*dataSensorTemp[,2]^2 - 0.047608*dataSensorTemp[,2]^3
kO2 <- k600*(schmidt/600)^-0.5 #Jahne et al. 1987; exponent could also be -0.67
kO2 <- kO2*(timeStep/1440) #change kO2 to units of m/(timeStep*min)
}
########################################
#Fit model to find parameter estimates for each day
##
#Set up
#Organize input data
data2 <- data.frame(datetime=data1$datetime, DOObs=data1$DO, DOSat=DOSat, irr=data1$PAR, kO2=kO2, zMix=data1$zMix, fluxDummy=fluxDummy)
#Set up data.frame to store output of optimizations
nDays <- dim(sun)[1] - 1 #Not sure if this indexing works appropriately for all lakes
dateRange <- sun$day[c(1,nDays)]
outDays <- seq(dateRange[1],dateRange[2],"1 day")
optimOut <- data.frame(solarDay=outDays,nll=rep(NA,nDays), iotaEst=rep(NA,nDays),
rhoEst=rep(NA,nDays), DOInitEst=rep(NA,nDays), optimCode=rep(NA,nDays),
R2=rep(NA,nDays),iotaSd=rep(NA,nDays),rhoSd=rep(NA,nDays),GPPSd=rep(NA,nDays))
#Calculate appropriate starting guesses for iota and rho parameters
# This takes as reasonable guesses
# iota = 3 (mg L-1 d-1)/(mmol m-2 s-1)
# rho = 0.5 mg L-1 d-1
# And converts them to appropriate units given time step of model
iotaGuess <- 3*timeStep/1440
rhoGuess <- 0.5*timeStep/1440
#Remove some stuff
rm(DOSat, kO2, fluxDummy)
##
#Useful stuff for plotting
#Limits for y-axis for drivers
irrLims <- range(data2$irr,na.rm=T)
zMixLims <- c(max(data2$zMix,na.rm=T),0)
atmFluxLims <- c(-10*max(data2$kO2,na.rm=T)*min(data2$zMix,na.rm=T),10*max(data2$kO2,na.rm=T)*min(data2$zMix,na.rm=T))
#Set up pdf device
pdf(file=file.path(dirDump,paste(outName,'daily fits.pdf')),width=11,height=8.5)
layout(rbind(matrix(c(1:14),nrow=2,byrow=F),matrix(c(15:28),nrow=2,byrow=F)),heights=c(1,1.5,1,1.5))
par(mar=c(1,2,0,0)+0.1)
#Save workspace
save(list=ls(),file=file.path(dirDump,paste(outName,'.RData',sep="")))
##
#Run optimization for each day
for (i in 1:nDays){
#Print occasional progress report on i
if (i %in% seq(1,nDays,10)) {print(paste("Starting day",i))}
#Extract data between sunrise on day i and sunrise on day i+1
timeSlice <- c(sun$sunrise[i], sun$sunrise[i+1])
dataTemp <- data2[data2$datetime>=timeSlice[1] & data2$datetime<timeSlice[2],]
####### adding a check to force non-positive DO values to a small positive number (1e-4) to avoid fitting issues
dataTemp$DOObs[dataTemp$DOObs<=0]=1e-4
#If we have less than 75% of the day, more than 30% of DOObs is missing, or if any NA in DOSat, irr, kO2, or zMix,
# return NA for optimization results and plot blank plots
nTot <- length(dataTemp$DOObs)
nNA <- length(which(is.na(dataTemp$DOObs)))
if((nTot<(24*60/timeStep*0.75)) | (nNA/nTot > 0.30) | any(is.na(dataTemp[,3:6]))){
optimOut[i,2:6] <- NA
frame(); frame()
next
}else{
#Otherwise, fit model and make plots
#For guess of initial DOHat, use first obs unless that is NA, in which case use min obs
if (!is.na(dataTemp$DOObs[1])){
DOInit <- dataTemp$DOObs[1]
}else{
DOInit <- min(dataTemp$DOObs,na.rm=T)
}
#Find parameter values by minimizing nll
parGuess <- log(c(iotaGuess,rhoGuess,DOInit))
optimTemp <- optim(parGuess,metabLoss_v4,dataIn=dataTemp)
#Save min nll
optimOut[i,2] <- optimTemp$value
#Save parameter estimates
# Multiply by 1440/timeStep to get from units of timeStep^-1 to units of day^-1
optimOut[i,3:4] <- exp(optimTemp$par[1:2])*(1440/timeStep)
optimOut[i,5] <- exp(optimTemp$par[3])
#Save code indicating whether optim converged successfully (0 = success)
optimOut[i,6] <- optimTemp$convergence
#Calculate atmFlux and DOHat given max likelihood parameter estimates
predix <- metabPredix_v4(optimTemp$par,dataTemp)
DOHat <- predix$DOHat
atmFlux <- predix$atmFlux
res <- predix$res
#Calculate SST, SSE, and R2, and save R2 (note SST is uncentered, i.e. the sum of squared observations)
SST <- sum(dataTemp$DOObs^2,na.rm=T)
SSE <- sum(res^2,na.rm=T)
R2 <- (SST-SSE)/SST
optimOut[i,"R2"] <- R2
#bootstrapping
if(tolower(bootstrap)=='yes'){
bootResult<-bootstrap.metab(parGuess = parGuess,dataTemp = dataTemp,n = 100, timeStep = timeStep)
optimOut[i,9] = sd(bootResult$rho)
optimOut[i,8] = sd(bootResult$iota)
optimOut[i,10] = sd(bootResult$GPP)
}
#Plot irradiance (orange points), zMix (dashed line), atmFlux (hollow black points)
#y-axis tick labels are for atmFlux; positive values are flux into lake and negative values are flux out of lake
par(mar=c(1,2,0,0)+0.1)
plot(dataTemp$irr~dataTemp$datetime, ylim=irrLims, axes=F, xlab="", ylab="", pch=18, col="dark orange")
axis.POSIXct(1,dataTemp$datetime,labels=F); box()
text(x=min(dataTemp$datetime),y=irrLims[2],labels=format(dataTemp$datetime[1],format="%d-%b"),adj=c(0,1))
par(new=T); plot(dataTemp$zMix~dataTemp$datetime, ylim=zMixLims, type="l", lty=2, axes=F, xlab="", ylab="")
par(new=T); plot(atmFlux~dataTemp$datetime, ylim=atmFluxLims, axes=F, xlab="", ylab=""); axis(2)
#Plot observed and predicted DO
yLims <- range(c(DOHat,dataTemp$DOObs),na.rm=T)
par(mar=c(2,2,0,0)+0.1)
plot(DOHat ~ dataTemp$datetime, ylim=yLims, type="l", axes=F, xlab="", ylab="")
axis.POSIXct(1,dataTemp$datetime,format="%H:%M")
axis(2)
box()
points(dataTemp$DOObs ~ dataTemp$datetime)
meanDOSat <- round(mean(dataTemp$DOSat,na.rm=T),1)
text(x=min(dataTemp$datetime),y=yLims[2],labels=paste('DOSat',meanDOSat),adj=c(0,1))
}
} #end loop over nDays
#Close pdf graphics device
dev.off()
#Dump optimOut to dirDump
write.table(optimOut,file.path(dirDump,paste(outName,'optimOut.txt')))
#Calculate model-fitting version of GPP from iotaEst and total daily PAR
#First calculate total mmol photons m-2 for each time step
solarFlux <- data2$irr*timeStep*60 #mmol m-2 timeStep-1
#Convert iota units
iotaNewUnits <- optimOut$iotaEst/(60*60*24) #(mg L-1 s-1) / (mmol m-2 s-1)
#Aggregate solarFlux - sum by day
aggSolarFlux <- aggregate(solarFlux,by=list(solarDay=solarDaysVec),sum,na.rm=T)
aggSolarFlux$solarDay <- as.POSIXct(trunc.POSIXt(as.POSIXct(aggSolarFlux$solarDay,tz="America/Chicago"),"days"),tz="America/Chicago")
aggSolarFlux <- merge(aggSolarFlux,data.frame(solarDay=optimOut$solarDay,stringsAsFactors=FALSE),all.y=T)
totalSolarFlux <- aggSolarFlux[,2] #mmol m-2 d-1
#Calc GPP from fitted iota
GPPFit <- iotaNewUnits*totalSolarFlux #mg L-1 d-1
GPPFitOut <- data.frame(solarDay=optimOut$solarDay,GPPFit,stringsAsFactors=FALSE)
maxZmix<-aggregate(dataZMix[,2],by=list(strftime(strptime(dataZMix[,1],"%Y-%m-%d %H:%M:%S"),format="%Y-%m-%d")),FUN=max,na.rm=TRUE) #applies max() to the zMix data by date - i.e. takes the deepest zMix for each date and stores it in maxZmix
colnames(maxZmix)<-c("solarDay","zMix") #make column headers standard for merging later on
colnames(GPPFitOut)<-c("solarDay","GPP") #make column headers standard
GPPFitOut[,1]<-as.character(GPPFitOut[,1])
GPPFitOut<-merge(GPPFitOut,maxZmix,by.x="solarDay",all=T) #merges the two data vectors of metabolism and Zmix by given date. i.e. matches up data based on date
write.table(GPPFitOut,file.path(dirDump,paste(outName,"GPPFitOut.txt")))
optimOut=optimOut[as.character(optimOut$solarDay)%in%GPPFitOut$solarDay,]
GPPFitOut=GPPFitOut[GPPFitOut$solarDay%in%as.character(optimOut$solarDay),]
return(list(optimOut=optimOut,GPPFit=GPPFitOut))
}