#' analyzes data without outliers
#' @keywords internal
reEvaluateSites <- function(dfInp,tafNA,startdate,enddate,totT,
y0,ylog,e,e2,mods,vpredl, outlierDatesbySite,
outSites,cati,strtYrMo,endYrMo){
df <- dfInp
weeklyB <- df$weeklyB
startdateStr <- df$startdateStr
enddateStr <- df$enddateStr
use36 <- df$use36
siteAdd <- df$siteAdd
outlierDatesbySite <- c(outlierDatesbySite,df$outlierDatesbySite[[1]])
siteOutliers <- df$siteOutliers
comp <- df$comp
plotMulti <- df$plotMulti
sitePlot <- df$sitePlot
plotAll <- df$plotAll
writeMat <- df$writeMat
seas <- df$seas
r <- df$r
k <- df$k
if (is.null(sitePlot)){
plotB = FALSE
}else{plotB = TRUE}
rv <- integer(5); if(r != 0){rv[1:r] <- rep(1L,r)}
kv <- integer(5); if(k != 0){kv[1:k] <- rep(1L,k)}
kk <- k
obs <- comp
si <- 1
obsi <- 1
siObs<- length(obs)
oc <- 1
if(plotAll){
grDevices::dev.new(width = 6, height = 5.5, noRStudioGD = T, unit = "in")
graphics::par(mfrow = c(2,2),
oma = c(0,0,0,0) + 0.1,
mar = c(0,0,0,0) + 0.1)
}
for(s in 1:length(outSites)){
#need to test this part for multiple compounds
ss <- s # index for site plots, see bottom of loop
s <- match(outSites[s],cati)
si <- s
s <- s + (obsi-1)*length(cati)
sitem <- cbind(tafNA[[s]], rep(0, times= length(tafNA[[s]])),y0[[s]],ylog[[s]])
colnames(sitem) <- c("t","filler","Conc","log")
sitem <- data.frame(sitem)
if (length(outlierDatesbySite) != 0){
if(outlierDatesbySite[oc] == cati[si]){
for(i in (oc+1):length(outlierDatesbySite)){
ifInt <- suppressWarnings(as.integer(outlierDatesbySite[i]))
if (!is.na(ifInt)){
j <- as.integer(outlierDatesbySite[i])
index <- match(j, sitem$t)
if (!is.na(index)){
sitem[index,] <- NA
}
}else{break}
}
oc = i #outlier site counter #skips t
}
}
#deterministic univariate model for one site
p <- 1
sitem <- stats::na.omit(sitem)
tafNA[[s]] <- sitem$t
y0[[s]] <- sitem$Conc
ylog[[s]] <- sitem$log
y1 <- sitem[,4]
t <- sitem$t
cyclicTrend <- (I(cos(t*(2*pi/seas))^p) + I(sin(t*(2*pi/seas))^p))*kv[1] +
(I(cos(t*(2*pi/seas)*2)^p) + I(sin(t*(2*pi/seas)*2)^p))*kv[2] +
(I(cos(t*(2*pi/seas)*3)^p) + I(sin(t*(2*pi/seas)*3)^p))*kv[3] +
(I(cos(t*(2*pi/seas)*4)^p) + I(sin(t*(2*pi/seas)*4)^p))*kv[4] +
(I(cos(t*(2*pi/seas)*5)^p) + I(sin(t*(2*pi/seas)*5)^p))*kv[5] +
t*rv[1] + (t^2)*rv[2] + (t^3)*rv[3] + (t^4)*rv[4] + (t^5)*rv[5]
df <- data.frame(cbind(y1,cyclicTrend))
df <- data.frame(cbind(df,t))
mod <- definelm(y1,t,df,r,kk,seas)
er <- residuals(mod)
summary(mod)
mods[[s]] <- summary(mod)
tc <- data.frame(1:totT)
currResi <- data.frame(cbind(t,er))
colnames(currResi) <- c("t","error")
colnames(tc) <- c("t")
cR <- merge(tc,currResi,by = "t", all = TRUE) #merge(dfRes,currRes, by = "t", all = F)
e2[[s]] <- cR[,2]
#fill in missing t
inter1 <- unname(mod$coefficients[1])
se1 <- sqrt(deviance(mod)/df.residual(mod))
fi <- 0 #to loop back
maxfi<- totT - length(t)
fe <- 1:(maxfi) #fake end to keep loop going #ignore if t=48
t <- c(t,fe)
y1 <- c(y1,fe)
er <- c(er,fe)
kl <- 1
#store predicted value vector
options(warn = -1)
if(length(tafNA[[s]]) < totT){
new <- data.frame(1:totT)
vpred <- predict(mod,newdata = new)
}else{vpred <- predict(mod)}
options(warn = 0)
#fill in missing values
for(i in 1:(totT+maxfi-1)){
if(t[kl] != i-fi){ #if t is skipped
cy1 <- vpred[i-fi]
ry1 <- y1[kl:length(y1)]
e0 <- rnorm(1,0,se1)
y1 <- c(y1[1:kl-1],cy1+e0,ry1)
er <- c(er[1:kl-1],e0,er[kl:length(er)]) # change to stochastic #change to 0
t <- c(t[1:kl-1],i-fi,t[kl:length(t)])
fi <- fi + 1
}else{kl = kl + 1}
}
if(length(t)>totT){t<-t[1:totT]; y1 <- y1[1:totT];er<-er[1:totT]}
#store parameters
e[[s]] <- unname(er)
vpredl[[s]] <- vpred
#plot all sites if indicated by user
if(plotAll){
if(ss%%4 == 1 && ss!=1){
grDevices::dev.new(width = 6, height = 5.5, noRStudioGD = T, unit = "in")
graphics::par(mfrow = c(2,2))}
graphics::par(mar=c(4,4,2,2))
tc <- 1:totT
graphics::plot(sitem$t,sitem$log, ylab="Log concentration",main = paste(cati[si],obs[obsi]), xlab = "t (months)")
graphics::par(new=TRUE)
graphics::lines(x = tc, y = vpred, col =3)
}
si <- si + 1
}
##################
options(warn=-1)
dfRes <- NULL
dfResNA <- NULL
si <- 2
obsi <- 1
tc <- 1:totT
#loop to create dataframe of all residuals
dfRes <- cbind(tc,e[[1]])
dfResNA <- cbind(tc,e2[[1]]) #with NA values
colnames(dfRes) <- c("t",paste(cati[1],obs[1], sep=""))
colnames(dfResNA) <- c("t",paste(cati[1],obs[1], sep=""))
for( i in 2:(length(cati)*length(obs))){
if (i%%(length(cati)) == 1){obsi = obsi + 1; si = 1}
if(!is.na(e[[i]])){
currRes <- cbind(tc,e[[i]])
colnames(currRes) <- c("t",paste0(cati[si],obs[obsi]))
dfRes <- merge(dfRes,currRes, by = "t", all = F)
}
currRes2 <- cbind(tc,e2[[i]])
colnames(currRes2) <- c("t",paste0(cati[si],obs[obsi]))
dfResNA <- merge(dfResNA,currRes2, by = "t", all.x = TRUE,all.y = TRUE)
si <- si+1
}
#create covariance matrix
covxx <- data.frame(cov(dfRes[,-1]))
options(warn=0)
graphics::par(mar=c(1,1,1,1))
MVDw <- mvn(dfRes[,-1], subset = NULL, mvnTest = "mardia", covariance = TRUE, tol = 1e-25, alpha = 0.5, scale = FALSE, desc = TRUE, transform = "none", univariateTest = "SW", univariatePlot = "none", multivariatePlot = "none", multivariateOutlierMethod = "none", bc = FALSE, bcType = "rounded", showOutliers = FALSE,showNewData = FALSE)
univariateTest <- MVDw$univariateNormality
if(plotMulti){
grDevices::dev.new(width = 4, height = 5, noRStudioGD = TRUE)
#graphics::par(mfrow=c(1,2))
MVDw <- mvn(dfRes[,-1], subset = NULL, mvnTest = "mardia", covariance = TRUE,
tol = 1e-25, alpha = 0.5, scale = FALSE, desc = TRUE, transform = "none",
univariateTest = "SW", univariatePlot = "none", multivariatePlot = "qq",
multivariateOutlierMethod = "none", bc = FALSE, bcType = "rounded",
showOutliers = FALSE, showNewData = FALSE)
graphics::title(sub="Plot without outliers")
}
if(plotB && (cati %contain% sitePlot[[1]])){
tc <- 1:totT
for (g in 1:length(sitePlot[[1]])){
grDevices::dev.new(width = 6, height = 5.5, noRStudioGD = T, unit = "in")
graphics::par(mar=c(4,4,2,2))
i = match(sitePlot[[1]][g], cati)
if(!is.na(i)){
graphics::plot(tafNA[[i]],ylog[[i]], ylab="Log concentration without outliers",main = paste(cati[i],obs[1]),xlab = "t (months)")
graphics::par(new=TRUE)
graphics::lines(x = tc, y = vpredl[[i]], col = 3)
}else if(plotB){warning("Site in sitePlot was not in the vector of sites that were analyzed. Make sure the site ID in sitePlot is in the column siteAdd of the input data frame.")}
}
}
big_list <- list("mvn" = MVDw, "residualData" = dfRes, "residualDataNA" = dfResNA,"cov" = covxx, "pred" = vpredl, "newMod" = mods)
return(big_list)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.