# Chunk defaults for this vignette: merge source and output into one block
# and prefix every printed output line with "#>".
knitr::opts_chunk$set(collapse = TRUE, comment = "#>")

This script pre-processes raw sapflux data and saves it as Adenostoma_2016_mphillips_preprocessed. The raw data is available as .csv and .dat files in the source package.

sapflux version 1.0.0 was used here.

library(sapflux)
cat("\n")

# Raw logger exports to import, given as bare file names inside the
# package's extdata directory.
read_files <- list(
  "22May2016_Adenostema_FLUX.dat",
  "CR1000_FLUX_06_29_16.dat",
  "CR1000_FLUX_1_11_16.csv",
  "CR1000_FLUX_1_21_16.csv",
  "CR1000_FLUX_1_25_16.csv",
  "CR1000_FLUX_1_29_16.csv",
  "CR1000_FLUX_2_05_16.csv",
  "CR1000_FLUX_2_12_16.csv",
  "CR1000_FLUX_2_25_16.csv",
  "CR1000_FLUX_3_16_16.csv",
  "CR1000_FLUX_3_4_16.csv",
  "CR1000_FLUX_4_12_16.dat"
)

# The import is handed bare file names, so it has to run with extdata as the
# working directory. Switch there only for the import/bind step and always
# switch back (even on error), so that relative paths used later in this
# script keep resolving from the original directory.
old_wd <- setwd("../inst/extdata")
flux_data <- tryCatch({
  raw_flux <- sapflux::ImportRawFlux(
    file = read_files,
    metadata.file = "8May2016_probe_metadata.csv",
    filetype = "campbell",
    datatype = "voltages",
    diam = "ellipse",
    diam.fill = TRUE
  )
  # First log entry, written by the import step.
  print(raw_flux@log[1])
  sapflux::BindRawFlux(flux = raw_flux)
}, finally = setwd(old_wd))
rm(raw_flux, old_wd)

# Log entries 2 and 3 of the bound object.
flux_data@log[2:3]

# Summarise the probe metadata carried by the imported object.
message("Ports used:")
print(flux_data@metadata[["port.tag"]])
cat("Dates installed:\n")
print(flux_data@metadata[["date.install"]])
cat("Dates removed:\n")
print(flux_data@metadata[["date.removed"]])

First check:

# Visual check of the imported and bound data before any cleaning step,
# drawn with sapflux's plot function and fastplot = TRUE.
sapflux::plot(flux_data, fastplot = TRUE)

Run the auto outlier dropping, then check again:

# Automatic outlier removal with the IQR-based option switched off.
flux_data <- sapflux::AutoDropOutliers(flux = flux_data, byIQR = FALSE)
# Show every log entry after the first three. tail(x, -3) gives an empty
# result when the log holds three or fewer entries, whereas 4:length(x)
# would count backwards and index entries 4 and 3.
utils::tail(flux_data@log, -3)
# Re-plot to inspect the effect of the outlier removal.
sapflux::plot(flux_data, fastplot = TRUE)

Drop rainy days:

# Precipitation threshold in mm, passed to DropRainyDays() and drawn on the
# rain plot below.
cutoff <- 5

# Read the rain-event record and give its columns fixed names.
rain <- read.csv(
  file = "../inst/extdata/SDEF_rain_events.csv",
  stringsAsFactors = FALSE
)
names(rain) <- c("time", "rain")

# Convert cm to mm
rain$rain <- 10 * rain$rain
# Parse the month/day/year date strings into date-times.
rain$time <- strptime(x = rain$time, format = "%m/%d/%Y")

# Plot precipitation over time with the threshold as a horizontal line.
plot(rain, ylab = "Precipitation (mm)")
abline(h = cutoff)

# Apply the rain filter, then show the fifth log entry.
flux_data <- sapflux::DropRainyDays(
  flux = flux_data,
  met = rain,
  cutoff = cutoff
)
flux_data@log[5]

Additional graphical check:

# Visual check of the data after the rainy-day filter, ahead of the manual
# per-port edits.
sapflux::plot(flux_data, fastplot = TRUE)

Hand edits:

# Manual, per-column clean-up of the flux data. Thresholds and row ranges
# were chosen by eye from the diagnostic plots; the commented-out plot()
# calls are kept as pointers to the views used.

# Count NAs before editing. sum(is.na()) returns 0 when nothing is missing,
# whereas table(is.na(x))[["TRUE"]] fails with a subscript error then.
na.prev <- sum(is.na(flux_data@data))
data <- slot(flux_data, "data")
time <- slot(flux_data, "time")

# Set readings below / above a limit to NA; existing NAs stay NA.
drop_below <- function(x, limit) ifelse(x < limit, NA, x)
drop_above <- function(x, limit) ifelse(x > limit, NA, x)

data[, 1] <- drop_below(data[, 1], 0.2)
data[, 2] <- drop_below(data[, 2], 0.4)
data[, 3] <- drop_below(data[, 3], 0.6)
#plot(time, data[, 4], main = "Col 4, before trim")
data[, 4] <- drop_below(data[, 4], 0.4)
#plot(time, data[, 4], main = "Col 4, after trim")
data[, 5] <- drop_below(data[, 5], 0.55)
data[, 6] <- drop_below(data[, 6], 0.3)
#message("Port 7 is a bad port - check graph for example!!")
#plot(time[2000:2500], data[2000:2500, 7], main = "Col 7 - ex. of bad data")
data[, 7] <- NA
data[, 8] <- drop_below(data[, 8], 0.15)
#plot(time, data[, 9], main = "Col 9 - before clip")
data[, 9] <- drop_below(data[, 9], 0.16)
#plot(time, data[, 9], main = "Col 9 - after first trim")
data[28500:nrow(data), 9] <- NA
#plot(time, data[, 9], main = "Col 9 - after second trim")
data[, 10] <- drop_below(data[, 10], 0.18)
data[, 10] <- drop_above(data[, 10], 0.70)
# Column 10 gets a stricter lower limit from row 35000 onward.
late.rows <- 35000:nrow(data)
data[late.rows, 10] <- drop_below(data[late.rows, 10], 0.415)
rm(late.rows)
data[, 11] <- drop_below(data[, 11], 0.2)
data[, 12] <- drop_below(data[, 12], 0.18)
data[, 13] <- drop_below(data[, 13], 0.28)
data[, 14] <- drop_below(data[, 14], 0.31)
data[, 15] <- drop_above(data[, 15], 0.20)
data[, 15] <- drop_below(data[, 15], 0.06)
# Row ranges start at 1: R ignores a 0 index, so 0:10000 selected the same
# rows but read as if there were a row zero.
data[1:10000, 15] <- NA
data[35000:nrow(data), 15] <- NA
# Ports 16 and up are higher-quality data
data[, 16] <- drop_above(data[, 16], 0.4)
data[, 16] <- drop_below(data[, 16], 0.16)
data[, 17] <- drop_above(data[, 17], 0.4)
data[, 17] <- drop_below(data[, 17], 0.134)
data[, 18] <- drop_above(data[, 18], 0.4)
data[, 18] <- drop_below(data[, 18], 0.15)
data[1:10000, 18] <- NA
data[, 19] <- NA
data[1:20000, 20] <- NA
data[, 20] <- drop_below(data[, 20], 0.095)
#plot(time, data[, 21], main = "Col 21 - before trim")
data[1:20000, 21] <- NA
data[, 21] <- drop_below(data[, 21], 0.1)
#plot(time, data[, 21], main = "Col 21 - after trim")
data[, 22] <- drop_below(data[, 22], 0.165)

# Write the edited values back and record how many points were removed.
slot(flux_data, "data") <- data
na.post <- sum(is.na(flux_data@data))
na.diff <- na.post - na.prev
log.message <- paste("Manual editing of sapflux data removed",
                     na.diff, "bad data points.")
slot(flux_data, "log") <- c(flux_data@log, log.message)
rm(data, drop_below, drop_above)

Further plot checks:

# Visual check of the data after the manual per-port edits, before the
# conversion step.
sapflux::plot(flux_data, fastplot = TRUE)

Convert to flux density.

# Run the Granier conversion and report how long it took.
start.time <- Sys.time()
flux_data <- sapflux::GranierConversions(flux = flux_data)
end.time <- Sys.time()
time.taken <- end.time - start.time
# format() keeps the difftime units (secs, mins, ...); cat() on the raw
# difftime prints only the bare number, which is ambiguous.
cat(format(time.taken), "\n")

Graphical check:

# Visual check of the object returned by GranierConversions().
sapflux::plot(flux_data, fastplot = TRUE)

Save the result:

# Keep the fully pre-processed object under its dataset name.
Adenostoma_2016_mphillips_preprocessed <- flux_data
# The call that would store the object as package data is left disabled
# (presumably usethis/devtools use_data(); confirm before enabling).
#use_data(Adenostoma_2016_mphillips_preprocessed)

End of the pre-processing script. The analysis is continued in Adenostoma_2016_mphillips_process.Rmd.



bmcnellis/SDEF.analysis documentation built on June 4, 2019, 10 a.m.