knitr::opts_chunk$set( collapse = TRUE, comment = "#>" )
# golem::document_and_reload()
library(sear)
library(dplyr)
library(lubridate)
library(clock)
library(microbenchmark)
library(purrr)
library(plotly)
library(readr)
library(reticulate)
library(stringr)
library(tidyr)
HOCR data is stored in files that mix ASCII and binary formats.
To use Kaitai Struct in C++, the runtime library has to be installed.
Take a look at cpp11 instead of Rcpp, as it seems to be a more up-to-date alternative.
To read the binary HOCR file we go through the Kaitai Struct parser via Python (or C++, which may improve speed; this remains to be tested).
To code interactively in Python, use reticulate::repl_python(). To get objects from the Python or R environments, use py$<name> (from R) or r.<name> (from Python), and use source_python() to source functions from a Python file.
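A minimal sketch of these reticulate round trips (the object name x here is illustrative only):

library(reticulate)

# Create an object in the Python session and read it from R through `py`:
py_run_string("x = 42")
py$x
#> 42

# Inside repl_python(), R objects are reachable the other way around:
# >>> r.BinFile

# Source a Python file to expose its functions and classes in R:
# source_python("hocr.py")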
BinFile <- system.file("data-raw", "reduced", "DATA_20220705_105732.bin", package = "sear", mustWork = T)

reticulate::source_python(system.file("py", "hocr.py", package = "sear", mustWork = T))

hocr_raw <- purrr::map(BinFile, Hocr$from_file)
hocr_raw <- unlist(purrr::map(hocr_raw, ~ .x$packets))

# Inefficient way of removing the custom-class Python object dependencies resulting in a null pointer.
# How to avoid: Error in py_ref_to_r(x) : Embedded NUL in string ? (packet 27281 of AlgaeValidation (2022/07/05))
hocr_raw <- purrr::imap(hocr_raw, ~ tryCatch(
  list(
    "channel" = .$channel,
    "checksum" = .$checksum,
    "darkaverage" = .$darkaverage,
    "darksample" = .$darksample,
    "frame" = .$frame,
    "gpstime" = .$gpstime,
    "instrument" = as.character(.$instrument, errors = "ignore"),
    "inttime" = .$inttime,
    "loggerport" = .$loggerport,
    "mysterydate" = .$mysterydate,
    "sampledelay" = .$sampledelay,
    "sn" = as.character(.$sn, errors = "ignore"),
    "spectemp" = as.character(.$spectemp, errors = "ignore"),
    "timer" = as.character(.$timer, errors = "ignore")
  ),
  error = function(...) NA
))

if (any(is.na(hocr_raw))) {
  hocr_raw <- hocr_raw[-which(is.na(hocr_raw))]
}

# Check for invalid packets
ValidInd <- purrr::map_lgl(hocr_raw, ~ str_detect(as.character(.x$instrument, errors = "ignore"), "SAT(HPL|HSE|HED|PLD)"))

if (any(!ValidInd)) {
  message("Invalid HOCR packets detected and removed: ", length(which(!ValidInd)))
  hocr_raw <- hocr_raw[ValidInd]
}
Some files appear to be corrupted: DATA_20211111_160607.bin, DATA_20211111_171729.bin. This was first noticed through the Python error UnicodeDecodeError: 'utf-8' codec can't decode byte 0xb2 in position 3: invalid start byte, raised when decoding the ASCII strings instrument, sn, timer and spectrometer_temperature. The problem does not come from the ASCII/UTF-8 difference (even so, ASCII should be used to decode the HOCR packets; the DataLogger packet is of unknown encoding).
It looks like some bytes are simply missing in the DataLogger memory, just as for the GPGGA NMEA string inside the .txt file. So the DataLogger is the likely source of the problem, not the HOCR(s) nor the Applanix. If this is confirmed, it might be a major problem for operational deployment, though only a small amount of data appears to be contaminated by this bug.
The SatView logging format differs in that date and time fields are written before every HOCR packet.
The HOCR data is quite large and mainly composed of bad data acquired in transit, which we do not want to waste time processing. Therefore, only the selected points need to enter the processing chain. For that we apply filters to the hocr_raw list of HOCR packets.
Ideally, this time filter should use the DataLogger binary packet added before every HOCR packet; for now we have only determined the time format (in ms), while the date format remains unknown. The quick fix is to use the date from the Applanix.
Creating the time index for filtering takes some time, so it is created at data upload. The filtering is done as early as possible on the list of HOCR packets. This time index still takes time to create, ~3 min for 44826 packets (22.2 MB), and results in a list of 25.8 MB. Optimizing this step could be a great improvement.
# end at 49:25
# ymd_hms("2021-11-11 18:49:16"), ymd_hms("2021-11-11 18:50:25")
Station1PaP <- interval(ymd_hms("2022-07-05 14:15:35"), ymd_hms("2022-07-05 14:15:43"))
Apladate <- "2022-07-05"
# # Small test data with invalid packet: DATA_20211111_160607.bin
#
# BenchConFile <- system.file("extdata", "DATA_20211111_160607.bin", package = "sear", mustWork = T)
#
# # Python function
# Benchhocr_raw <- Hocr$from_file(BenchConFile)
#
# Benchhocr_raw <- unlist(Benchhocr_raw$packets, recursive = T)
#
# # Filter invalid HOCR packets
# BenchValidInd <- purrr::map_lgl(Benchhocr_raw, ~ str_detect(as.character(.x$instrument, errors = "ignore"), "SAT(HPL|HSE|HED|PLD)"))
#
# if (any(!BenchValidInd)) {
#   warning("Invalid HOCR packets detected and removed: ", length(which(!BenchValidInd)))
#   Benchhocr_raw <- Benchhocr_raw[BenchValidInd]
# } else {
#   Benchhocr_raw <- Benchhocr_raw
# }
#
# pur_lub <- function(hocr_raw, Apladate) {
#   purrr::map(.x = hocr_raw, ~ lubridate::ymd_hms(paste0(Apladate, "T", hms::as_hms(.x$gpstime / 1000))))
# }

pur_clock <- function(hocr_raw, Apladate) {
  purrr::map(.x = hocr_raw, ~ clock::date_time_parse(paste0(Apladate, " ", hms::as_hms(.x$gpstime / 1000)), zone = "UTC"))
}

# pur_base <- function(hocr_raw, Apladate) {
#   purrr::map(.x = hocr_raw, ~ base::strptime(paste0(Apladate, "T", hms::as_hms(.x$gpstime / 1000)), format = "%Y-%m-%dT%H:%M:%S", tz = "UTC"))
# }
#
# pur_lub2 <- function(hocr_raw, Apladate) {
#   purrr::map(.x = hocr_raw, ~ lubridate::fast_strptime(paste0(Apladate, "T", hms::as_hms(.x$gpstime / 1000)), format = "%Y-%m-%dT%H:%M:%S", tz = "UTC"))
# }
#
# microbenchmark(times = 10,
#   pur_lub(Benchhocr_raw, Apladate),
#   pur_clock(Benchhocr_raw, Apladate),
#   pur_base(Benchhocr_raw, Apladate),
#   pur_lub2(Benchhocr_raw, Apladate)
# )
# gc()
For now base::strptime is the fastest function, but it strangely returns a series of nested lists, so we use clock::date_time_parse instead.
The resulting list is larger than the original HOCR object list, which is rather strange; it likely comes from the size of the POSIXct object class.
HOCRtimeIndex <- pur_clock(hocr_raw, Apladate) # test <- unlist(HOCRtimeIndex, recursive = T)
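A possible optimization of this step (an untested sketch): clock::date_time_parse() is vectorized, so building all the timestamp strings first and parsing them in a single call avoids the per-packet overhead of purrr::map(), and returns a POSIXct vector instead of a list, which should also shrink the index size noted above.

# Untested sketch: vectorized construction of the time index
build_time_index <- function(hocr_raw, Apladate) {
  # Extract all gpstime values (ms) in one pass, then parse in one vectorized call
  gpstime <- purrr::map_dbl(hocr_raw, "gpstime")
  clock::date_time_parse(
    paste0(Apladate, " ", hms::as_hms(gpstime / 1000)),
    zone = "UTC"
  )
}
# HOCRtimeIndex <- build_time_index(hocr_raw, Apladate)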
To select the raw HOCR data corresponding to an observation, comparing numeric seconds with greater/lower-than operators may be faster than lubridate's %within%.
filter_hocr_time <- function(hocr_raw, HOCRtimeIndex, timeInt) {
  Ind <- purrr::map_lgl(.x = HOCRtimeIndex, ~ .x %within% timeInt)
  hocr_raw[Ind]
}

filter_hocr_numeric <- function(hocr_raw, HOCRtimeIndex, timeInt) {
  Ind <- purrr::map_lgl(
    .x = as.numeric(HOCRtimeIndex),
    ~ .x >= as.numeric(int_start(timeInt)) & .x <= as.numeric(int_end(timeInt))
  )
  hocr_raw[Ind]
}

microbenchmark(
  times = 10,
  filter_hocr_time(hocr_raw, HOCRtimeIndex, timeInt = Station1PaP),
  filter_hocr_numeric(hocr_raw, HOCRtimeIndex, timeInt = Station1PaP)
)

Filthocr_raw <- filter_hocr_numeric(hocr_raw, HOCRtimeIndex, timeInt = Station1PaP)
The acquisition rate of the HOCR depends on the light level, in order to optimize the S/N. In water the integration time is about 0.512 s; in air it is about 0.063 s. Dark frames are acquired with the same integration time, and the Shutter frame output parameter seems to be set to 5. This can reduce the number of light frames acquired per second to 2, or apparently even less; when less than one is available, the interpolation fails. 4 kt (the speed defined as optimal for multibeam acquisition) equals about 2 m/s, meaning that we can end up with too few points to discretize over a 2 meter distance.
We should think about the optimal HOCR parameters and/or acquisition speed.
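A rough worked estimate of the numbers above: with the 0.512 s in-water integration time, at most $1/0.512 \approx 2$ frames are acquired per second, and one shutter dark every 5 light frames reduces the light-frame rate further. At 4 kt $\approx$ 2 m/s, that leaves on the order of one light frame per meter, so a 2 m bin may contain only one or two usable frames, or none at all.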
We have to read the central wavelength from the .cal file of each instrument and associate it with the correct data frame (row in wide tidy format). It appears that passing a list (of one or several elements of even length) to tibble makes every atomic element be recycled over the length of the list. This is an easy way to get a long data format and add the wavelength {instrument, sn}-wise (sn alone would be enough, as it uniquely identifies an instrument). For the calibration coefficients, however, it is necessary to use the key {instrument, sn}, as the dark and light calibration files for the same instrument do not yield the same values and therefore represent unique frame types: SATHSE1397, SATHED1397, SATHPL1415, SATHPL1416, SATPLD1415, SATPLD1416.
We take advantage of the purrr family to apply the calibration functions to data nested in a tibble per {instrument, sn}: a compact, efficient way of applying the same function to a set of data frames.
To develop and illustrate the HOCR processing, we focus on a subset of raw_data, selected with the help of sear_view() as the first station in PaP.
CalFiles <- list.files(system.file("cal", "hocr", package = "sear", mustWork = T), full.names = T)

read_hocr_cal <- function(CalFiles) {
  # CalFiles is a list of calibration files.
  # Fit-type-specific coefficients:
  #   OPTIC3 fit type: a0, a1, im, cint
  #   THERM1 fit type: m0, m1, m2, m3, Tr
  #   POLYU fit type: a0, a1
  # Create one tidy data frame per fit type.

  CalFile <- read_lines(CalFiles, skip_empty_rows = T)
  CalFile <- CalFile[!str_detect(CalFile, "#")]

  # Cal files have two types of delimiter: space for definition lines and tab for calibration coefficients
  CalFile <- str_replace_all(CalFile, "\\t", " ")

  # Get the index of the definition lines
  indi <- purrr::imap_dbl(CalFile, ~ ifelse(str_detect(.x, "1 (OPTIC3|THERM1|POLYU)"), .y, NA))

  # Calibration data is on the next line
  cal_data <- CalFile[indi + 1]

  # Put definition and calibration on the same line
  CalFile <- tibble(Def = CalFile, Cal = cal_data)

  # Dirty stuff to remove calibration lines: offset the index by one to match calibration lines and logically remove them
  CalFile <- CalFile %>% filter(is.na(!append(indi[-length(indi)], NA, 0)))

  CalFile <- CalFile %>%
    separate(
      col = Def,
      into = c("type", "id", "units", "field_length", "data_type", "cal_lines", "fit_type"),
      sep = " ",
      remove = T,
      convert = T
    )

  CalID <- CalFile %>%
    filter(type %in% c("INSTRUMENT", "sn")) %>%
    select(type, id) %>%
    pivot_wider(names_from = type, values_from = id) %>%
    rename(instrument = INSTRUMENT)

  OPTIC3 <- CalFile %>%
    filter(type == "ES" | type == "LU") %>%
    separate(col = Cal, into = c("a0", "a1", "im", "cint"), sep = " ", remove = T, convert = T) %>%
    bind_cols(CalID) %>%
    rename(wavelength = "id") %>%
    mutate(wavelength = as.numeric(wavelength))

  THERM1 <- CalFile %>%
    filter(fit_type == "THERM1") %>%
    separate(col = Cal, into = c("m0", "m1", "m2", "m3", "Tr"), sep = " ", remove = T, convert = T) %>%
    bind_cols(CalID)

  # POLYU spans from a0 to an, should write code to take that into account.
  # Sep in cal file appears to be two spaces ... could manage all those with "[[:blank:]]"
  POLYU <- CalFile %>%
    filter(fit_type == "POLYU") %>%
    separate(col = Cal, into = c("a0", "a1"), sep = "[[:blank:]]{2}", remove = T, convert = T) %>%
    bind_cols(CalID)

  # tibble(fit_type = c("OPTIC3", "THERM1", "POLYU"), cal_data = list(OPTIC3 = OPTIC3, THERM1 = THERM1, POLYU = POLYU))

  return(list(OPTIC3 = OPTIC3, THERM1 = THERM1, POLYU = POLYU))
}

# test <- test %>%
#   mutate(
#     fit_type = case_when(
#       str_detect(text, "POLYU") ~ "POLYU"
#     ))

CalList <- purrr::map(CalFiles, read_hocr_cal)

FlatCal <- unlist(CalList, recursive = F)

OPTIC3 <- bind_rows(FlatCal[names(FlatCal) == "OPTIC3"])
THERM1 <- bind_rows(FlatCal[names(FlatCal) == "THERM1"])
POLYU <- bind_rows(FlatCal[names(FlatCal) == "POLYU"])
tidy_hocr <- function(Packets) {
  tibble::tibble(
    # Applanix time added by the DataLogger, in milliseconds.
    # Unknown date format in the binary file, so take the one from the txt file.
    # Note: a space separator is needed between date and time for the format below to parse.
    GPStime = as.POSIXct(paste0(Apladate, " ", hms::as_hms(Packets$gpstime / 1000)), format = "%Y-%m-%d %H:%M:%OS", tz = "UTC"),
    # HOCR packets
    # Work around the missing-byte bug by ignoring decoding errors
    instrument = as.character(Packets$instrument, errors = "ignore"),
    sn = as.character(Packets$sn, errors = "ignore"),
    integration_time = Packets$inttime,
    sample_delay = Packets$sampledelay,
    dark_sample = Packets$darksample,
    dark_average = Packets$darkaverage,
    spectrometer_temperature = as.character(Packets$spectemp, errors = "ignore"),
    frame = Packets$frame,
    timer = as.character(Packets$timer, errors = "ignore"), # How many seconds since the initialization sequence
    checksum = Packets$checksum,
    channel = Packets$channel
  )
}

raw_data <- purrr::map_df(Filthocr_raw, tidy_hocr)
Add wavelength and calibration data to raw_data from the calibration file, joining by {instrument, sn, subquery(rownumber)}. This assumes that each packet has 180 channels sorted from short to long wavelength.
# Taken from: https://gist.github.com/mdlincoln/528a53939538b07ade86
row_rep <- function(df, n) {
  df[rep(1:nrow(df), times = n), ]
}

glob_cal <- raw_data %>%
  group_by(instrument, sn) %>%
  nest(raw_data = !matches("instrument|sn"))

glob_cal <- glob_cal %>% filter(instrument %in% c("SATHPL", "SATHSE", "SATHED", "SATPLD"))

OPTIC3 <- OPTIC3 %>%
  group_by(instrument, sn) %>%
  nest(OPTIC3 = !matches("instrument|sn"))

THERM1 <- THERM1 %>%
  group_by(instrument, sn) %>%
  nest(THERM1 = !matches("instrument|sn"))

# Two sensors have the POLYU fit type; better to match by sensor, should make the rest consistent
INTTIME <- POLYU %>%
  filter(type == "INTTIME") %>%
  group_by(instrument, sn) %>%
  nest(INTTIME = !matches("instrument|sn"))

SAMPLE <- POLYU %>%
  filter(type == "SAMPLE") %>%
  group_by(instrument, sn) %>%
  nest(SAMPLE = !matches("instrument|sn"))

glob_cal <- left_join(glob_cal, OPTIC3, by = c("instrument", "sn"))
glob_cal <- left_join(glob_cal, THERM1, by = c("instrument", "sn"))
glob_cal <- left_join(glob_cal, INTTIME, by = c("instrument", "sn"))
glob_cal <- left_join(glob_cal, SAMPLE, by = c("instrument", "sn"))

glob_cal <- glob_cal %>%
  mutate(raw_data = purrr::map2(raw_data, OPTIC3, ~ bind_cols(.x, row_rep(.y, nrow(.x) / nrow(.y)))))
This data can be used to assess and correct the photodiode array thermal responsivity. We need to contact the manufacturer to learn about this processing.
To compute $aint$ from the INTTIME sensor (POLYU fit type):
$y = a_0x^0+a_1x^1$
Convert INTTIME to seconds.
cal_inttime <- function(raw_data, INTTIME) {
  a0 <- INTTIME$a0
  a1 <- INTTIME$a1
  modify_in(raw_data, .where = "integration_time", ~ (a0 * .x^0) + (a1 * .x^1))
}

glob_cal <- glob_cal %>%
  mutate(cal_data = purrr::map2(.x = raw_data, .y = INTTIME, .f = ~ cal_inttime(.x, .y)))
The raw file can be processed with the calibration files (light and dark) of the instruments. From those specifications we can read the binary .raw file, and from section 11, "Data Processing Equations" (ProSoft manual v7.7), we can compute the calibrated radiometric quantities.
Numerical counts are converted to calibrated physical units according to the equations provided in the Satlantic Instrument File Format.
If the sensor is in water: $y = im \cdot a_1 \cdot (x - a_0) \cdot (cint/aint)$
If the sensor is in air: $y = 1 \cdot a_1 \cdot (x - a_0) \cdot (cint/aint)$
Where $y$ is the calibrated physical unit, $x$ the numerical count recorded by the instrument, $im$ the immersion coefficient, $cint$ the integration time used during calibration, and $aint$ the adaptive integration time used during sampling, determined from the INTTIME sensor.
cal_optic3 <- function(.x, instrument) {
  if (str_detect(instrument, "HSE|HED")) {
    # In air
    mutate(.data = .x, channel = 1.0 * a1 * (channel - a0) * (cint / integration_time))
  } else if (str_detect(instrument, "HPL|PLD")) {
    # In water
    mutate(.data = .x, channel = im * a1 * (channel - a0) * (cint / integration_time))
  } else {
    warning(paste0("instrument name not valid: ", instrument))
  }
}

glob_cal <- glob_cal %>%
  mutate(cal_data = purrr::map2(.x = cal_data, .y = instrument, ~ cal_optic3(.x, .y)))
At this stage the data is at the L1b processing level, meaning that each instrument has its own time and wavelength coordinates. Before we can go further in display and processing (L1b -> L2), we need to map the data onto common time and wavelength coordinate vectors.
As the OPTIC3 calibration data is no longer needed, we can remove it. From there, I also think we can remove all metadata, as it will not be used further down; this could be reconsidered in the future, but for now it simplifies the interpolation operation. integration_time is used to calculate the interpolation time coordinate, so we have to keep it.
hocr_long <- glob_cal %>%
  # OPTIC3
  mutate(cal_data = purrr::map(
    cal_data,
    ~ select(.x, !all_of(c("units", "field_length", "data_type", "cal_lines", "fit_type", "a0", "a1", "im", "cint")))
  )) %>%
  # Packet metadata
  mutate(cal_data = purrr::map(
    cal_data,
    ~ select(.x, !all_of(c("sample_delay", "dark_sample", "dark_average", "spectrometer_temperature", "frame", "timer", "checksum")))
  )) %>%
  select(instrument, sn, cal_data) %>%
  filter(str_detect(instrument, "HSE|HPL"))
During the L1b to L2 conversion, data is interpolated onto a master time coordinate calculated from the $\text{e}_\text{s}$ (HSE) sensor. The min and max time values of the $\text{e}_\text{s}$ sensor serve as boundaries, between which time increases by a fixed step. The optical data is then linearly interpolated onto this common coordinate vector.
In ProSoft, the data is interpolated onto the instrument with the highest acquisition rate (HSE). This implies that data with a lower acquisition rate (HED, in-water sensors) is "replicated" by the interpolation, which seems to me an unnecessary computation that will not change the averaged or median value retained after discretization. POINT TO DISCUSS. For now, the sear app interpolates the data following this logic: 1. Select the instrument with the smallest number of observations. 2. Select the shortest integration time of this data frame. 3. Use this integration time as the time step over the time interval. This seems a good trade-off between keeping as much relevant information as possible, limiting the non-observation data created by interpolation, and computational efficiency.
We need to pass from the long to the wide format with the pivot_wider() function. We also remove empty channel data with the where() selection helper.
hocr_wide <- hocr_long %>%
  mutate(cal_data = purrr::map(
    cal_data,
    ~ pivot_wider(
      .,
      names_from = all_of(c("type", "wavelength")),
      names_sep = "_",
      values_from = channel
    )
  )) %>%
  mutate(cal_data = purrr::map(cal_data, ~ select(., where(function(x) all(!is.na(x))))))

ShortNobs <- hocr_wide %>%
  mutate(Nobs = purrr::map_dbl(cal_data, ~ length(rownames(.))))

ShortNobs <- ShortNobs %>%
  filter(Nobs == min(ShortNobs$Nobs)) %>%
  unnest(cols = c(cal_data))

Mintime <- min(ShortNobs$GPStime)
# format(Mintime, "%Y-%m-%d %H:%M:%OS3")

Maxtime <- max(ShortNobs$GPStime)
# format(Maxtime, "%Y-%m-%d %H:%M:%OS3")

timeSeq <- seq.POSIXt(Mintime, Maxtime, by = min(ShortNobs$integration_time))
# format(timeSeq, "%Y-%m-%d %H:%M:%OS3")
Once we obtain timeSeq, we go further by interpolating the instrument data onto this common time coordinate. We can now remove the integration_time variable. This workflow does not seem optimal and could be rethought.
hocr_wide <- hocr_wide %>%
  mutate(cal_data = purrr::map(cal_data, ~ select(., !integration_time)))

approx_tbl <- function(., timeSeq) {
  tbl <- tibble(date_time = timeSeq)
  for (i in seq_along(colnames(.))[-1]) {
    coord <- approx(x = .[[1]], y = .[[i]], xout = timeSeq, method = "linear")
    tbl <- bind_cols(tbl, x = coord[[2]])
    colnames(tbl)[i] <- colnames(.)[i]
  }
  tbl %>% mutate(id = seq_along(timeSeq), .before = date_time)
}

hocr_wide <- hocr_wide %>%
  mutate(AproxData = purrr::map(cal_data, ~ approx_tbl(., timeSeq)))
Hyperspectral data (OPTIC3) is calibrated by ProSoft with the shutter dark, typically acquired (depending on user request) every five light samples. The first test station processed lasted 7 seconds, during which no dark data was acquired. THIS POINT NEEDS DISCUSSION. A solution that may be acceptable when no shutter dark is present in the data selected by the user is to find the closest darks measured before and after the selected points.
The steps for shutter dark correction are defined in the ProSoft manual as follows:
Correct the shutter dark by the dark offset, computed as the difference between the shutter dark and the capped dark: $L_{CountsDarkDat} = L_{CountsDarkDat} - L_{CountsDarkOffset}$. First question: do we usually compute the dark offset in the field? After discussion with Simon, the dark offset for HOCR is disregarded.
Convert numerical counts to engineering units, using the same OPTIC3 fit type as for the light data.
Deglitch dark data with a first difference filter (optional)
Correct light data using shutter dark data: $L = L_{light}-L_{dark}$
The shutter dark data is identified by HED for the in-air sensor and by PLD for the in-water sensors.
Darkhocr_raw <- hocr_raw[purrr::map_lgl(hocr_raw, ~ str_detect(.$instrument, "HED|PLD"))]
Darkraw_data <- purrr::map_df(Darkhocr_raw, tidy_hocr)

Darkglob_cal <- Darkraw_data %>%
  group_by(instrument, sn) %>%
  nest(raw_data = !matches("instrument|sn"))

Darkglob_cal <- Darkglob_cal %>% filter(instrument %in% c("SATHPL", "SATHSE", "SATHED", "SATPLD"))

Darkglob_cal <- left_join(Darkglob_cal, OPTIC3, by = c("instrument", "sn"))
Darkglob_cal <- left_join(Darkglob_cal, THERM1, by = c("instrument", "sn"))
Darkglob_cal <- left_join(Darkglob_cal, INTTIME, by = c("instrument", "sn"))
Darkglob_cal <- left_join(Darkglob_cal, SAMPLE, by = c("instrument", "sn"))

Darkglob_cal <- Darkglob_cal %>%
  mutate(raw_data = purrr::map2(raw_data, OPTIC3, ~ bind_cols(.x, row_rep(.y, nrow(.x) / nrow(.y)))))

Darkglob_cal <- Darkglob_cal %>%
  mutate(cal_data = purrr::map2(.x = raw_data, .y = INTTIME, .f = ~ cal_inttime(.x, .y)))

Darkglob_cal <- Darkglob_cal %>%
  mutate(cal_data = purrr::map2(.x = cal_data, .y = instrument, ~ cal_optic3(.x, .y)))
The dark values are interpolated along time, at one value per minute.
Darkhocr_long <- Darkglob_cal %>%
  # OPTIC3
  mutate(cal_data = purrr::map(
    cal_data,
    ~ select(.x, !all_of(c("units", "field_length", "data_type", "cal_lines", "fit_type", "a0", "a1", "im", "cint")))
  )) %>%
  # Packet metadata
  mutate(cal_data = purrr::map(
    cal_data,
    ~ select(.x, !all_of(c("sample_delay", "dark_sample", "dark_average", "spectrometer_temperature", "frame", "timer", "checksum")))
  )) %>%
  select(instrument, sn, cal_data)
Darkhocr_wide <- Darkhocr_long %>%
  mutate(cal_data = purrr::map(
    cal_data,
    ~ pivot_wider(
      .,
      names_from = all_of(c("type", "wavelength")),
      names_sep = "_",
      values_from = channel
    )
  )) %>%
  mutate(cal_data = purrr::map(cal_data, ~ select(., where(function(x) all(!is.na(x))))))

ShortNobs <- Darkhocr_wide %>%
  mutate(Nobs = purrr::map_dbl(cal_data, ~ length(rownames(.))))

ShortNobs <- ShortNobs %>%
  filter(Nobs == min(ShortNobs$Nobs)) %>%
  unnest(cols = c(cal_data))

Mintime <- min(ShortNobs$GPStime)
# format(Mintime, "%Y-%m-%d %H:%M:%OS3")

Maxtime <- max(ShortNobs$GPStime)
# format(Maxtime, "%Y-%m-%d %H:%M:%OS3")

timeSeq <- seq.POSIXt(Mintime, Maxtime, by = 60) # one dark value per minute
# format(timeSeq, "%Y-%m-%d %H:%M:%OS3")
Darkhocr_wide <- Darkhocr_wide %>%
  mutate(cal_data = purrr::map(cal_data, ~ select(., !integration_time)))

approx_tbl <- function(., timeSeq) {
  tbl <- tibble(date_time = timeSeq)
  for (i in seq_along(colnames(.))[-1]) {
    coord <- approx(x = .[[1]], y = .[[i]], xout = timeSeq, method = "linear")
    tbl <- bind_cols(tbl, x = coord[[2]])
    colnames(tbl)[i] <- colnames(.)[i]
  }
  tbl %>% mutate(id = seq_along(timeSeq), .before = date_time)
}

Darkhocr_wide <- Darkhocr_wide %>%
  mutate(AproxData = purrr::map(cal_data, ~ approx_tbl(., timeSeq)))
Find the center time of the observation interval and select the nearest dark values (one dark per minute).
Obstime <- int_end(Station1PaP / 2)

Darkhocr_wide <- Darkhocr_wide %>%
  mutate(DarkAproxData = purrr::map(AproxData, ~ .x[which.min(abs(.x$date_time - Obstime)), ])) %>%
  ungroup() %>%
  select(sn, DarkAproxData)
hocr_wide <- left_join(hocr_wide, Darkhocr_wide, by = c("sn"))

cor_dark <- function(.x, .y) {
  id <- .x[, 1]
  date_time <- .x[, 2]
  Data <- .x[, -1:-2] - row_rep(.y[, -1:-2], nrow(.x[, -1:-2]))
  bind_cols(id, date_time, Data)
}

hocr_wide <- hocr_wide %>%
  mutate(AproxData = purrr::map2(AproxData, DarkAproxData, cor_dark))
Now that the data is interpolated onto a common time coordinate and corrected for the dark offset, we can transform it back to long format. For convenience, and since the physical variable (ES or LU) can be traced back to the instrument sn, we use the term channel (as in the HOCR packets) to name the variables.
hocr_long <- hocr_wide %>%
  mutate(AproxData = purrr::map(
    AproxData,
    ~ pivot_longer(
      .,
      cols = matches("[[:alpha:]]{2}_[[:digit:]]{3}(.[[:digit:]]{1,2})?"),
      values_to = "channel",
      names_to = c("wavelength"),
      names_prefix = "[[:alpha:]]{2}_",
      names_transform = list(wavelength = as.numeric)
    ) %>%
      group_by(id)
  ))
This data table is used to display the quality check. At the beginning of L1b all data is considered good (qc = 1); the user can then click on a spectrum to mark it as bad (0), and click again to change it back to good. Here, we flag ids 6 and 10 as bad.
QCData <- hocr_long$AproxData[[1]] %>%
  select(id, date_time) %>%
  unique() %>%
  mutate(qc = "1") %>%
  mutate(qc = ifelse(id %in% c(6, 10), "0", "1"))
To harmonize the visual identity of the app, we define global display parameters for the plotly graphs.
PlyFont <- list(family = "Times New Roman", size = 18)
BlackSquare <- list(
  type = "rect",
  fillcolor = "transparent",
  line = list(width = 0.5),
  xref = "paper", yref = "paper",
  x0 = 0, x1 = 1, y0 = 0, y1 = 1
)

L1bDatalong <- hocr_long %>%
  select(instrument, sn, AproxData) %>%
  mutate(AproxData = purrr::map(AproxData, ~ left_join(., QCData, by = c("id", "date_time"))))

ply <- L1bDatalong %>%
  # filter(str_detect(instrument, "HPL")) %>%
  mutate(Plot = purrr::map2(
    .x = AproxData, .y = sn,
    ~ plot_ly(.x, text = ~id) %>%
      add_lines(x = ~wavelength, y = ~channel, name = .y, showlegend = F, color = ~qc, colors = c("1" = "seagreen", "0" = "red")) %>%
      add_annotations(
        text = ~.y,
        x = 0.5, y = 1, yref = "paper", xref = "paper",
        xanchor = "middle", yanchor = "top",
        showarrow = FALSE, font = list(size = 15)
      ) %>%
      layout(
        shapes = BlackSquare,
        yaxis = list(
          rangemode = "nonnegative"
          # title = list(text = ~paste0(unique(.x$type), unique(.x$units)))
        ),
        xaxis = list(rangemode = "nonnegative")
      )
  ))

Lu <- ply %>%
  filter(str_detect(instrument, "HPL")) %>%
  subplot(shareX = T, shareY = T)

Ed <- ply %>% filter(str_detect(instrument, "HSE"))
Ed <- Ed$Plot

subplot(Ed[[1]], Lu, nrows = 2, margin = 0.035) %>%
  add_annotations(
    text = ~ TeX("\\text{wavelength [nm]}"),
    x = 0.5, y = -0.1, yref = "paper", xref = "paper",
    xanchor = "middle", yanchor = "bottom",
    showarrow = FALSE, font = list(size = 18)
  ) %>%
  layout(
    font = PlyFont,
    yaxis = list(title = list(text = TeX("\\text{e}_\\text{d}"))),
    yaxis2 = list(title = list(text = TeX("\\text{L}_\\text{u}")))
    # ,
    # xaxis3 = list(title = list(text = TeX("\\text{wavelength}")))
  ) %>%
  config(mathjax = "local", displayModeBar = T)
L1bDataWide <- hocr_wide %>%
  select(instrument, sn, AproxData) %>%
  mutate(AproxData = purrr::map(AproxData, ~ left_join(., QCData, by = c("date_time", "id"))))

L1bDataWide <- L1bDataWide %>%
  mutate(AproxData = purrr::map(AproxData, ~ filter(., qc == "1")))

L1bDataWide <- L1bDataWide %>%
  mutate(AproxData = purrr::map(AproxData, ~ summarise(.x, across(.cols = !matches("id|qc"), ~ mean(.x, na.rm = T)))))
ProSoft offers the possibility to interpolate the hyperspectral instrument wavelengths to common, user-defined wavelength coordinates. Alternatively, if the common interpolation is not performed, a wavelength difference tolerance of 0.2 nm is used in the L4 calculation. The sear app goes with the first option and requires interpolation to common wavelength coordinates. A sensible default is computed, which the user can further modify if desired. The chosen wavelength coordinates are saved for the project.
L1bAveragelong <- L1bDataWide %>%
  mutate(AproxData = purrr::map(
    AproxData,
    ~ pivot_longer(
      .,
      cols = matches("[[:alpha:]]{2}_[[:digit:]]{3}(.[[:digit:]]{1,2})?"),
      values_to = "channel",
      names_to = c("type", "wavelength"),
      names_sep = "_",
      # names_prefix = "[[:alpha:]]{2}_",
      names_transform = list(wavelength = as.numeric)
    )
  ))

wave_seq <- seq(353, 800, 3)

# Function that takes as input a long spectral dataset with columns:
# c("date_time", "type", "wavelength", "channel")
approx_wave <- function(., wave_seq) {
  tbl <- tibble(
    date_time = unique(.$date_time),
    type = unique(.$type),
    wavelength = wave_seq
  )
  for (i in seq_along(colnames(.))[-1:-3]) {
    coord <- approx(x = .[[3]], y = .[[i]], xout = wave_seq, method = "linear")
    tbl <- bind_cols(tbl, x = coord[[2]])
    colnames(tbl)[i] <- colnames(.)[i]
  }
  tbl
  # tbl %>% mutate(id = seq_along(timeSeq))
}

l1b_approx_long <- L1bAveragelong %>%
  mutate(IntData = purrr::map(AproxData, ~ approx_wave(., wave_seq)))
L1bAproxWide <- l1b_approx_long %>%
  mutate(IntData = purrr::map(
    IntData,
    ~ pivot_wider(
      .,
      names_from = all_of(c("type", "wavelength")),
      names_sep = "_",
      values_from = channel
    )
  )) %>%
  ungroup()

Es <- L1bAproxWide %>%
  select(!AproxData) %>%
  filter(sn == "1397") %>%
  unnest(cols = c(IntData)) %>%
  select(!matches("instrument|sn|date_time"))

LuZ1 <- L1bAproxWide %>%
  select(!AproxData) %>%
  filter(sn == "1416") %>%
  unnest(cols = c(IntData)) %>%
  select(!matches("instrument|sn|date_time"))

LuZ2 <- L1bAproxWide %>%
  select(!AproxData) %>%
  filter(sn == "1415") %>%
  unnest(cols = c(IntData)) %>%
  select(!matches("instrument|sn|date_time"))
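The diffuse attenuation coefficient of upwelling radiance, $K_{Lu}$, is estimated below from the two $L_u$ sensors separated by a known vertical distance ($\Delta z$ = 0.30 m): $K_{Lu}(\lambda) = (\ln L_u(z_1, \lambda) - \ln L_u(z_2, \lambda)) / \Delta z$.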
DeltaDepth <- 0.30 # 30 cm

KLuWide <- (log(LuZ1) - log(LuZ2)) / DeltaDepth

KLuWide <- rename_with(KLuWide, ~ str_replace(.x, "LU", "KLu"))
KLulong <- KLuWide %>%
  pivot_longer(
    .,
    cols = matches("[[:alpha:]]{2}_[[:digit:]]{3}(.[[:digit:]]{1,2})?"),
    values_to = "KLu",
    names_to = c("wavelength"),
    # names_sep = "_",
    names_prefix = "[[:alpha:]]{3}_",
    names_transform = list(wavelength = as.numeric)
  )

KLulong %>%
  plot_ly() %>%
  add_lines(x = ~wavelength, y = ~KLu, showlegend = F)
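As a first approximation, an uncorrected remote sensing reflectance is computed next from the upper $L_u$ sensor, ignoring its depth: $R_{rs} \approx 0.54 \cdot L_u(z_1)/E_s$, where 0.54 is (to my understanding) the usual factor for transmitting upwelling radiance across the water-air interface.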
# Uncorrected Rrs
uRrsWide <- 0.54 * LuZ1 / Es
uRrslong <- uRrsWide %>%
  pivot_longer(
    .,
    cols = matches("[[:alpha:]]{2}_[[:digit:]]{3}(.[[:digit:]]{1,2})?"),
    values_to = "Rrs",
    names_to = c("wavelength"),
    # names_sep = "_",
    names_prefix = "[[:alpha:]]{2}_",
    names_transform = list(wavelength = as.numeric)
  )

uRrslong %>%
  plot_ly() %>%
  add_lines(x = ~wavelength, y = ~Rrs, showlegend = F)
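To account for the depth of the upper sensor ($z_1$ = 0.10 m), $L_u(z_1)$ is propagated to just below the surface with $K_{Lu}$ before crossing the interface: $L_w = 0.54 \cdot L_u(z_1) / e^{-K_{Lu} z_1}$ and $R_{rs} = L_w/E_s$, as computed below.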
LuZ1Depth <- 0.10 # 10 cm

Lw <- 0.54 * LuZ1 / exp(-LuZ1Depth * KLuWide)

RrsWide <- Lw / Es
Rrslong <- RrsWide %>%
  pivot_longer(
    .,
    cols = matches("[[:alpha:]]{2}_[[:digit:]]{3}(.[[:digit:]]{1,2})?"),
    values_to = "Rrs",
    names_to = c("wavelength"),
    # names_sep = "_",
    names_prefix = "[[:alpha:]]{2}_",
    names_transform = list(wavelength = as.numeric)
  )

Rrslong %>%
  plot_ly() %>%
  add_lines(x = ~wavelength, y = ~Rrs, showlegend = F)
l2_data <- left_join(Rrslong, KLulong, by = "wavelength")