R/read_raw_sensor_output.R

Defines functions read_raw_sensor_output

Documented in read_raw_sensor_output

#' Read a raw sensor output file into a tidy long data frame
#'
#' Reads the first sheet of an Excel logger export whose first three rows hold
#' the port names, the sensor types and the sensor units, followed by one row
#' of readings per time stamp. The readings are reshaped to long format and
#' only the channels of interest are kept: 5TM channels whose unit mentions
#' "VWC" or "Temp", and CTD channels whose unit mentions "Water Level".
#' Rows whose `value` is `NA` are kept on purpose (it may just be a sensor
#' glitch).
#'
#' @param thefilepath Path to a single raw sensor output file (`.xls` or
#'   `.xlsx`).
#' @return A tidy long data frame with the columns `date_time`, `port_id`,
#'   `sensor_port`, `sensor_type_simp`, `sensor_unit` and `value`.
#' @export
read_raw_sensor_output <- function(thefilepath){

  #--fail early, with a clear message, on anything that is not one Excel file
  if (!is.character(thefilepath) || length(thefilepath) != 1 || is.na(thefilepath)) {
    stop("`thefilepath` must be a single character string.", call. = FALSE)
  }
  if (!grepl("\\.xlsx?$", thefilepath, ignore.case = TRUE)) {
    stop("`thefilepath` must point to a .xls or .xlsx file: ", thefilepath,
         call. = FALSE)
  }
  if (!file.exists(thefilepath)) {
    stop("File does not exist: ", thefilepath, call. = FALSE)
  }

  #--for trouble shooting
  #thefilepath <- "data-raw/sensordata/raw-files/EM13812 31May19-1006.xls"

  #--name-repair chatter from reading the header is expected, so silence it
  suppressMessages({

    #--read in top '3' things: the column names plus the two rows below them
    #--(sensor type, sensor unit); the first column is dropped
    theheader <-
      read_excel(thefilepath, sheet = 1, n_max = 2) %>%
      select(-1)

    #--extract the info, create unique port names based on sensor it is reading
    #--NOTE: must keep all ports so the names list is the right length
    theinfo <-
      as_tibble(cbind(port = names(theheader), t(theheader)), .name_repair = "unique") %>%
      #--sometimes port gets ... after it, so only the first 6 characters are
      #--kept (NOTE: this assumes single-digit port numbers, e.g. "Port 1")
      mutate(port = str_sub(port, 1, 6),
             port = str_replace_all(port, " ", "")) %>%
      rename("sensor_type" = 2,
             "sensor_unit" = 3) %>%
      mutate(
        sensor_type_simp = str_sub(sensor_type, 1, 3),
        #--port, sensor type and unit are glued with "_"; it is split on "_" later
        port_id = paste(port, sensor_type_simp, sensor_unit, sep = "_")
      )

    #--create a vector that will be the name
    thenames <- c("date_time", theinfo$port_id)

    #--actually read in the data
    #--cells that cannot be read as numbers become NA; those warnings are
    #--deliberately silenced
    suppressWarnings({
      therawdat <- read_excel(thefilepath,
                              sheet = 1,
                              skip = 2,
                              na = "#N/A",
                              col_types = c("date", rep("numeric", length(thenames) - 1)))
    })

    #--give it the new tidy names
    names(therawdat) <- thenames

    #--make it long format
    thetidydata <-
      therawdat %>%
      pivot_longer(-date_time, names_to = "port_id", values_to = "value") %>%
      #--we want to keep it if the value is NA, it might just be a glitch in the sensor
      #--NOTE, any other sensor/unit combination becomes NA here
      mutate(
        sensor_unit_simp = case_when(
          str_detect(port_id, '5TM') & str_detect(port_id, 'VWC') ~ "soilm_VWC",
          str_detect(port_id, '5TM') & str_detect(port_id, 'Temp') ~ "soilt_C",
          str_detect(port_id, 'CTD') & str_detect(port_id, 'Water Level') ~ "wtdepth_mm")
      ) %>%
      #--instead filter out the SENSORS we don't care about
      filter(!is.na(sensor_unit_simp)) %>%
      #--create a cleaner port_id
      #--split only on the "_" used to build port_id; the unit text itself
      #--contains spaces/symbols, so the default separator would produce extra
      #--pieces and a "pieces discarded" warning on every call
      separate(port_id,
               into = c("port", "sensor_type_simp", "ugly_units"),
               sep = "_",
               extra = "merge") %>%
      mutate(port_id = paste(port, sensor_type_simp, sensor_unit_simp, sep = "-"),
             sensor_port = parse_number(port)) %>%
      select(date_time, port_id, sensor_port, sensor_type_simp, sensor_unit_simp, value) %>%
      rename(sensor_unit = sensor_unit_simp)

  }) #--end suppress messages

  thetidydata
}
vanichols/JustTheFACTS documentation built on May 24, 2020, 5:31 a.m.