# data-raw/sensordata/code_sensordata-preprocess.R

###############################################################################
# Author: Gina
# Created: may 18 2020
# purpose: read in data, put it in tidy format, save as rds (in chunks) to raw-files-rds
# NOTES:
#
# last updated:
#
###############################################################################

library(usethis)
library(dplyr)
library(readr)
library(readxl)
library(lubridate)
library(purrr)
library(tidyr)
library(tibble)
library(stringr)


library(JustTheFACTS) #--for read_raw_sensor_output function



# see which files have already been successfully compiled -----------------

# Folder holding one rds per earlier run; each rds lists the raw files that
# run compiled. The trailing slash is required because paths are built with
# paste0() further down.
yadir <- "data-raw/sensordata/compiled-list/"
yacompfiles <- list.files(yadir)



# Read one rds file and collapse its contents into a single comma-separated
# string.
#
# file: name of the rds file (no directory part)
# dir:  directory containing `file`; pasted directly in front of `file`, so it
#       must end with a path separator
#
# Returns a length-one character vector.
read_ya_comp_files <- function(file, dir) {
  compiled_names <- readRDS(paste0(dir, file))
  paste(compiled_names, collapse = ",")
}


# Gather the distinct names stored in the compiled-list rds files into the
# single column `yaread`. `old` stays NULL when that folder is empty.
old <- NULL

if (length(yacompfiles) > 0) {
  old <-
    tibble(file = yacompfiles) %>%
    # one rds file per row, so each row is read on its own
    rowwise() %>%
    mutate(yaread = read_ya_comp_files(file = file, dir = yadir)) %>%
    # undo the comma-squashing done by read_ya_comp_files()
    separate_rows(yaread, sep = ",") %>%
    select(yaread) %>%
    distinct()
}



old

# read in new files -------------------------------------------------------


# Directory that is scanned for raw files. The trailing slash is required
# because full paths are built with paste0().
thepath <- "data-raw/sensordata/practice-raw-files/"


# Note: File names are meaningful, keep them consistent


# Find files with good names ----------------------------------------------

# One row per Excel file in `thepath` that is not already listed in
# `old$yaread`. Columns kept: upper-cased file name, logger, date_read and the
# full path (built from the original-case name so it still points at the file).
therawxls <-
  tibble(file = list.files(thepath)) %>%
  mutate(path = paste0(thepath, file),
         file = toupper(file)) %>%
  # Keep Excel files only. The dot is escaped and the pattern is anchored at
  # the end of the name, so a name that merely contains "XLS" somewhere is
  # no longer picked up.
  filter(grepl("\\.XLSX?$", file)) %>%
  # Skip files compiled in an earlier run (when `old` is NULL nothing is
  # skipped)
  filter(!(file %in% old$yaread)) %>%
  # Split the upper-cased name into logger, date, something, extension
  # Note: will use logger column to bind inventory
  separate(file,
           into = c("logger", "date_read", "something", "extension"),
           remove = FALSE) %>%
  select(file, logger, date_read, path)

#--if the file name doesn't contain the logger name, needs particular attention
# Files whose logger field lacks "EM" are set aside here and reported.
myuhohs <- therawxls %>%
  filter(!grepl("EM", logger))


if (nrow(myuhohs) > 0) {
  warning("Some file don't have an EM in their names")
  # write_csv() needs a file path; the bare directory used before cannot be
  # opened for writing. The list is written inside that directory, dated like
  # the other outputs of this script.
  write_csv(
    myuhohs,
    paste0("data-raw/sensordata/problem-file/PROBLEM-FILES_", today(), ".csv")
  )
}

# Keep only the files whose logger field contains "EM"
emraw <- filter(therawxls, grepl("EM", logger))

emraw

# Read file paths ---------------------------------------------------------


# Read every file listed in `emraw` and stack the results into one long table.
# NOTE(review): the result is still grouped by logger and file, and is saved
# that way below -- confirm whether downstream code relies on the grouping.
therawdata <-
  emraw %>%
  group_by(logger, file) %>%
  # Read the file paths, store the read data
  # Uses JustTheFACTS function to read the file
  mutate(data = map(path, read_raw_sensor_output)) %>%
  # Get rid of ones w/o data. Each element of the list column is checked on
  # its own; testing class(data) looks at the list itself, which is never a
  # "try-error", so nothing was ever dropped.
  filter(!map_lgl(data, inherits, what = "try-error")) %>%
  unnest(cols = c(data)) %>%
  # `value` and `date_time` come from the unnested data
  mutate(value = as.numeric(value),
         year = year(date_time))

# make sure we don't read them again --------------------------------------

# Save file names in 'compiled-list' folder as rds w/ today's date as name.
# After unnest() the file name is repeated on every data row, so it is
# de-duplicated before saving; the reader applies distinct() anyway.
# NOTE(review): the name only contains the date, so a second run on the same
# day replaces the list written by the first run -- confirm this is acceptable.
saveRDS(
  unique(as.character(therawdata$file)),
  paste0("data-raw/sensordata/compiled-list/COMPILED_", today(), ".rds")
)

# separate into years, save as the rds data -------------------------------------------------------

# Write one rds per year into that year's folder, named with today's date.
# Only these four years are written; rows from any other year are not saved.
walk(
  c(2016, 2017, 2018, 2019),
  function(target_year) {
    therawdata %>%
      filter(year == !!target_year) %>%
      write_rds(
        paste0("data-raw/sensordata/raw-files-rds/", target_year, "/",
               today(), ".rds")
      )
  }
)


# Print the objects for a quick look at the results.
# NOTE(review): `inventory` is not created anywhere in this script; presumably
# it is supplied by JustTheFACTS -- confirm, otherwise this line errors.
inventory
therawdata
# vanichols/JustTheFACTS documentation built on May 24, 2020, 5:31 a.m.