###############################################################################
# Author: Gina
# Created: may 18 2020
# purpose: read in data, put it in tidy format, save as rds (in chunks) to raw-files-rds
# NOTES:
#
# last updated:
#
###############################################################################
library(usethis)
library(dplyr)
library(readr)
library(readxl)
library(lubridate)
library(purrr)
library(tidyr)
library(tibble)
library(stringr)
library(JustTheFACTS) #--for read_raw_sensor_output function
# see which files have already been successfully compiled -----------------

# Folder holding one rds per previous run; each rds stores the names of the
# raw files that were read in that run.
yadir <- "data-raw/sensordata/compiled-list/"

# Only pick up rds files: anything else sitting in the folder (notes, hidden
# files, ...) cannot be read by readRDS() further down.
yacompfiles <- list.files(yadir, pattern = "\\.rds$")
# Read one compiled-list rds and squash its contents into a single string.
#
# file: name of an rds file inside `dir`
# dir:  folder holding the rds, with or without a trailing slash
#
# Returns a length-one character string: the elements of the stored object
# pasted together with "," between them.
read_ya_comp_files <- function(file, dir){
  # Add the separator only when it is missing, so "a/b" and "a/b/" both work
  # and an empty `dir` still means "relative to the working directory".
  if (nzchar(dir) && !grepl("[/\\\\]$", dir)) {
    dir <- paste0(dir, "/")
  }
  mycompfiles <- readRDS(paste0(dir, file))
  paste(mycompfiles, collapse = ",")
}
# Collect the names of every raw file listed in the compiled lists.
# read_ya_comp_files() hands back each list as one comma-separated string;
# separate_rows() splits those strings so `old` ends up with one file name
# per row in its `yaread` column.
if (length(yacompfiles) > 0) {
  old <-
    tibble(file = yacompfiles) %>%
    # map_chr() reads one compiled list per element, so no rowwise() grouping
    # is left hanging on the result
    mutate(yaread = map_chr(file, read_ya_comp_files, dir = yadir)) %>%
    separate_rows(yaread, sep = ",") %>%
    select(yaread) %>%
    distinct()
} else {
  # Nothing has been compiled yet. `old$yaread` is then NULL, and
  # `x %in% NULL` is FALSE for every x, so no new file gets excluded.
  old <- NULL
}

old
# read in new files -------------------------------------------------------

thepath <- "data-raw/sensordata/practice-raw-files/"

# Note: File names are meaningful, keep them consistent

# Find files with good names ----------------------------------------------
therawxls <-
  tibble(file = list.files(thepath)) %>%
  # Build the path from the name as it is on disk, THEN upper-case the name
  mutate(path = paste0(thepath, file),
         file = toupper(file)) %>%
  # Keep Excel files only. The dot is escaped and the pattern is anchored to
  # the end of the name, so "XLS" appearing elsewhere in a name is not enough.
  filter(grepl("\\.XLSX?$", file)) %>%
  # Skip anything already listed in the compiled lists
  filter(!(file %in% old$yaread)) %>%
  # Split the upper-cased name into logger, date, something, extension
  # Note: will use logger column to bind inventory
  separate(file,
           into = c("logger", "date_read", "something", "extension"),
           remove = FALSE) %>%
  select(file, logger, date_read, path)

#--if the file name doesn't contain the logger name, needs particular attention
myuhohs <- therawxls %>%
  filter(!grepl("EM", logger))

if (nrow(myuhohs) > 0) {
  warning("Some file don't have an EM in their names")
  # write_csv() needs a file, not a folder: write a dated csv into the
  # problem-file folder (created on first use).
  problemdir <- "data-raw/sensordata/problem-file/"
  dir.create(problemdir, recursive = TRUE, showWarnings = FALSE)
  write_csv(myuhohs, paste0(problemdir, "PROBLEM_", today(), ".csv"))
}

emraw <- therawxls %>%
  filter(grepl("EM", logger))

emraw
# Read file paths ---------------------------------------------------------
# One row per raw file goes in; after unnest() there is one row per record
# read from those files. The result is a plain (ungrouped) tibble: reading
# and filtering are done per element, so no group_by() is needed.
therawdata <-
  emraw %>%
  # Read the file paths, store the read data as a list-column
  # Uses JustTheFACTS function to read the file
  mutate(data = map(path, read_raw_sensor_output)) %>%
  # Get rid of ones w/o data. The check has to look at each element:
  # class() of the list-column itself is always "list".
  # NOTE(review): presumably read_raw_sensor_output() returns a "try-error"
  # object when a file cannot be read -- confirm against JustTheFACTS.
  filter(!map_lgl(data, inherits, what = "try-error")) %>%
  unnest(cols = c(data)) %>%
  mutate(value = as.numeric(value),
         year = year(date_time))
# separate into years, save as the rds data -------------------------------
# The data are written BEFORE the file names are recorded in 'compiled-list':
# a file listed there is never read again, so it should only be listed once
# its data are on disk.

rdsroot <- "data-raw/sensordata/raw-files-rds/"

# Rows whose date_time gave no year cannot be filed under any year folder
nundated <- sum(is.na(therawdata$year))
if (nundated > 0) {
  warning(nundated, " rows have no year and were not saved", call. = FALSE)
}

# One rds per year actually present in the data (not a fixed list of years),
# named with today's date and stored in that year's folder.
yearsread <- sort(unique(therawdata$year[!is.na(therawdata$year)]))

for (thisyear in yearsread) {
  yeardir <- paste0(rdsroot, thisyear, "/")
  dir.create(yeardir, recursive = TRUE, showWarnings = FALSE)
  yearfile <- paste0(yeardir, today(), ".rds")

  yeardata <- therawdata %>%
    filter(year == thisyear)

  # A second run on the same day adds to that day's file instead of
  # replacing what the earlier run saved.
  if (file.exists(yearfile)) {
    yeardata <- bind_rows(read_rds(yearfile), yeardata)
  }

  write_rds(yeardata, yearfile)
}

# make sure we don't read them again --------------------------------------
# Save file names in 'compiled-list' folder as rds w/todays date as name.
# therawdata has one row per record, so the names are de-duplicated first;
# names already stored today by an earlier run are kept as well.
compiledfile <- paste0("data-raw/sensordata/compiled-list/COMPILED_", today(),
                       ".rds")
readnow <- unique(as.character(therawdata$file))

if (file.exists(compiledfile)) {
  readnow <- union(readRDS(compiledfile), readnow)
}

saveRDS(readnow, compiledfile)

# NOTE(review): `inventory` is not created anywhere in this script; presumably
# it comes from an attached package or the calling session -- confirm.
inventory

therawdata
# (Web-page residue, not part of the script -- kept as comments so the file parses:)
# Add the following code to your website.
# For more information on customizing the embed code, read Embedding Snippets.