###############################################################################
# Author: Gina, PhD student w/Sotiris (vnichols@iastate.edu)
# Pat, field coordinator w/Sotiris
#
# Date created: March 13 2019
#
# Purpose: take user-interface excel sheet and create a tidy version
#
# Inputs: raw inventory sheets
#
# NOtes: Look at list of accepted sensors - will probably need to be updated
#
#
# Date last modified: March 19 2019 (look-over after break)
# Aug 26 2019 (get this thing working!)
# Nov 1 2019 (getting 2019 data to work, date_out drop down was bad)
# may 18 2020 (made into package)
###############################################################################
library(usethis)
library(dplyr)
library(readr)
library(readxl)
library(lubridate)
library(purrr)
library(tidyr)
# 2018 --------------------------------------------------------------------
myxl18 <- "data-raw/inventory/raw-inventories/2018/inventory_2018.xlsx"
#--What sites do we have?
mytabs18 <- readxl::excel_sheets(myxl18)
#--Get rid of first 'inventory' tab, if it exists (which it should for 2016-2018)
if (mytabs18[1] == "inventory") {
mytabs18 <- mytabs18[-1]
} else{
mytabs18 <- mytabs18
}
#--Create an empty vector to contain site-data in each element
dlst <- vector("list", length(mytabs18))
#--Read each tab and store it in the df vector
for (i in 1:length(mytabs18) ) {
mysite <- mytabs18[i]
dlst[[i]] <- readxl::read_excel(myxl18, sheet = paste(mysite))
}
#--Bind all of them together and voila
dat18 <-
dlst %>%
# &$*king plot is sometimes a character and sometimes a number
purrr::map(~mutate_at(.x, .vars = vars(plot), .f = funs(as.character))) %>%
dplyr::bind_rows()
#--make it tidy
dat18_filled <-
dat18 %>%
tidyr::fill(year, site, logger_id, incharge) %>% #--drop values down
dplyr::group_by(year, site, logger_id) %>%
tidyr::fill(date_in, date_out) #--drop values down (I think this is ok, not positive)
dat18_filled %>%
ungroup() %>%
select(sensor_type) %>%
distinct()
mysensors18 <- c("5TM", "CTD-10", "ECH2O-TE")
# get rid of unacceptable sensors' info
# If it's not in the mysensors list, make:
# plot, sensor_depth, sensor_type NAs
dat18_filtered <-
dat18_filled %>%
mutate(plot = ifelse(sensor_type %in% mysensors18,
plot, NA),
sensor_depth_cm = ifelse(sensor_type %in% mysensors18,
sensor_depth_cm, NA),
sensor_type = ifelse(sensor_type %in% mysensors18,
sensor_type, NA)) %>%
mutate(plot = ifelse(!is.na(sensor_depth_cm),
plot, NA), #--Make sure unused ports have no plot assignment
date_in = date(date_in), #--get rid of timestamps
date_out = date(date_out)) %>%
# if date_in is NA, get rid of it, Rafa had it so it must happen...
filter(!is.na(date_in)) %>%
mutate(date_out = if_else(is.na(date_out),
ymd("2018-12-31"), date_out)) %>% # If date_out is blank, change to 12-31-2018
select(-incharge, -notes) %>%
filter(!is.na(plot)) #--Lastly, filter out if plot is NA
# 2019 --------------------------------------------------------------------
myxl19 <- "data-raw/inventory/raw-inventories/2019/inventory_2019.xlsx"
#--What sites do we have?
mytabs19 <- excel_sheets(myxl19)
#--Get rid of first 'inventory' tab, if it exists (which it should for 2016-2018)
if (mytabs19[1] == "inventory") {
mytabs19 <- mytabs19[-1]
} else{
mytabs19 <- mytabs19
}
#--Create an empty vector to contain site-data in each element
dlst <- vector("list", length(mytabs19))
# Read each tab and store it in the df vector
#
for (i in 1:length(mytabs19) ) {
mysite <- mytabs19[i]
dlst[[i]] <- read_excel(myxl19, sheet = paste(mysite))
}
#--Bind all of them together and voila
dat19 <-
dlst %>%
# Plot is sometimes a character and sometimes a number.
# Every tab needs someone incharge
# Make sure blank tabs are deleted! Otherwise they mess up logger_ids.
map(~mutate_at(.x, .vars = vars(plot, notes), .f = funs(as.character))) %>%
bind_rows()
# tidy --------------------------------------------------------------------
dat19_filled <-
dat19 %>%
fill(year, site, logger_id, incharge) %>% #--drop values down
group_by(year, site, logger_id) %>%
fill(date_in, date_out) %>% #--drop values down (not ok)
mutate(date_in = as_date(date_in),
date_out = as_date(date_out),
diff = difftime(date_out, date_in, unit = "days")) %>%
# fix dropped down date_outs
mutate(date_out = as_date(ifelse(diff < 0, NA, date_out))) %>%
mutate(date_out = if_else(is.na(date_out),
ymd("2019-12-31"), date_out)) %>% # If date_out is blank, change to 12-31-2018
select(-diff)
#NOTE: AMES3N has date_in as 12/6/2019. Is that right?
dat19_filled %>%
ungroup() %>%
select(sensor_type) %>%
distinct()
#--based on convos w/Pat, these are the only acceptable types
mysensors19 <- c("5TM", "CTD-10", "ECH2O-TE")
#--if it's not in this list, make: plot, sensor_depth, sensor_type NA and filter it out
dat19_filtered <-
dat19_filled %>%
mutate(plot = ifelse(sensor_type %in% mysensors19, plot, NA),
sensor_depth_cm = ifelse(sensor_type %in% mysensors19, sensor_depth_cm, NA),
sensor_type = ifelse(sensor_type %in% mysensors19, sensor_type, NA)) %>%
#--Make sure unused ports have no plot assignment
mutate(plot = ifelse(is.na(sensor_depth_cm), NA, plot),
date_in = date(date_in), #--get rid of timestamps
date_out = date(date_out)) %>%
#--if date_in is NA get rid of it (just to be sure)
filter(!is.na(date_in)) %>%
#--if date_out is blank, change to today's date
mutate(date_out = if_else(is.na(date_out), as_date("2019-12-31"), date_out)) %>%
# filter out if plot is NA
filter(!is.na(plot)) %>%
select(-incharge, -notes)
# combine -----------------------------------------------------------------
tidyinv <-
bind_rows(dat18_filtered, dat19_filtered) %>%
ungroup() %>%
mutate(site = toupper(site),
plot = toupper(plot),
site_id = stringr::str_sub(site, 1, 4),
plot = paste0("P", plot),
plot_id = paste(year, site_id, plot, sep = "_")) %>%
select(year, site, logger_id, date_in, date_out, plot_id, sensor_port, sensor_depth_cm, sensor_type)
# checks ------------------------------------------------------------------
#--is date out later than date in?
tidyinv %>%
mutate(date_check = date_out > date_in) %>%
filter(date_check == 0)
#--is AMES3N still have weird date_ins?
tidyinv %>%
filter(site == "AMES3N",
date_in > as_date("2019-12-01"))
# write it ----------------------------------------------------------------
inventory <- tidyinv
inventory %>%
write_csv("data-raw/inventory/facts_inventory.csv")
use_data(inventory, overwrite = T)
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.