data-raw/inventory/code_inventory.R

###############################################################################
# Author: Gina, PhD student w/Sotiris (vnichols@iastate.edu)
#         Pat, field coordinator w/Sotiris
#
# Date created: March 13 2019
#
# Purpose: take user-interface excel sheet and create a tidy version
#
# Inputs: raw inventory sheets
#
# NOtes: Look at list of accepted sensors - will probably need to be updated
#
#
# Date last modified: March 19 2019 (look-over after break)
#                     Aug 26 2019 (get this thing working!)
#                     Nov 1 2019 (getting 2019 data to work, date_out drop down was bad)
#                     may 18 2020 (made into package)
###############################################################################

library(usethis)
library(dplyr)
library(readr)
library(readxl)
library(lubridate)
library(purrr)
library(tidyr)

# 2018 --------------------------------------------------------------------

myxl18 <- "data-raw/inventory/raw-inventories/2018/inventory_2018.xlsx"

#--What sites do we have?
mytabs18 <- readxl::excel_sheets(myxl18)

#--Get rid of first 'inventory' tab, if it exists (which it should for 2016-2018)
if (mytabs18[1] == "inventory") {
  mytabs18 <- mytabs18[-1]
} else{
  mytabs18 <- mytabs18
}

#--Create an empty vector to contain site-data in each element
dlst <- vector("list", length(mytabs18))

#--Read each tab and store it in the df vector
for (i in 1:length(mytabs18) ) {
  mysite <- mytabs18[i]
  dlst[[i]]  <- readxl::read_excel(myxl18, sheet = paste(mysite))
}

#--Bind all of them together and voila

dat18 <-
  dlst %>%
  # &$*king plot is sometimes a character and sometimes a number
  purrr::map(~mutate_at(.x, .vars = vars(plot), .f = funs(as.character))) %>%
  dplyr::bind_rows()


#--make it tidy
dat18_filled <-
  dat18 %>%
    tidyr::fill(year, site, logger_id, incharge) %>% #--drop values down
    dplyr::group_by(year, site, logger_id) %>%
    tidyr::fill(date_in, date_out)   #--drop values down (I think this is ok, not positive)


dat18_filled %>%
  ungroup() %>%
  select(sensor_type) %>%
  distinct()

mysensors18 <- c("5TM", "CTD-10", "ECH2O-TE")

# get rid of unacceptable sensors' info
  # If it's not in the mysensors list, make:
  #  plot, sensor_depth, sensor_type NAs
dat18_filtered <-
  dat18_filled %>%
  mutate(plot = ifelse(sensor_type %in% mysensors18,
                       plot, NA),
         sensor_depth_cm = ifelse(sensor_type %in% mysensors18,
                                  sensor_depth_cm, NA),
         sensor_type = ifelse(sensor_type %in% mysensors18,
                              sensor_type, NA)) %>%
  mutate(plot = ifelse(!is.na(sensor_depth_cm),
                       plot, NA), #--Make sure unused ports have no plot assignment
         date_in = date(date_in), #--get rid of timestamps
         date_out = date(date_out)) %>%
# if date_in is NA, get rid of it, Rafa had it so it must happen...
    filter(!is.na(date_in)) %>%
  mutate(date_out = if_else(is.na(date_out),
                              ymd("2018-12-31"), date_out)) %>% # If date_out is blank, change to 12-31-2018
    select(-incharge, -notes) %>%
  filter(!is.na(plot)) #--Lastly, filter out if plot is NA




# 2019 --------------------------------------------------------------------


myxl19 <- "data-raw/inventory/raw-inventories/2019/inventory_2019.xlsx"

#--What sites do we have?
mytabs19 <- excel_sheets(myxl19)

#--Get rid of first 'inventory' tab, if it exists (which it should for 2016-2018)
if (mytabs19[1] == "inventory") {
  mytabs19 <- mytabs19[-1]
} else{
  mytabs19 <- mytabs19
}

#--Create an empty vector to contain site-data in each element
dlst <- vector("list", length(mytabs19))

# Read each tab and store it in the df vector
#
for (i in 1:length(mytabs19) ) {
  mysite <- mytabs19[i]
  dlst[[i]]  <- read_excel(myxl19, sheet = paste(mysite))
}

#--Bind all of them together and voila
dat19 <-
  dlst %>%
  # Plot is sometimes a character and sometimes a number.
  # Every tab needs someone incharge
  # Make sure blank tabs are deleted! Otherwise they mess up logger_ids.
  map(~mutate_at(.x, .vars = vars(plot, notes), .f = funs(as.character))) %>%
  bind_rows()



# tidy --------------------------------------------------------------------

dat19_filled <-
  dat19 %>%
  fill(year, site, logger_id, incharge) %>% #--drop values down
    group_by(year, site, logger_id) %>%
  fill(date_in, date_out) %>%  #--drop values down (not ok)
  mutate(date_in = as_date(date_in),
         date_out = as_date(date_out),
         diff = difftime(date_out, date_in, unit = "days")) %>%
  # fix dropped down date_outs
  mutate(date_out = as_date(ifelse(diff < 0, NA, date_out))) %>%
  mutate(date_out = if_else(is.na(date_out),
                            ymd("2019-12-31"), date_out)) %>% # If date_out is blank, change to 12-31-2018
  select(-diff)

#NOTE: AMES3N has date_in as 12/6/2019. Is that right?

dat19_filled %>%
  ungroup() %>%
  select(sensor_type) %>%
  distinct()

#--based on convos w/Pat, these are the only acceptable types
mysensors19 <- c("5TM", "CTD-10", "ECH2O-TE")

#--if it's not in this list, make: plot, sensor_depth, sensor_type NA and filter it out

dat19_filtered <-
  dat19_filled %>%
    mutate(plot = ifelse(sensor_type %in% mysensors19, plot, NA),
         sensor_depth_cm = ifelse(sensor_type %in% mysensors19, sensor_depth_cm, NA),
         sensor_type = ifelse(sensor_type %in% mysensors19, sensor_type, NA)) %>%
  #--Make sure unused ports have no plot assignment
  mutate(plot = ifelse(is.na(sensor_depth_cm), NA, plot),
         date_in = date(date_in), #--get rid of timestamps
         date_out = date(date_out)) %>%
  #--if date_in is NA get rid of it (just to be sure)
  filter(!is.na(date_in)) %>%
  #--if date_out is blank, change to today's date
  mutate(date_out = if_else(is.na(date_out), as_date("2019-12-31"), date_out)) %>%
  # filter out if plot is NA
  filter(!is.na(plot)) %>%
  select(-incharge, -notes)


# combine -----------------------------------------------------------------

tidyinv <-
  bind_rows(dat18_filtered, dat19_filtered) %>%
  ungroup() %>%
  mutate(site = toupper(site),
         plot = toupper(plot),
         site_id = stringr::str_sub(site, 1, 4),
         plot = paste0("P", plot),
         plot_id = paste(year, site_id, plot, sep = "_")) %>%
  select(year, site, logger_id, date_in, date_out, plot_id, sensor_port, sensor_depth_cm, sensor_type)

# checks ------------------------------------------------------------------

#--is date out later than date in?
tidyinv %>%
  mutate(date_check = date_out > date_in) %>%
  filter(date_check == 0)

#--is AMES3N still have weird date_ins?
tidyinv %>%
  filter(site == "AMES3N",
         date_in > as_date("2019-12-01"))


# write it ----------------------------------------------------------------

inventory <- tidyinv

inventory %>%
  write_csv("data-raw/inventory/facts_inventory.csv")


use_data(inventory, overwrite = T)
vanichols/JustTheFACTS documentation built on May 24, 2020, 5:31 a.m.