#' procFulcrum
#'
#' \code{procFulcrum} processes raw Fulcrum data read into R with the \code{readFulcrum} function.
#' Processing consists of dropping unused variables, renaming variables for joining, and adding flags to collection data.
#'
#' @param data A list of dataframes generated by the \code{readFulcrum} function.
#' @return A list of up to five processed dataframes.
#' \tabular{ll}{
#' field_sampling_proc \tab a processed dataframe from the nematode_field_sampling.csv\cr
#' field_field_sampling_sample_photo_proc \tab a processed dataframe from the nematode_field_sampling_sample_photo.csv\cr
#' isolation_proc \tab a processed dataframe from the nematode_isolation.csv\cr
#' isolation_s_labeled_plates_proc \tab a processed dataframe from the nematode_isolation_s_labeled_plates.csv\cr
#' nematode_isolation_photos_proc \tab a processed dataframe from the nematode_isolation_photos.csv\cr
#' }
#' @import dplyr
#' @import lubridate
#' @import stringr
#' @importFrom runner streak_run
#' @export
procFulcrum <- function(data) {
# find names of data
data_names <- names(data)
# make processed data list
proc_data <- list()
# process field_sampling
if(TRUE %in% (stringr::str_detect(data_names, pattern = "_field_sampling$"))) {
message(glue::glue("Processing {stringr::str_subset(data_names, pattern = '_field_sampling$')}"))
field_sampling_proc <- data[[stringr::str_which(data_names, pattern = "_field_sampling$")]] %>%
dplyr::mutate(c_label = stringr::str_to_upper(c_label)) %>%
# name created_by to specify who picked up the sample
dplyr::rename(collection_by = created_by,
collection_fulcrum_latitude = latitude,
collection_fulcrum_longitude = longitude,
fulcrum_altitude = gps_altitude,
collection_local_time = time) %>%
dplyr::select(-updated_at,
-system_created_at,
-system_updated_at,
-date,
-sample_photo_caption, # not needed here, can bring in from photo export
-sample_photo_url,
-updated_by, -version, -status, -assigned_to, -geometry) %>% # make custom urls later with function
# this is UTC time (very important if you want to convert to local time)
dplyr::mutate(collection_datetime_UTC = lubridate::ymd_hms(created_at, tz = "UTC")) %>%
# again this is UTC date (very important if you want to convert to local date)
dplyr::mutate(collection_date_UTC = lubridate::date(created_at)) %>%
dplyr::select(-created_at) %>%
# Flag Fahrenheit observations and fix in proc
dplyr::mutate(flag_substrate_temperature = ifelse(substrate_temperature > 40, TRUE, FALSE),
proc_substrate_temperature = ifelse(substrate_temperature > 40,
FtoC(substrate_temperature),
substrate_temperature)) %>%
# Rename sub_temp with raw prefix
dplyr::rename(raw_substrate_temperature = substrate_temperature) %>%
# Fix ambient temp F to C
dplyr::mutate(flag_ambient_temperature = ifelse(ambient_temperature_c > 40, TRUE, FALSE),
proc_ambient_temperature = ifelse(ambient_temperature_c > 40,
FtoC(ambient_temperature_c),
ambient_temperature_c)) %>%
# Rename ambient_temp with raw prefix
dplyr::rename(raw_ambient_temperature = ambient_temperature_c) %>%
# force ambient temp to numeric
dplyr::mutate(raw_ambient_temperature = as.numeric(raw_ambient_temperature)) %>%
# add flags for runs of temperature data
dplyr::arrange(collection_datetime_UTC) %>%
dplyr::mutate(flag_ambient_temperature_run = ifelse(ambient_humidity == dplyr::lag(ambient_humidity) &
raw_ambient_temperature == dplyr::lag(raw_ambient_temperature) & gridsect == "no", TRUE, FALSE),
run_length = runner::streak_run(flag_ambient_temperature_run),
flag_ambient_temperature_run = dplyr::case_when((run_length >= 4 & flag_ambient_temperature_run == TRUE) ~ T,
(dplyr::lead(run_length, n = 3L) >= 4 & flag_ambient_temperature_run == TRUE) ~ T,
(dplyr::lead(run_length, n = 2L) >= 4 & flag_ambient_temperature_run == TRUE) ~ T,
(dplyr::lead(run_length, n = 1L) >= 4 & flag_ambient_temperature_run == TRUE) ~ T,
TRUE ~ F)) %>%
dplyr::select(-run_length) %>%
# flag duplicated C-labels
dplyr::group_by(c_label) %>%
dplyr::mutate(flag_duplicated_c_label_field_sampling = ifelse(dplyr::n() > 1, TRUE, FALSE)) %>%
dplyr::ungroup() %>%
# looks for commas in the sample_photo name
dplyr::mutate(flag_unusual_sample_photo_num = ifelse(is.na(stringr::str_count(sample_photo, pattern = ",")) |
stringr::str_count(sample_photo, pattern = ",") != 0, TRUE, FALSE)) %>%
# break apart multiple sample photos. This takes the first sample photo and warns if additional photos are discarded
tidyr::separate(col = sample_photo, into = c("sample_photo1", "sample_photo2", "sample_photo3"), sep = ",", extra = "drop", fill = "right") %>%
# force gridsect variables to correct class b/c they will default to logical if no gridsects are present
dplyr::mutate(gridsect_index = as.character(gridsect_index),
grid_sect_direction = as.character(grid_sect_direction),
gridsect_radius = as.character(gridsect_radius))
# add to processed list
proc_data["field_sampling_proc"] <- list(field_sampling_proc)
}
# Process field_sampling_sample_photo
if(TRUE %in% (stringr::str_detect(data_names, pattern = "_field_sampling_sample_photo"))) {
message(glue::glue("Processing {stringr::str_subset(data_names, pattern = '_field_sampling_sample_photo')}"))
field_sampling_sample_photo_proc <- data[[stringr::str_which(data_names, pattern = "_field_sampling_sample_photo")]] %>%
dplyr::group_by(fulcrum_parent_id) %>% # group to find best precision among photos
dplyr::arrange(exif_gps_dop) %>%
dplyr::ungroup() %>%
dplyr::distinct(fulcrum_parent_id, .keep_all = T) %>%
dplyr::mutate(best_photo_gps_dop_logical = TRUE) %>%
dplyr::select(fulcrum_id, exif_gps_latitude, exif_gps_longitude, exif_gps_altitude, best_photo_gps_dop_logical, best_photo_gps_dop = exif_gps_dop, best_photo_caption = caption)
# add to list
proc_data["field_sampling_sample_photo_proc"] <- list(field_sampling_sample_photo_proc)
}
# Process isolation
if(TRUE %in% (stringr::str_detect(data_names, pattern = "isolation$"))) {
message(glue::glue("Processing {stringr::str_subset(data_names, pattern = 'isolation$')}"))
isolation_proc <- data[[stringr::str_which(data_names, pattern = "isolation$")]] %>%
dplyr::group_by(c_label) %>%
dplyr::mutate(flag_duplicated_isolation_for_c_label = ifelse(dplyr::n() > 1, TRUE, FALSE)) %>% # could use count here without grouping?
dplyr::ungroup() %>%
# this is UTC time (very important if you want to convert to local time)
dplyr::mutate(isolation_datetime_UTC = lubridate::ymd_hms(created_at, tz = "UTC")) %>%
# again this is UTC date (very important if you want to convert to local date)
dplyr::mutate(isolation_date_UTC = lubridate::date(created_at)) %>%
dplyr::rename(c_label_id = c_label,
isolation_id = fulcrum_id,
isolation_by = created_by,
isolation_local_time = time,
isolation_latitude = latitude,
isolation_longitude = longitude,
isolation_photo = photos) %>% # TAC
dplyr::mutate(flag_unusual_isolation_photo_num = ifelse(is.na(stringr::str_count(isolation_photo, pattern = ",")) |
stringr::str_count(isolation_photo, pattern = ",") != 0, TRUE, FALSE)) %>%
# break apart multiple isolation photos. This takes the first isolation photo and warns if additional photos are discarded
tidyr::separate(col = isolation_photo, into = c("isolation_photo"), sep = ",", extra = "drop", fill = "right") %>%
dplyr::select(-created_at, -project, -geometry, -photos_caption, -photos_url, -gps_altitude, -gps_horizontal_accuracy,
-gps_vertical_accuracy, -gps_speed, -gps_course) # TAC , -photos
# add to list
proc_data["isolation_proc"] <- list(isolation_proc)
}
if(TRUE %in% (stringr::str_detect(data_names, pattern = "isolation_s_labeled_plates"))) {
message(glue::glue("Processing {stringr::str_subset(data_names, pattern = 'isolation_s_labeled_plates')}"))
isolation_s_labeled_plates_proc <- data[[stringr::str_which(data_names, pattern = "isolation_s_labeled_plates")]] %>%
dplyr::select(fulcrum_parent_id, s_label) %>%
# flag duplicated S-labels
dplyr::group_by(s_label) %>%
dplyr::mutate(flag_duplicated_s_label_isolation_s_labeled_plates = ifelse(dplyr::n() > 1, TRUE, FALSE),
flag_duplicated_s_label_isolation_s_labeled_plates = ifelse(flag_duplicated_s_label_isolation_s_labeled_plates == TRUE &
is.na(s_label), FALSE, flag_duplicated_s_label_isolation_s_labeled_plates)) %>%
dplyr::ungroup() %>%
# flag missing S-labels
dplyr::mutate(flag_missing_s_label_isolation_s_labeled_plates = ifelse(is.na(s_label), TRUE, FALSE))
# add to list
proc_data["isolation_s_labeled_plates_proc"] <- list(isolation_s_labeled_plates_proc)
}
if(TRUE %in% (stringr::str_detect(data_names, pattern = "isolation_photos$"))) {
message(glue::glue("Processing {stringr::str_subset(data_names, pattern = 'isolation_photos$')}"))
# read in isolation photos dataframe
isolation_photos_proc <- data[[stringr::str_which(data_names, pattern = "isolation_photos$")]]
# add to list
proc_data["isolation_photos_proc"] <- list(isolation_photos_proc)
}
# return list
return(proc_data)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.