#' joinFulcrum
#'
#' \code{joinFulcrum} joins the processed Fulcrum dataframes from the
#' procFulcrum function, and selects latitude, longitude, and altitude methods
#' with simple priority algorithm.
#'
#' @param data A list of processed dataframes generated by the
#' \code{procFulcrum} function. Valid \code{data} can include only Fulcrum
#' filed sampling data or all Fulcrum data including field sampling and
#' isolation data.
#' @param select_vars Logical, TRUE will return only the default variables,
#' FALSE will return all variables. FALSE is recommended if using customized
#' Fulcrum applications other than "Nematode field sampling" and "Nematode
#' isolation". FALSE is default.
#' @return A single, joined dataframe from the processed dataframes supplied in
#' the \code{data} list.
#' @importFrom glue glue
#' @import geosphere
#' @import dplyr
#' @export
joinFulcrum <- function(data, select_vars = F) {
# prevent scientific notation
options(scipen = 999)
# check which data is present in processed data list and send message
data_names <- as.data.frame(names(data)) %>%
dplyr::rename(data_name = `names(data)`)
message("Attempting to join:")
for (i in unique(data_names$data_name)) {
message(glue::glue("{substitute(data)}${i}"))
}
# join just sampling data
if(!(FALSE %in% (data_names$data_name %in% c("field_sampling_proc", "field_sampling_sample_photo_proc"))) &&
!(data_names$data_name %in% c("isolation_proc", "isolation_s_labeled_plates_proc", "isolation_photos_proc")) &&
nrow(data_names) == 2) {
# send message
message("Complete fulcrum isolation data not detected, joining field sampling data only.")
# join sampling data only
# join field_sampling_proc with isolation_proc
joined_data <- data$field_sampling_proc %>%
dplyr::mutate(best_photo = case_when(
sample_photo1 %in% data$field_sampling_sample_photo_proc$fulcrum_id ~ sample_photo1,
sample_photo2 %in% data$field_sampling_sample_photo_proc$fulcrum_id ~ sample_photo2,
sample_photo3 %in% data$field_sampling_sample_photo_proc$fulcrum_id ~ sample_photo3,
!(sample_photo1 %in% data$field_sampling_sample_photo_proc$fulcrum_id) & !(sample_photo2 %in% data$field_sampling_sample_photo_proc$fulcrum_id) &
!(sample_photo3 %in% data$field_sampling_sample_photo_proc$fulcrum_id) ~ NA_character_)) %>%
# Join field_sampling_sample_photo to above. In some cases there is not position data from the photos, returns NA for exif.
dplyr::left_join(data$field_sampling_sample_photo_proc, by = c("best_photo" = "fulcrum_id")) %>%
# Create flag to track if lat and long come from record or photo
dplyr::mutate(collection_lat_long_method = ifelse(is.na(exif_gps_latitude), "fulcrum", "photo")) %>%
# In cases where lat/lon are not available from photo set to collection_fulcrum_latitude and collection_fulcrum_longitude
dplyr::mutate(collection_latitude = ifelse(is.na(exif_gps_latitude), collection_fulcrum_latitude, exif_gps_latitude),
collection_longitude = ifelse(is.na(exif_gps_longitude), collection_fulcrum_longitude, exif_gps_longitude)) %>%
# Calculate the Haversine distance between fulcrum record_latitude and record_longitude and photo latitude and longitude
dplyr::rowwise() %>%
dplyr::mutate(collection_lat_long_method_diff = geosphere::distHaversine(c(collection_longitude, collection_latitude),
c(collection_fulcrum_longitude, collection_fulcrum_latitude)),
# adjust collection_lat_long_method_diff to NA if there is only a fulcrum GPS position
collection_lat_long_method_diff = ifelse(collection_lat_long_method == "fulcrum", NA, collection_lat_long_method_diff),
flag_collection_lat_long_method_diff_extreme = case_when(is.na(collection_lat_long_method_diff) ~ FALSE,
!is.na(collection_lat_long_method_diff) & collection_lat_long_method_diff < 500 ~ FALSE,
!is.na(collection_lat_long_method_diff) & collection_lat_long_method_diff >= 500 ~ TRUE,
TRUE ~ NA)) %>%
dplyr::ungroup() %>%
# fix altitude method and altitude
dplyr::mutate(collection_altitude = ifelse(collection_lat_long_method == "photo" & !(is.na(exif_gps_altitude)), exif_gps_altitude,
ifelse(is.na(exif_gps_altitude) & !(is.na(fulcrum_altitude)), fulcrum_altitude,
ifelse(is.na(exif_gps_altitude) & is.na(fulcrum_altitude), NA, NA))),
collection_altitude_method = ifelse(collection_lat_long_method == "photo" & !(is.na(exif_gps_altitude)), "photo",
ifelse(is.na(exif_gps_altitude) & !(is.na(fulcrum_altitude)), "fulcrum",
ifelse(is.na(exif_gps_altitude) & is.na(fulcrum_altitude), NA, NA))))
if(select_vars == TRUE) {
# set variable order
joined_data_selected <- join_data %>%
dplyr::select(project,
c_label,
flag_ambient_temperature,
flag_ambient_temperature_run,
flag_substrate_temperature,
flag_unusual_sample_photo_num,
flag_duplicated_c_label_field_sampling,
collection_by,
collection_datetime_UTC,
collection_date_UTC,
collection_local_time,
collection_fulcrum_latitude,
collection_fulcrum_longitude,
exif_gps_latitude,
exif_gps_longitude,
collection_latitude,
collection_longitude,
collection_lat_long_method,
collection_lat_long_method_diff,
flag_collection_lat_long_method_diff_extreme,
fulcrum_altitude,
exif_gps_altitude,
collection_altitude,
collection_altitude_method,
landscape,
sky_view,
ambient_humidity,
substrate,
substrate_notes,
substrate_other,
raw_ambient_temperature,
proc_ambient_temperature,
raw_substrate_temperature,
proc_substrate_temperature,
gridsect,
gridsect_index,
gridsect_radius,
grid_sect_direction,
sample_photo1,
sample_photo2,
sample_photo3,
best_photo, # used to be best_exif_dop_photo
best_photo_gps_dop,
best_photo_caption,
gps_course,
gps_horizontal_accuracy,
gps_speed,
gps_vertical_accuracy)
}
}
# join all data
else if(!(FALSE %in% (data_names$data_name %in% c("field_sampling_proc", "field_sampling_sample_photo_proc","isolation_proc",
"isolation_s_labeled_plates_proc", "isolation_photos_proc"))) &&
nrow(data_names) == 5) {
# send message
message("Complete fulcrum data detected, joining all data.")
# join field_sampling_proc with isolation_proc
joined_data <- dplyr::full_join(data$isolation_proc, data$field_sampling_proc, by = c("c_label_id" = "fulcrum_id")) %>%
dplyr::select(c_label,
everything(),
-c_label_id) %>%
# provide best_photo for joining
dplyr::mutate(best_photo = case_when(
sample_photo1 %in% data$field_sampling_sample_photo_proc$fulcrum_id ~ sample_photo1,
sample_photo2 %in% data$field_sampling_sample_photo_proc$fulcrum_id &
!(sample_photo1 %in% data$field_sampling_sample_photo_proc$fulcrum_id) &
!(sample_photo3 %in% data$field_sampling_sample_photo_proc$fulcrum_id) ~ sample_photo2,
sample_photo3 %in% data$field_sampling_sample_photo_proc$fulcrum_id &
!(sample_photo1 %in% data$field_sampling_sample_photo_proc$fulcrum_id) &
!(sample_photo2 %in% data$field_sampling_sample_photo_proc$fulcrum_id) ~ sample_photo3,
!(sample_photo1 %in% data$field_sampling_sample_photo_proc$fulcrum_id) &
!(sample_photo2 %in% data$field_sampling_sample_photo_proc$fulcrum_id) &
!(sample_photo3 %in% data$field_sampling_sample_photo_proc$fulcrum_id) ~ NA_character_)) %>%
# Join field_sampling_sample_photo to above. In some cases there is not position data from the photos, returns NA for exif.
dplyr::left_join(data$field_sampling_sample_photo_proc, by = c("best_photo" = "fulcrum_id")) %>%
# Create flag to track if lat and long come from record or photo
dplyr::mutate(collection_lat_long_method = ifelse(is.na(exif_gps_latitude), "fulcrum", "photo")) %>%
# In cases where lat/lon are not available from photo set to collection_fulcrum_latitude and collection_fulcrum_longitude
dplyr::mutate(collection_latitude = ifelse(is.na(exif_gps_latitude), collection_fulcrum_latitude, exif_gps_latitude),
collection_longitude = ifelse(is.na(exif_gps_longitude), collection_fulcrum_longitude, exif_gps_longitude)) %>%
# Add flag for missing isolation records
dplyr::mutate(flag_missing_isolation_record = ifelse(is.na(isolation_by), TRUE, FALSE)) %>%
# Calculate the Haversine distance between fulcrum record_latitude and record_longitue and photo latitude and longitude
dplyr::rowwise() %>%
dplyr::mutate(collection_lat_long_method_diff = geosphere::distHaversine(c(collection_longitude, collection_latitude),
c(collection_fulcrum_longitude, collection_fulcrum_latitude)),
# adjust collection_lat_long_method_diff to NA if there is only a fulcrum GPS position
collection_lat_long_method_diff = ifelse(collection_lat_long_method == "fulcrum", NA, collection_lat_long_method_diff),
flag_collection_lat_long_method_diff_extreme = case_when(is.na(collection_lat_long_method_diff) ~ FALSE,
!is.na(collection_lat_long_method_diff) & collection_lat_long_method_diff < 500 ~ FALSE,
!is.na(collection_lat_long_method_diff) & collection_lat_long_method_diff >= 500 ~ TRUE,
TRUE ~ NA)) %>%
# fix altitude method and altitude
dplyr::mutate(collection_altitude = ifelse(collection_lat_long_method == "photo" & !(is.na(exif_gps_altitude)), exif_gps_altitude,
ifelse(is.na(exif_gps_altitude) & !(is.na(fulcrum_altitude)), fulcrum_altitude,
ifelse(is.na(exif_gps_altitude) & is.na(fulcrum_altitude), NA, NA))),
collection_altitude_method = ifelse(collection_lat_long_method == "photo" & !(is.na(exif_gps_altitude)), "photo",
ifelse(is.na(exif_gps_altitude) & !(is.na(fulcrum_altitude)), "fulcrum",
ifelse(is.na(exif_gps_altitude) & is.na(fulcrum_altitude), NA, NA)))) %>%
# Flag extreme altitude values
dplyr::mutate(flag_collection_altitude_extreme = ifelse(collection_altitude > 10000 | collection_altitude < 0,
TRUE, FALSE)) %>%
# Flag extreme temperature values
dplyr::mutate(flag_substrate_temperature_extreme = ifelse(proc_substrate_temperature > 40 | proc_substrate_temperature < 0,
TRUE, FALSE),
flag_ambient_temperature_extreme = ifelse(proc_ambient_temperature > 40 | proc_ambient_temperature < 0,
TRUE, FALSE)) %>%
dplyr::ungroup() %>%
# join c-plates to s-plates with isolation_s_labeled_plates
dplyr::full_join(data$isolation_s_labeled_plates_proc, ., by = c("fulcrum_parent_id" = "isolation_id")) %>%
dplyr::select(-fulcrum_parent_id)
# chose the selected data or not
if(select_vars == TRUE) {
# set varible order
joined_data_selected <- joined_data %>%
dplyr::select(project,
c_label,
s_label,
flag_ambient_temperature,
flag_ambient_temperature_extreme,
flag_ambient_temperature_run,
flag_substrate_temperature,
flag_substrate_temperature_extreme,
flag_unusual_sample_photo_num,
flag_duplicated_c_label_field_sampling,
flag_duplicated_isolation_for_c_label,
flag_duplicated_s_label_isolation_s_labeled_plates,
flag_missing_s_label_isolation_s_labeled_plates,
flag_missing_isolation_record,
flag_unusual_isolation_photo_num,
collection_by,
collection_datetime_UTC,
collection_date_UTC,
collection_local_time,
collection_fulcrum_latitude,
collection_fulcrum_longitude,
exif_gps_latitude,
exif_gps_longitude,
collection_latitude,
collection_longitude,
collection_lat_long_method,
collection_lat_long_method_diff,
flag_collection_lat_long_method_diff_extreme,
fulcrum_altitude,
exif_gps_altitude,
collection_altitude,
collection_altitude_method,
flag_collection_altitude_extreme,
landscape,
sky_view,
ambient_humidity,
substrate,
substrate_notes,
substrate_other,
raw_ambient_temperature,
proc_ambient_temperature,
raw_substrate_temperature,
proc_substrate_temperature,
gridsect,
gridsect_index,
gridsect_radius,
grid_sect_direction,
sample_photo1,
sample_photo2,
sample_photo3,
best_photo, # used to be best_exif_dop_photo
best_photo_gps_dop,
best_photo_caption,
gps_course,
gps_horizontal_accuracy,
gps_speed,
gps_vertical_accuracy,
isolation_by,
isolation_datetime_UTC,
isolation_date_UTC,
isolation_local_time,
isolation_latitude,
isolation_longitude,
isolation_photo,
worms_on_sample)
}
}
else {
message("Invalid list of dataframes supplied to joinFulcrum. Are there 5 or 2 processed dataframes in the list? If so, are they named correctly? These conditions must be satisfied for joinFulcrum to work.")
}
# return data
if(select_vars == TRUE){
message("returning joined and selected data, set select_vars to FALSE if variables are missing")
return(joined_data_selected)
}
else{
message("returning joined data, set select_vars to TRUE if you want to select default variables")
return(joined_data)
}
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.