# R/join_rza_to_fastrcat.R

#' @title Link RZA data to FastrCAT data
#' @description Links temperature and salinity generated from FastrCAT to
#' Rapid Zooplankton Assessment data. Six new columns are added to the RZA
#' data, these are mean of the water column, surface which is defined as the
#' mean of the top 5 meters, and bottom which is defined as the mean of the
#' bottom 5 meters; for both temperature and salinity. These are joined
#' by the station.haul names.
#' @param rza_path The path to the directory where the .xlsx rza dataframe
#' is located along with the .xlsx file name. This is a dataframe specifically
#' formatted for rza data entry.
#' @param fastrcat_path The path to the directory of where the .csv file
#' generated by FastrCAT::make_dataframe_fc(). This must be from the same
#' Cruise as the RZA data.
#' @param depth_range The depth range wanted to define surface and bottom. The
#' default is 5 meters, this means the surface is defined as 0 to 5
#' meters and the bottom is defined as maximum tow depth to the 5 meters above.
#' Must be a single non-missing number.
#' @return writes a new .csv file, named <CRUISE>_RZA_Temp_Sal.csv, to the
#' folder containing the RZA file. Date/time and latitude/longitude returned
#' will be from the fastrcat. The value of readr::write_csv() (the linked
#' data) is returned invisibly.
#' @export join_rza_to_fastrcat

join_rza_to_fastrcat <- function(rza_path, fastrcat_path, depth_range = 5){

# depth_range is used in scalar comparisons against DEPTH----------------------
  if(!is.numeric(depth_range) || length(depth_range) != 1 ||
     is.na(depth_range)){
    stop("depth_range must be a single non-missing number", call. = FALSE)
  }

# reads in the first sheet in the .xlsx rza sheet------------------------------
  rza <- readxl::read_excel(rza_path, sheet = 1)%>%
    dplyr::select(CRUISE, STATION_NAME, HAUL_ID, FOCI_GRID, GEAR_NAME, NET,
                  SORTER, JELLY_FOULING, PHYTOPLANKTON_FOULING, SAMPLE_COLOR,
                  BEAKER_VOLUME, SUBSAMPLE_VOLUME, RZA_TAXA, SUBSAMPLE_COUNT,
                  TOTAL_COUNT, VOLUME_FILTERED, EST_NUM_PERM3)%>%
    tidyr::unite(STATION_NAME, HAUL_ID, col = "STATION_HAUL", sep = ".",
                 remove = FALSE)

# reads in the fastrcat .csv file----------------------------------------------
  fastrcat <- readr::read_csv(fastrcat_path,
                              col_types = readr::cols_only(
                                LAT = readr::col_double(),
                                LON = readr::col_double(),
                                DATE = readr::col_date(),
                                TIME = readr::col_character(),
                                DEPTH = readr::col_integer(),
                                DEPTH_BOTTOM = readr::col_integer(),
                                TEMPERATURE1 = readr::col_double(),
                                SALINITY1 = readr::col_double(),
                                CRUISE = readr::col_character(),
                                STATION_NAME = readr::col_integer(),
                                HAUL_NAME = readr::col_integer(),
                                FOCI_GRID = readr::col_character()))%>%
    tidyr::unite(STATION_NAME, HAUL_NAME, col = "STATION_HAUL", sep = ".",
                 remove = FALSE)

# checks if the Cruises are the same-------------------------------------------
# Each file must hold exactly one (non-missing) cruise; the length checks keep
# the if() condition a scalar even when a file is empty or mixes cruises.
  rza_cruise <- unique(rza$CRUISE[!is.na(rza$CRUISE)])
  fastrcat_cruise <- unique(fastrcat$CRUISE[!is.na(fastrcat$CRUISE)])

  if(length(rza_cruise) != 1 || length(fastrcat_cruise) != 1 ||
     rza_cruise != fastrcat_cruise){
    stop("Cruises don't match. Check file paths and make sure they are pointing
          to rza and fastrcat data from the same cruise")
  }

# make means of entire water column and surface and bottom

# mean of the water column for each station.haul-------------------------------
  mean_tot_col <- fastrcat %>%
    dplyr::group_by(STATION_HAUL)%>%
    dplyr::summarise(MEAN_TEMP = mean(TEMPERATURE1, na.rm = TRUE),
                     MEAN_SALT = mean(SALINITY1, na.rm = TRUE))

# mean of the surface from 0 meters to param depth_range-----------------------
  mean_surf <- fastrcat %>%
    dplyr::filter(DEPTH <= depth_range)%>%
    dplyr::group_by(STATION_HAUL)%>%
    dplyr::summarise(SURF_TEMP = mean(TEMPERATURE1, na.rm = TRUE),
                     SURF_SALT = mean(SALINITY1, na.rm = TRUE))

# mean of the bottom from max tow depth to param depth_range above max---------
# The filter runs after group_by so max(DEPTH) is taken per station.haul.
  mean_bot <- fastrcat %>%
    dplyr::group_by(STATION_HAUL)%>%
    dplyr::filter(DEPTH >= (max(DEPTH, na.rm = TRUE) - depth_range))%>%
    dplyr::summarise(BOTT_TEMP = mean(TEMPERATURE1, na.rm = TRUE),
                     BOTT_SALT = mean(SALINITY1, na.rm = TRUE))

# Bind the means together by station.haul--------------------------------------
  means_all <- mean_tot_col %>%
    dplyr::left_join(mean_surf, by = "STATION_HAUL")%>%
    dplyr::left_join(mean_bot, by = "STATION_HAUL")

# Reduce fastrcat data to uniques from Station.haul----------------------------
# The right join keeps every RZA row; fastrcat columns are NA for any
# station.haul that has no match in the fastrcat data.
  rza_eco <- fastrcat %>%
    dplyr::select(CRUISE, STATION_NAME, HAUL_NAME, STATION_HAUL, FOCI_GRID,
                  DATE, TIME, LAT, LON, DEPTH_BOTTOM)%>%
    dplyr::distinct(STATION_HAUL, .keep_all = TRUE)%>%
    dplyr::left_join(means_all, by = "STATION_HAUL")%>%
    dplyr::right_join(rza %>%
                        dplyr::select(STATION_HAUL, GEAR_NAME, NET, SORTER,
                                      JELLY_FOULING, PHYTOPLANKTON_FOULING,
                                      SAMPLE_COLOR, BEAKER_VOLUME,
                                      SUBSAMPLE_VOLUME, RZA_TAXA,
                                      SUBSAMPLE_COUNT, TOTAL_COUNT,
                                      VOLUME_FILTERED, EST_NUM_PERM3),
                      by = "STATION_HAUL")

# make path to write file to rza folder----------------------------------------
# dirname() gives "." for a bare file name, so the output lands next to the
# RZA file rather than in the filesystem root. The validated cruise name is
# used so the file name does not depend on any station.haul having matched.
  file_path <- file.path(dirname(rza_path),
                         paste0(fastrcat_cruise, "_RZA_Temp_Sal.csv"))

  readr::write_csv(rza_eco, file_path)

}
# Copepoda/rrza documentation built on May 31, 2019, 4:52 a.m.