terradactyl: Tools for Creating and Using TerrADat

Documented in gather_gap_terradat

#' Convert gap data into a tall, tidy data frame
#'
#' @description Given wide format gap data, create a tall format data frame
#' usable by other terradactyl functions.
#' @param dsn Character string. The full filepath and filename (including file
#' extension) of the geodatabase or text file containing the table of interest.
#' This field is unnecessary if you provide either both of tblGapDetail and
#' tblGapHeader (AIM/DIMA/TerrADat) or both of GINTERCEPT and POINT (LMF/NRI).
#' @param source Character string. The data source format,
#' \code{"AIM", "TerrADat", "DIMA", "LMF", "NRI"} (case independent).
#' @param tblGapHeader Dataframe of the data structure tblGapHeader from the
#' DIMA database with the addition of PrimaryKey and DBKey fields. Use with
#' tblGapDetail when data source is AIM, DIMA, or TerrADat; alternately provide
#' dsn.
#' @param tblGapDetail Dataframe of the data structure tblGapDetail from the
#' DIMA database with the addition of PrimaryKey and DBKey fields. Use with
#' tblGapHeader when data source is AIM, DIMA, or TerrADat; alternately provide
#' dsn.
#' @param GINTERCEPT Dataframe of the data structure GINTERCEPT from the LMF/NRI
#' database. Use with POINT when data source is LMF or NRI; alternately provide
#' dsn.
#' @param POINT Dataframe of the data structure POINT from the LMF database.
#' Use with GINTERCEPT when data source if LMF or NRI; alternately provide dsn.
#' @param file_type Character string that denotes the source file type of the
#' LMF/NRI data, \code{"gdb"} or \code{"txt"}. Not necessary for
#' AIM/DIMA/TerrADat, or if GINTERCEPT and POINT are provided.
#' @importFrom magrittr %>%
#' @name gather_gap
#' @family <gather>
#' @return A tall data frame containing the data from the gap measurements.
#' @examples
#' gather_gap(dsn = "Path/To/AIM_Geodatabase.gdb",
#'            source = "AIM")
#' gather_gap(dsn = "Path/To/LMF_Geodatabase.gdb",
#'            source = "LMF")
#'
#' aim_gapdetail <- read.csv("Path/To/tblGapDetail.csv")
#' aim_gapheader <- read.csv("Path/To/tblGapHeader.csv")
#' gather_gap(source = "AIM",
#'            tblGapDetail = aim_gapdetail,
#'            tblGapHeader = aim_gapheader)
#'
#' lmf_gintercept <- read.csv("Path/To/GINTERCEPT.csv")
#' lmf_point <- read.csv("Path/To/POINT.csv")
#' gather_gap(source = "LMF",
#'            GINTERCEPT = lmf_gintercept,
#'            POINT = lmf_point)

## gather gap data
#' @export gather_gap_terradat
#' @rdname gather_gap
gather_gap_terradat <- function(dsn = NULL,
                                tblGapDetail = NULL,
                                tblGapHeader = NULL) {

  ### switch by input types
  if(!is.null(tblGapDetail) & !is.null(tblGapHeader)){
    gap_detail <- tblGapDetail %>%
      dplyr::select_if(!names(.) %in% c(
        'GlobalID',
        'created_user',
        'created_date',
        'last_edited_user',
        'last_edited_date',
        'DateLoadedInDb',
        'DateLoadedinDB',
        "rid"
      ))
    gap_header <- tblGapHeader %>%
      dplyr::select_if(!names(.) %in% c(
        'GlobalID',
        'created_user',
        'created_date',
        'last_edited_user',
        'last_edited_date',
        'DateLoadedInDb',
        'DateLoadedInDB',
        "rid"
      ))
  } else if(!is.null(dsn)){
    if (!file.exists(dsn)) {
      stop("dsn must be a valid filepath to a geodatabase containing tblGapDetail and tblGapHeader")
    }
    # Read tblGapDetail
    gap_detail <- suppressWarnings(sf::st_read(
      dsn = dsn,
      layer = "tblGapDetail",
      stringsAsFactors = FALSE, quiet = T
    )) %>%

      # Remove database management fields that aren't relevant
      dplyr::select_if(!names(.) %in% c(
        'GlobalID',
        'created_user',
        'created_date',
        'last_edited_user',
        'last_edited_date',
        'DateLoadedInDb',
        'DateLoadedInDB',
        'rid'
      ))

    # Read tblGapHeader
    gap_header <- suppressWarnings(sf::st_read(
      dsn = dsn,
      layer = "tblGapHeader",
      stringsAsFactors = FALSE, quiet = T
    )) %>%

      # Remove database management fields that aren't relevant
      dplyr::select_if(!names(.) %in% c(
        'GlobalID',
        'created_user',
        'created_date',
        'last_edited_user',
        'last_edited_date',
        'DateLoadedInDb',
        'DateLoadedInDB',
        'rid'
      ))

  } else {
    stop("Provide either tblGapDetail and tblGapHeader, or the path to a GDB containing those tables")
  }
  # add null DBKey field if not present
  if(!"DBKey" %in% colnames(gap_header)) gap_header$DBKey <- NA
  if(!"DBKey" %in% colnames(gap_detail)) gap_detail$DBKey <- NA

  ## ensure that gap, gapstart, and gapend are numeric
  gap_detail$GapStart <- as.numeric(gap_detail$GapStart)
  gap_detail$GapEnd <- as.numeric(gap_detail$GapEnd)
  gap_detail$Gap <- as.numeric(gap_detail$Gap)
  ##

  # Merge header and detail data together
  gap_tall <- dplyr::left_join(
    x = gap_header,
    y = gap_detail,
    by = c("RecKey", "PrimaryKey", "DBKey")
  )

  ## Remove all orphaned records
  gap_tall <- gap_tall[!is.na(gap_tall$PrimaryKey), ]

  # Look for NA values in NoCanopyGaps and NoBasalGaps, we assume they are 0
  gap_tall <- gap_tall %>%
    dplyr::mutate(
      NoCanopyGaps = tidyr::replace_na(NoCanopyGaps, replace = 0),
      NoBasalGaps = tidyr::replace_na(NoBasalGaps, replace = 0)
    )

  ## Add zero values where there is no canopy gap present on line
  gap_tall[gap_tall$NoCanopyGaps == 1, ] <- gap_tall %>%
    dplyr::filter(NoCanopyGaps == 1) %>%
    tidyr::replace_na(list(
      RecType = "C", GapStart = 0, GapEnd = 0, Gap = 0, Measure = 1
    ))
  gap_tall$NoCanopyGaps <- as.numeric(gap_tall$NoCanopyGaps)
  gap_tall$NoBasalGaps <- as.numeric(gap_tall$NoBasalGaps)

  ## Add zero values where there is no canopy gap present on the line, AND there is basal gap on the line
  # Find missing records
  gap_tall_missing_c <-
    gap_tall %>%
    dplyr::filter(NoCanopyGaps == 1,
                  RecType != "C") %>%
    dplyr::select(PrimaryKey, LineKey, RecKey, NoBasalGaps, NoCanopyGaps) %>%
    unique() %>%
    dplyr::mutate(RecType = "C", GapStart = 0, GapEnd = 0, Gap = 0, Measure = 1)

  # Append them to gap_tall
  gap_tall <-
    dplyr::bind_rows(gap_tall, gap_tall_missing_c)

  ## Add zero values where there is no basal gap present on line
  gap_tall[gap_tall$NoBasalGaps == 1, ] <- gap_tall %>%
    dplyr::filter(NoBasalGaps == 1) %>%
    tidyr::replace_na(list(
      RecType = "B", GapStart = 0, GapEnd = 0, Gap = 0, Measure = 1
    ))

  ## Add zero values where there is no basal gap present on the line, AND there is canopy gap on the line
  # Find missing records
  gap_tall_missing_b <-
    gap_tall %>%
    dplyr::filter(NoBasalGaps == 1,
                  RecType != "B" | is.na(RecType)) %>%
    dplyr::select(PrimaryKey, LineKey, RecKey, NoCanopyGaps, NoBasalGaps) %>%
    unique() %>%
    dplyr::mutate(RecType = "B", GapStart = 0, GapEnd = 0, Gap = 0, Measure = 1)

  # Append them to gap_tall
  gap_tall <-
    dplyr::bind_rows(gap_tall, gap_tall_missing_b)

  ## Identify which gaps are perennial gaps vs all canopy gaps. Perennial
  ## gaps are those with only PerennialsCanopy == 1
  gap_tall <- gap_tall %>% dplyr::mutate(RecType = as.character(RecType))

  gap_tall$RecType[gap_tall$PerennialsCanopy == 1 &
                     gap_tall$AnnualForbsCanopy == 0 &
                     gap_tall$AnnualGrassesCanopy == 0 &
                     gap_tall$OtherCanopy == 0] <- "P"

  ## last round drop
  gap_tall <- gap_tall %>% dplyr::select_if(!names(.) %in%
                                       c('DateLoadedInDb',
                                         'DateLoadedInDB',
                                         'DataErrorChecking',
                                         'DataEntry',
                                         'DateModified',
                                         'FormType')
  ) %>%
    # make sure data types are numeric when needed
    dplyr::mutate(
      GapStart = suppressWarnings(as.numeric(GapStart)),
      GapEnd = suppressWarnings(as.numeric(GapEnd)),
      Gap = suppressWarnings(as.numeric(Gap))
    )

  return(gap_tall)
}

#' @export gather_gap_lmf
#' @rdname gather_gap
gather_gap_lmf <- function(dsn = NULL,
                           file_type = NULL,
                           GINTERCEPT = NULL,
                           POINT = NULL) {

  # if file type is NULL, define it by checking the extension of dsn
  if(is.null(file_type)){
    extension <- substr(dsn, nchar(dsn)-2, nchar(dsn))
    if(extension == "csv") {
      file_type <- "csv"
    } else if(extension == "gdb") {
      file_type <- "gdb"
    } else {
      file_type <- "txt"
    }
  }


  if(!is.null(GINTERCEPT) & !is.null(POINT)){
    gintercept <- GINTERCEPT
    point <- POINT
  } else if(!is.null(dsn) & file_type %in% c("gdb", "csv", "txt")){
    if (!file.exists(dsn)) {
      stop("dsn must be a valid filepath to a database containing GINTERCEPT and POINT")
    }
    gintercept <- switch(file_type,
                         "gdb" = {
                           suppressWarnings(sf::st_read(
                             dsn = dsn,
                             layer = "GINTERCEPT",
                             stringsAsFactors = FALSE,
                             quiet = T
                           )) %>%
                             dplyr::select_if(!names(.) %in% c(
                               'GlobalID',
                               'created_user',
                               'created_date',
                               'last_edited_user',
                               'last_edited_date'
                             ))
                         },
                         "txt" = {
                           read.table(paste(dsn, "gintercept.txt", sep = ""),
                                      stringsAsFactors = FALSE, strip.white = TRUE,
                                      header = FALSE, sep = "|"
                           )
                         },
                         "csv" = {
                           read.csv(dsn)
                         }
    )
    # Read in point file for other plot level information
    point <- switch(file_type,
                    "gdb" = {
                      suppressWarnings(sf::st_read(
                        dsn = dsn,
                        layer = "POINT",
                        stringsAsFactors = FALSE,
                        quiet = T
                      )) %>%
                        dplyr::select_if(!names(.) %in% c(
                          'GlobalID',
                          'created_user',
                          'created_date',
                          'last_edited_user',
                          'last_edited_date'
                        ))
                    },
                    "txt" = {
                      read.table(paste(dsn, "point.txt", sep = ""),
                                 stringsAsFactors = FALSE,
                                 strip.white = TRUE,
                                 header = FALSE, sep = "|"
                      )
                    },
                    "csv" = {
                      stop("csv not currently supported for gap data")
                      read.csv(point_dsn)
                    }
    )


    if (file_type == "txt") {
      # Add meaningful column names
      gintercept <- name_variables_nri(
        data = gintercept,
        table_name = "GINTERCEPT"
      )
      point <- name_variables_nri(
        data = gintercept,
        table_name = "POINT"
      )
    }
  }
  else {
    stop("Supply either GINTERCEPT and POINT, or the path to a GDB containing those tables")
  }

  # Ensure START_GAP and END_GAP are numeric
  gintercept$START_GAP <- as.numeric(gintercept$START_GAP)
  gintercept$END_GAP <- as.numeric(gintercept$END_GAP)

  # Look at the point table and add blanks or substitute perennial gap for
  # canopy gap
  canopy_infer <- point[point$GAPS_DIFFERENT_NESW == "N" |
                          point$GAPS_DIFFERENT_NWSE == "N", ] %>%
    dplyr::select(
      "PrimaryKey",
      "GAPS_DIFFERENT_NESW",
      "GAPS_DIFFERENT_NWSE"
    ) %>%

    # Gather so that we can query by lines
    tidyr::gather(key = "TRANSECT", value = "different", -"PrimaryKey") %>%

    # Select so that only values where canopy gap is not different
    subset(different == "N") %>%

    # Reduce line key to just line number
    dplyr::mutate(TRANSECT = stringr::str_replace_all(TRANSECT,
                                                      pattern = "GAPS_DIFFERENT_",
                                                      replace = ""
    ) %>% tolower())

  # select perennial gaps that are not different to canopy gaps and infer c
  # canopy gaps

  canopy_infer <- dplyr::full_join(gintercept, canopy_infer,
                                   by = c("PrimaryKey", "TRANSECT")
  ) %>%
    dplyr::filter(!is.na(different) & GAP_TYPE == "peren") %>%

    # Code perennial to canopy
    dplyr::mutate(GAP_TYPE = "canopy")

  # Join canopy data back to gintercept

  gintercept <- rbind(gintercept, dplyr::select(canopy_infer, -different))

  ## Add zeros where no canopy gap data were recorded
  zero_gap <- point %>%
    dplyr::select(
      "PrimaryKey",
      "DBKey",
      "BASAL_GAPS_NESW",
      "CANOPY_GAPS_NESW",
      "BASAL_GAPS_NWSE",
      "CANOPY_GAPS_NWSE",
      "PERENNIAL_CANOPY_GAPS_NESW",
      "PERENNIAL_CANOPY_GAPS_NWSE"
    ) %>%
    tidyr::gather(key = "TRANSECT", value = "zero", -c("PrimaryKey", "DBKey")) %>%

    # Filter for plots and lines where we need to insert zeros
    dplyr::filter(zero == "N") %>%

    # rework transect name
    dplyr::mutate(
      GAP_TYPE = stringr::str_replace(TRANSECT,
                                      pattern = "_.*",
                                      replacement = ""
      ) %>%
        # recode GAP_TYPE
        dplyr::recode(
          "CANOPY" = "canopy",
          "PERENNIAL" = "peren",
          "BASAL" = "basal"
        ),
      TRANSECT = stringr::str_sub(TRANSECT, -4) %>% tolower(),
      START_GAP = 0,
      END_GAP = 0
    )


  # Merge back to gintercept
  gintercept <- dplyr::full_join(gintercept, zero_gap,
                                 by = c("TRANSECT", "GAP_TYPE", "START_GAP",
                                        "END_GAP", "PrimaryKey", "DBKey")) %>%
    dplyr::select(-zero)

  # convert to metric, original data are in decimal feet
  gintercept$START_GAP <- gintercept$START_GAP * 30.48
  gintercept$END_GAP <- gintercept$END_GAP * 30.48
  gintercept$Gap <- abs(gintercept$END_GAP - gintercept$START_GAP)


  # check for negative values and remove
  gap <- gintercept %>% subset(Gap >= 0)


  # recode gap type so that it fits the DIMA types
  gap$GAP_TYPE <- as.character(gap$GAP_TYPE)
  gap$GAP_TYPE[gap$GAP_TYPE == "peren"] <- "P"
  gap$GAP_TYPE[gap$GAP_TYPE == "canopy"] <- "C"
  gap$GAP_TYPE[gap$GAP_TYPE == "basal"] <- "B"

  # rename fields so they can be merged with a DIMA/TerrADat type
  gap <- dplyr::rename(gap,
                       LineKey = TRANSECT, RecType = GAP_TYPE,
                       GapStart = START_GAP, GapEnd = END_GAP, SeqNo = SEQNUM
  )

  # units are metric
  gap$Measure <- 1

  # line length of an NRI transect in meters
  gap$LineLengthAmount <- 150 * 30.48 / 100

  # minimum gap size
  gap$GapMin <- 12 * 2.54

  # Strip down fields
  gap <- gap %>%
    dplyr::select_if(!names(.) %in% c(
      'SURVEY', 'COUNTY', 'STATE', 'PLOTKEY',
      'PSU', 'POINT',
      'created_user',
      'created_date',
      'last_edited_user',
      'last_edited_date',
      'GlobalID',
      'X'
    )) %>%
  # make sure data types are numeric when needed
   dplyr::mutate(
     GapStart = suppressWarnings(as.numeric(GapStart)),
     GapEnd = suppressWarnings(as.numeric(GapEnd)),
     Gap = suppressWarnings(as.numeric(Gap))
   )

  return(gap)
}

#' export gather_gap_survey123
#' rdname gather_gap
# gather_gap_survey123 <- function(Gap_0, GapDetail_1) {
#
#   # Add fields to match terradat schema
#   GapDetail_1$DBKey <- NA
#   GapDetail_1$SeqNo <- NA
#   Gap_0$DBKey <- NA
#   Gap_0$RecKey <- NA
#
#   # Select only the necessary fields
#   gap_detail <- GapDetail_1 %>%
#     dplyr::select(
#       GlobalID,
#       ParentGlobalID,
#       DBKey,
#       GapStart,
#       GapEnd,
#       RecType,
#       Gap,
#       SeqNo
#     )
#
#   gap_header <- Gap_0 %>%
#     dplyr::select(
#       GlobalID,
#       PrimaryKey = PlotKey,
#       DBKey,
#       LineKey,
#       RecKey,
#       FormDate,
#       Direction,
#       # Measure, # This is necessary but missing data
#       LineLengthAmount = LineLengthCM,
#       GapMin,
#       GapData, # Will have to be reclassified later
#       Notes = CollectNote,
#       NoCanopyGaps,
#       NoBasalGaps
#     )
#
#
#   ## ensure that gap, gapstart, and gapend are numeric
#   gap_detail$GapStart <- as.numeric(gap_detail$GapStart)
#   gap_detail$GapEnd <- as.numeric(gap_detail$GapEnd)
#   gap_detail$Gap <- as.numeric(gap_detail$Gap)
#   ##
#
#   # Merge header and detail data together
#   # Survey123 uses global ID, we have to translate it, then drop GlobalIDs
#   gap_tall <- dplyr::left_join(
#     x = gap_header,
#     y = gap_detail,
#     by = c("GlobalID" = "ParentGlobalID", "DBKey")
#   ) %>%
#     dplyr::select(-"GlobalID.y", -"GlobalID")
#
#   # Check for duplicate PrimaryKeys
#   dupkeys <- gap_tall$PrimaryKey[duplicated(gap_tall$PrimaryKey)]
#   if(length(dupkeys) > 0){
#     dupnames <- paste(unique(dupkeys), collapse = ", ")
#     warning(paste("Duplicate PrimaryKeys found. Change PlotKey in the original data:", dupnames))
#   }
#
#   # Remove all orphaned records
#   gap_tall <- gap_tall[!is.na(gap_tall$PrimaryKey), ]
#
#   # reclassify GapData from text to code
#   gap_tall$GapData <-
#     dplyr::recode(
#       gap_tall$GapData,
#       "Both" = 1, ### Probable bug here -- I do not know what survey123 codes this value as
#       "Canopy" = 2,
#       "Basal" = 3
#     )
#
#   # Look for NA values in NoCanopyGaps and NoBasalGaps, we assume they are 0
#   gap_tall <- gap_tall %>%
#     dplyr::mutate(
#       NoCanopyGaps = tidyr::replace_na(NoCanopyGaps, replace = 0),
#       NoBasalGaps = tidyr::replace_na(NoBasalGaps, replace = 0)
#     )
#
#   ## Add zero values where there is no canopy gap present on line
#   gap_tall[gap_tall$NoCanopyGaps == 1, ] <- gap_tall %>%
#     dplyr::filter(NoCanopyGaps == 1) %>%
#     tidyr::replace_na(list(
#       RecType = "C", GapStart = 0, GapEnd = 0, Gap = 0, Measure = 1
#     ))
#   gap_tall$NoCanopyGaps <- as.numeric(gap_tall$NoCanopyGaps)
#   gap_tall$NoBasalGaps <- as.numeric(gap_tall$NoBasalGaps)
#
#   ## Add zero values where there is no canopy gap present on the line, AND there is basal gap on the line
#   # Find missing records
#   gap_tall_missing_c <-
#     gap_tall %>%
#     dplyr::filter(NoCanopyGaps == 1,
#                   RecType != "C") %>%
#     dplyr::select(PrimaryKey, LineKey, RecKey, NoBasalGaps, NoCanopyGaps) %>%
#     unique() %>%
#     dplyr::mutate(RecType = "C", GapStart = 0, GapEnd = 0, Gap = 0, Measure = 1)
#
#   # Append them to gap_tall
#   gap_tall <-
#     dplyr::bind_rows(gap_tall, gap_tall_missing_c)
#
#   ## Add zero values where there is no basal gap present on line
#   gap_tall[gap_tall$NoBasalGaps == 1, ] <- gap_tall %>%
#     dplyr::filter(NoBasalGaps == 1) %>%
#     tidyr::replace_na(list(
#       RecType = "B", GapStart = 0, GapEnd = 0, Gap = 0, Measure = 1
#     ))
#
#   ## Add zero values where there is no basal gap present on the line, AND there is canopy gap on the line
#   # Find missing records
#   gap_tall_missing_b <-
#     gap_tall %>%
#     dplyr::filter(NoBasalGaps == 1,
#                   RecType != "B" | is.na(RecType)) %>%
#     dplyr::select(PrimaryKey, LineKey, RecKey, NoCanopyGaps, NoBasalGaps) %>%
#     unique() %>%
#     dplyr::mutate(RecType = "B", GapStart = 0, GapEnd = 0, Gap = 0, Measure = 1)
#
#   # Append them to gap_tall
#   gap_tall <-
#     dplyr::bind_rows(gap_tall, gap_tall_missing_b)
#
#   return(gap_tall)
# }
#

#' @export gather_gap
#' @rdname gather_gap
gather_gap <- function(dsn = NULL,
                       file_type = "gdb",
                       source,
                       tblGapHeader = NULL,
                       tblGapDetail = NULL,
                       POINT = NULL,
                       GINTERCEPT = NULL,
                       autoQC = TRUE#,
                       # Gap_0 = NULL,
                       # GapDetail_1 = NULL
                       ) {

  # Gather gap using the appropriate method
  if(toupper(source) %in% c("AIM", "TERRADAT", "DIMA")){
    gap <- gather_gap_terradat(dsn = dsn,
                               tblGapDetail = tblGapDetail,
                               tblGapHeader = tblGapHeader)
  } else if(toupper(source) %in% c("LMF", "NRI")){
    gap <- gather_gap_lmf(dsn = dsn,
                          file_type = file_type,
                          POINT = POINT,
                          GINTERCEPT = GINTERCEPT)
  # } else if(toupper(source) %in% c("SURVEY123")){
    # gap <- gather_gap_survey123(Gap_0 = Gap_0, GapDetail_1 = GapDetail_1)
  } else {
    stop("source must be AIM, TerrADat, DIMA, LMF, or NRI (all case independent)")
  }

  gap$source <- source

  if("sf" %in% class(gap)) gap <- sf::st_drop_geometry(gap)

  if (any(class(gap) %in% c("POSIXct", "POSIXt"))) {
    change_vars <- names(gap)[do.call(rbind, vapply(gap,
                                                    class))[, 1] %in% c("POSIXct", "POSIXt")]
    gap <- dplyr::mutate_at(gap, dplyr::vars(change_vars),
                            dplyr::funs(as.character))
  }

  # reorder so that primary key is leftmost column
  gap <- gap %>%
    dplyr::select(PrimaryKey, DBKey, LineKey, tidyselect::everything())

  # Drop rows with no data
  gap <- gap %>%
    dplyr::filter(!(is.na(Gap) &
                      is.na(GapEnd) &
                      is.na(GapMin) &
                      is.na(GapStart) &
                      is.na(LineKey) &
                      is.na(LineLengthAmount) &
                      is.na(Measure) &
                      is.na(RecType) &
                      is.na(SeqNo)))

  # remove duplicates and empty rows
  if(autoQC){
    message("Removing duplicated rows and rows with no essential data. Disable by adding the parameter 'autoQC = FALSE'")
    gap <- gap %>% tdact_remove_duplicates() %>% tdact_remove_empty(datatype = "gap")
  }

  return(gap)
}