R/get_component_from_GDB.R

Defines functions get_mapunit_from_GDB get_legend_from_GDB get_component_from_GDB

Documented in get_component_from_GDB get_legend_from_GDB get_mapunit_from_GDB

#' @export 
#' @rdname fetchGDB
get_component_from_GDB <- function(dsn = "gNATSGO_CONUS.gdb",
                                   WHERE = NULL,
                                   childs = FALSE,
                                   droplevels = TRUE) {
  
  # check
  vars_co <- sf::read_sf(dsn = dsn, query = "SELECT * FROM component LIMIT 0", as_tibble = FALSE, fid_column_name = "cokey") |>
    names() |> 
    paste(collapse = "|")
  
  if (length(WHERE) > 1) {
    message("WHERE has length greater than 1, using first value")
    WHERE <- WHERE[1]
  }
  
  idx_co <- grepl(vars_co, WHERE, ignore.case = TRUE)
  if (length(idx_co) == 0) {
    idx_co <- FALSE
  }

  if (!idx_co && !is.null(WHERE)) {
    stop("the WHERE argument is not targeting the component table")
  }

  # query
  message("getting components from ", substr(WHERE, 1, 20), "...")
  qry <- paste0(
    "SELECT *

    FROM component

    WHERE ", WHERE
  )
  if (is.null(WHERE)) qry <- gsub("WHERE ", "", qry)
  co <- sf::read_sf(dsn = dsn, query = qry, as_tibble = FALSE, fid_column_name = "cokey")
  co$cokey.1 <- NULL
  
  
  # recode metadata domains
  co <- uncode(
    co,
    droplevels = droplevels
  )
  

  # childs
  if (isTRUE(childs) && nrow(co) > 0) {

    message("getting child tables")

    # exec sub query
    ## pm
    if (!is.null(WHERE)) {
      idx <- seq(0, 2e8, 1000)
      co$idx <- as.character(cut(seq_len(nrow(co)), breaks = idx))
      
      copm <- by(co, co$idx, function(x) {
        temp <- .get_copmgrp_from_GDB(dsn = dsn, x)
        })
      copm <- do.call("rbind", copm)
    } else {
      copm <- .get_copmgrp_from_GDB(dsn)
    }
    
    
    ## gmd
    if (!is.null(WHERE)) {
      idx <- seq(0, 2e8, 1000)
      co$idx <- as.character(cut(seq_len(nrow(co)), breaks = idx))
      
       cogmd <- by(co, co$idx, function(x) {
        temp <- .get_cogeomordesc_from_GDB(dsn = dsn, x)
        })
      cogmd <- do.call("rbind", cogmd)
    } else {
      cogmd <- .get_cogeomordesc_from_GDB(dsn)
    }

    
    # prep
    copm2  <- .copm_prep2(copm, "cokey")
    cogmd2 <- .cogmd_prep2(cogmd, "cokey")
    
    # merge
    co$idx <- NULL
    co <- merge(co, copm2,  by = "cokey", all.x = TRUE, sort = FALSE)
    co <- merge(co, cogmd2, by = "cokey", all.x = TRUE, sort = FALSE)
  }

  
  # done
  return(co)
}

#' @export 
#' @rdname fetchGDB
#' @param stats Return statistics (number of mapunit keys per legend; number of components, major components per mapunit, total and hydric component percentage)? Default: `FALSE`
get_legend_from_GDB <- function(dsn = "gNATSGO_CONUS.gdb",
                                WHERE = NULL,
                                droplevels = TRUE,
                                stats = FALSE) {
  
  vars_le <- sf::read_sf(dsn = dsn, query = "SELECT * FROM legend LIMIT 0", as_tibble = FALSE, fid_column_names = "lkey") |>
    names()
  
  if (!is.null(WHERE)) {
    # check
    idx_le <- grepl(paste(vars_le, collapse = "|"), WHERE, ignore.case = TRUE)

    if (! idx_le) {
      stop("the WHERE argument is not targeting the legend table")
    }
    
    # query
    qry <- paste0("SELECT * FROM legend WHERE ", WHERE)
    le <- sf::read_sf(dsn = dsn, query = qry, as_tibble = FALSE, fid_column_name = "lkey")
  } else le <- sf::read_sf(dsn = dsn, layer = "legend", as_tibble = FALSE, fid_column_name = "lkey")
  le$lkey.1 <- NULL


  if (isTRUE(stats)) {
    mu  <- sf::read_sf(dsn = dsn, query = paste0("SELECT lkey, mukey FROM mapunit WHERE lkey IN ('", paste(le$lkey, collapse = "', '"), "')"), as_tibble = FALSE, fid_column_name = "mukey")
    mu$mukey.1 <- NULL
    n_mukey <- aggregate(mukey ~ lkey, data = mu, length)
    names(n_mukey)[2] <- "n_mukey"

    le <- merge(le, n_mukey, by = "lkey", all.x = TRUE, sort = FALSE)
  }


  # recode metadata domains
  le <- uncode(le,
               droplevels = droplevels
  )


  # done
  return(le)

}

#' @export 
#' @rdname fetchGDB
get_mapunit_from_GDB <- function(dsn = "gNATSGO_CONUS.gdb",
                                 WHERE = NULL,
                                 droplevels = TRUE,
                                 stats = FALSE) {
  
  # tests
  if (!is.null(WHERE)) {
    
    vars_le <- sf::read_sf(dsn = dsn, query = "SELECT * FROM legend LIMIT 0", as_tibble = FALSE, fid_column_name = "lkey") |>
      names()
    vars_le <- vars_le[! vars_le %in% c("lkey.1")]
    idx_le <- grepl(paste(vars_le, collapse = "|"), WHERE, ignore.case = TRUE)
    
    vars_mu <- sf::read_sf(dsn = dsn, query = "SELECT * FROM mapunit LIMIT 0", as_tibble = FALSE, fid_column_name = "mukey") |>
      names()
    vars_mu <- vars_mu[! vars_mu %in% c("lkey", "mukey.1")]
    idx_mu <- grepl(paste(vars_mu, collapse = "|"), WHERE, ignore.case = TRUE)
    
    if (idx_le & idx_mu) {
      stop("the WHERE argument can not target both the legend and mapunit table at the same time")
    }
    
    if (! idx_le & ! idx_mu) {
      stop("the WHERE argument is not targeting either the legend or mapunit table")
    }
    
    # query
    
    message("getting mapunits from WHERE ", WHERE)
    
    if (idx_mu) {
      qry <- paste0("SELECT * FROM mapunit WHERE ", WHERE)
      mu  <- sf::read_sf(dsn = dsn, query = qry, as_tibble = FALSE, fid_column_name = "mukey")
      
      qry <- paste0("SELECT * FROM legend WHERE lkey IN ('", paste(unique(mu$lkey), collapse = "', '"), "')")
      le  <- sf::read_sf(dsn = dsn, query = qry, as_tibble = FALSE, fid_column_name = "lkey")
    }
    
    if (idx_le) {
      qry <- paste0("SELECT * FROM legend WHERE ", WHERE)
      le <- sf::read_sf(dsn = dsn, query = qry, as_tibble = FALSE, fid_column_name = "lkey")
      
      qry <- paste0("SELECT * FROM mapunit WHERE lkey IN ('", paste(unique(le$lkey), collapse = "', '"), "')")
      mu  <- sf::read_sf(dsn = dsn, query = qry, as_tibble = FALSE, fid_column_name = "mukey")
    }
  }


  # query
  if (is.null(WHERE)) {
    message("getting mapunits")
    mu  <- sf::read_sf(dsn = dsn, layer = "mapunit", as_tibble = FALSE, fid_column_name = "mukey")
    
    qry <- paste0("SELECT * FROM legend WHERE lkey IN ('", paste(unique(mu$lkey), collapse = "', '"), "')")
    le <- sf::read_sf(dsn = dsn, query = qry, as_tibble = FALSE, fid_column_name = "lkey")
  }
  

  le$lkey.1  <- NULL
  mu$mukey.1 <- NULL
  mu <- merge(mu, le, by = "lkey", all.x = TRUE, sort = FALSE)
  mu <- mu[order(mu$areasymbol), ]


  if (isTRUE(stats)) {
    
    if (!is.null(WHERE)) {
      idx <- c(0, rep(3000, 1000) * 1:1000)
      mu$idx <- as.character(cut(seq_len(nrow(mu)), breaks = idx))
      
      co <- by(mu, mu$idx, function(x) {
        qry <- paste0(
          "SELECT mukey, cokey, comppct_r, majcompflag, hydricrating
          
          FROM component
          
          WHERE mukey IN ('", paste(x$mukey, collapse = "', '"), "')"
          )
        message("getting components from subset ", x$idx[1])
        co <- sf::read_sf(dsn = dsn, query = qry, as_tibble = FALSE, fid_column_name = "cokey")
        })
      co <- do.call("rbind", co)
      co$cokey.1 <- NULL
      mu$idx <- NULL
    } else {
      qry <- "SELECT mukey, cokey, comppct_r, majcompflag, hydricrating
    FROM component"
      co <- sf::read_sf(dsn, query = qry, as_tibble = FALSE, fid_column_name = "cokey")
      co$cokey.1 <- NULL
    }
    
    
    if (nrow(co) > 0) {
      
      co2 <- data.table::as.data.table(co)
      . <- NULL
      comppct_r <- NULL
      hydricrating <- NULL
      cokey <- NULL
      majcompflag <- NULL
      mukey <- NULL
      co2 <- as.data.frame(co2[, .(
        pct_component = sum(comppct_r, na.rm = TRUE),
        pct_hydric    = sum((hydricrating == "Yes") * comppct_r, na.rm = TRUE),
        n_component   = length(cokey),
        n_majcompflag = sum(majcompflag == "Yes", na.rm = TRUE)
        ),
        by = mukey
      ])
      
      # co2 <- {
      #   temp <- data.frame(
      #     mukey         = as.character(0),
      #     pct_component = as.integer(0),
      #     pct_hydric    = as.integer(0),
      #     n_component   = as.integer(0),
      #     n_majcompflag = as.integer(0),
      #     stringsAsFactors = FALSE
      #   )
      # 
      #   co$df <- list(temp)[rep(1, times = nrow(co))]
      #   split(co, co$mukey, drop = TRUE) ->.;
      #   lapply(., function(x) {
      #     df               = x$df[[1]]
      #     df$mukey         = x$mukey[1]
      #     df$pct_component = sum(x$comppct_r, na.rm = TRUE)
      #     df$pct_hydric    = sum((x$hydricrating == "Yes") * x$comppct_r, na.rm = TRUE)
      #     df$n_component   = length(x$cokey)
      #     df$n_majcompflag = sum(x$majcompflag == "Yes", na.rm = TRUE)
      #     return(df)
      #   }) ->.;
      #   do.call("rbind", .) ->.;
      # }
      mu <- merge(mu, co2, by = "mukey", all.x = TRUE, sort = FALSE)
      } else {
        mu <- cbind(mu, pct_component = NA_integer_, pct_hydric = NA_integer_, n_component = NA_integer_, n_majcompflag = NA_integer_)
      }
    }


  # recode metadata domains
  mu <- uncode(
    mu,
    droplevels = droplevels
  )
  
  
  vars <- c("areasymbol", vars_mu)
  if (isTRUE(stats)) {
    mu <- mu[c(vars, "pct_component", "pct_hydric", "n_component", "n_majcompflag")]
  } else {
    mu <- mu[vars]
  }


  # done
  return(mu)

}


.get_copmgrp_from_GDB <- function(dsn = dsn, co = NULL) {

  # query copmgrp table
  qry <- paste0(
    "SELECT *

     FROM copmgrp
    
    WHERE rvindicator = 'Yes' 
    "
    
    , if (!is.null(co)) {
      paste0("AND cokey IN ", soilDB::format_SQL_in_statement(co$cokey))
    }
  )
  pmg <- sf::read_sf(dsn = dsn, query = qry, as_tibble = FALSE, fid_column_name = "copmgrpkey")
  
  
  # remove duplicate rvindicators
  dat <- as.data.frame.matrix(table(pmg$cokey, pmg$rvindicator))
  n_rvindicator <- NULL
  dat <- subset(data.frame(cokey = row.names(dat), n_rvindicator = dat$Yes), n_rvindicator > 1)
  assign('dup.compmgrp.cokeyrvindictor', value = dat, envir = get_soilDB_env())
  message("-> QC: ", formatC(nrow(dat), format = "fg", big.mark = ","), " duplicate 'representative' rvindicators in the copmgrp table. \n\tUse `get('dup.compmgrp.cokeyrvindictor', envir=get_soilDB_env())` for offending component record IDs (cokey)")
  
  pmg$rvindicator <- NULL
  pmg2 <- .flatten_gmd(pmg[c("cokey", "pmgroupname")], "cokey", "copmgrp", sep = "; ")
  pmg$pmgroupname <- NULL
  pmg <- merge(pmg, pmg2, by = "cokey", all.x = TRUE, sort = FALSE)
  
  
  # query copm table
  qry <- paste(
    "SELECT *

     FROM copm"
    
    , if (!is.null(co)) {
      paste0("WHERE copmgrpkey IN ", soilDB::format_SQL_in_statement(pmg$copmgrpkey))
    }
  )
  pm <- sf::read_sf(dsn = dsn, query = qry, as_tibble = FALSE, fid_column_name = "copmkey")

  pm <- merge(pmg, pm, by = "copmgrpkey", all.x = TRUE, sort = FALSE)
  
  pm <- data.table::as.data.table(pm)
  pm <- pm[with(pm, order(cokey, copmgrpkey, pmorder)), ]
  pm <- as.data.frame(pm)

  return(pm)
}


.get_cogeomordesc_from_GDB <- function(dsn = dsn, co = NULL) {
  
  
  # geomorphic description ----
  qry <- paste0(
    "SELECT cokey, geomftname, geomfname, geomfeatid, existsonfeat, cogeomdkey

      FROM cogeomordesc

      WHERE rvindicator = 'Yes'", 
    if (!is.null(co)) {
      paste0("AND cokey IN ", format_SQL_in_statement(co$cokey))
    })
  cogmd    <- sf::read_sf(dsn, query = qry, as_tibble = FALSE, fid_column_name = "cogeomdkey")
  cogmd_ls <- cogmd[cogmd$geomftname == "Landscape", ]
  cogmd_lf <- cogmd[cogmd$geomftname == "Landform",  ]
  cogmd_lf$geomftname <- NULL
  
  names(cogmd_ls)[names(cogmd_ls) == "geomfname"] <- "landscape"
  names(cogmd_lf)[names(cogmd_lf) == "geomfname"] <- "landform"

  
  cogmd_ls <- .flatten_gmd(cogmd_ls, "cokey", "cogeomordesc")
  
  
  
  # geomorphic components ----
  qry <- paste0(
    "SELECT cogeomdkey, geomposmntn, geomposhill, geompostrce, geomposflats
    
      FROM cosurfmorphgc ",
    
    if (!is.null(co)) {
      paste0("WHERE cogeomdkey IN ", format_SQL_in_statement(cogmd_lf$cogeomdkey))
    })
  lf_3d <- sf::read_sf(dsn, query = qry, as_tibble = FALSE, fid_column_name = "cosurfmorgckey")
  names(lf_3d)[1:5] <- gsub("geompos", "", names(lf_3d)[1:5])
  
  lf_3d <- .flatten_gmd(lf_3d, key = "cogeomdkey", table = "cosurfmorphgc")
  
  
  
  # slope shape ----
  qry <- paste0(
    "SELECT cogeomdkey, shapeacross, shapedown 

      FROM cosurfmorphss ",

    if (!is.null(co)) {
      paste0("WHERE cogeomdkey IN ", format_SQL_in_statement(cogmd_lf$cogeomdkey))
    })
  lf_ss <- sf::read_sf(dsn, query = qry, as_tibble = FALSE, fid_column_name = "cosurfmorsskey")
  
  lf_ss <- .format_slopeshape(lf_ss)
  lf_ss <- .flatten_gmd(lf_ss, "cogeomdkey", "cosurfmorphss")
  
  

  # hillslope position ----
  qry <- paste0(
    "SELECT cogeomdkey, hillslopeprof

      FROM cosurfmorphhpp ",
    
    if (!is.null(co)) {
      paste0("WHERE cogeomdkey IN ", format_SQL_in_statement(cogmd_lf$cogeomdkey))
    })
  lf_2d <- sf::read_sf(dsn, query = qry, as_tibble = FALSE, fid_column_name = "cosurfmorhppkey")
  
  lf_2d <- .flatten_gmd(lf_2d, "cogeomdkey", "cosurfmorphhpp")
  
  
  
  # merge results
  cogmd <- merge(cogmd_ls[c("cokey", "landscape")], cogmd_lf, by = "cokey", all = TRUE, sort = FALSE)
  cogmd <- merge(cogmd, lf_3d, by = "cogeomdkey", all.x = TRUE, sort = FALSE)
  cogmd <- merge(cogmd, lf_ss, by = "cogeomdkey", all.x = TRUE, sort = FALSE)
  cogmd <- merge(cogmd, lf_2d, by = "cogeomdkey", all.x = TRUE, sort = FALSE)
  cogmd$cogeomdkey <- NULL

  return(cogmd)
}


.get_chorizon_from_GDB <-
  function(dsn = "gNATSGO_CONUS.gdb",
           cokey = NULL,
           droplevels = TRUE,
           childs = FALSE) {
    
  # chorizon
  idx <- seq(0, 2e8, 3000)
  
  if (!is.null(cokey)) {
    idx2 <- as.character(cut(seq_along(cokey), breaks = idx))
    co   <- data.frame(cokey, idx2)
    
    ch <- by(co, co$idx, function(x) {
      qry <- paste0(
        "SELECT *
        FROM chorizon
        WHERE cokey IN ", format_SQL_in_statement(x$cokey)
        )
      ch  <- sf::read_sf(dsn = dsn, query = qry, as_tibble = FALSE, fid_column_name = "chkey")
    })
    ch <- do.call("rbind", ch)

  } else ch <- sf::read_sf(dsn = dsn, query = "SELECT * FROM chorizon", as_tibble = FALSE, fid_column_name = "chkey")
  ch$chkey.1 <- NULL

  
  # iterate over the threshold
  vars_rf <- c('texture', 'texcl', 'fine_gravel', 'gravel', 'cobbles', 'stones', 'boulders', 'channers', 'flagstones', 'parafine_gravel', 'paragravel', 'paracobbles', 'parastones', 'paraboulders', 'parachanners', 'paraflagstones', 'unspecified', 'total_frags_pct_nopf', 'total_frags_pct')
  if (isTRUE(childs) && nrow(ch) > 0) {
    
    idx <- seq(0, 2e8, 3000)
    
    
    if (!is.null(cokey))
      ch$idx <- as.character(cut(seq_len(nrow(ch)), breaks = idx))
    
    if (is.null(cokey))
      ch$idx <- 1
    
    temp <- by(ch, ch$idx, function(x) {

      # chtexturegrp ----
      chtg <- .get_chtexturegrp_from_GDB(
        dsn   = dsn, 
        chkey = if (!is.null(cokey)) x$chkey else NULL
        )
      
      # chtexture ----
      cht <- .get_chtexture_from_GDB(
        dsn     = dsn, 
        chtgkey = if (!is.null(cokey)) chtg$chtgkey else NULL
        )
      
      # aggregate ----
      if (nrow(chtg) > 0) {
        chtg <- aggregate(texture ~ chkey + chtgkey, data = chtg, paste0, collapse = ", ")
      } else chtg <- chtg[c("chkey", "chtgkey", "texture")]
      if (nrow(cht) > 0) {
        cht  <- aggregate(texcl   ~ chtgkey,         data = cht,  paste0, collapse = ", ")
      } else cht <- cht[c("chtgkey", "texcl")]
      
      
      # chfrags ----
      chf <- .get_chfrags_from_GDB(
        dsn   = dsn, 
        chkey = if (!is.null(cokey)) x$chkey else NULL
        )

      # merge
      ch <- merge(x, chtg, by = "chkey",   all.x = TRUE, sort = FALSE)
      ch <- merge(ch, cht, by = "chtgkey", all.x = TRUE, sort = FALSE)
      ch <- merge(ch, chf, by = "chkey",   all.x = TRUE, sort = FALSE)

      return(ch)
    })
    ch <- do.call("rbind", temp)
    ch$idx <- NULL
  } else {
    mis_df <- as.data.frame(matrix(ncol = 19, nrow = nrow(ch)))
    names(mis_df) <- vars_rf
    ch <- cbind(ch, mis_df)
  }
  
  # vars <- sf::read_sf(dsn = dsn, query = "SELECT * FROM chorizon LIMIT 0", as_tibble = FALSE, fid_column_name = "lkey") |> names() |> c(vars_rf)
  # idx <- unlist(lapply(vars, function(x) which(names(ch) %in% x)))
  # ch <- ch[idx]
  
  # # append missing columns from ch LIMIT 0
  # if (ncol(ch) < length(vars)) {
  # 
  #   mis <- vars[! vars %in% names(ch)]
  #   mis_df <- as.data.frame(matrix(ncol = length(mis), nrow = nrow(ch)))
  #   names(mis_df) <- mis
  # 
  #   idx <- 1:2
  #   mis_df[idx] <- lapply(mis_df[idx], as.character)
  #   mis_df[-idx] <- lapply(mis_df[-idx], as.numeric)
  #   
  #   ch <- cbind(ch, mis_df)
  # }
  
  # ch$texture <- tolower(ch$texture)
  # ch$texcl   <- tolower(ch$texcl)
  ch <- uncode(ch, droplevels = droplevels)

  return(ch)
}


## chtexturegrp ----
.get_chtexturegrp_from_GDB <-  function(dsn, chkey = NULL) {
  
  if (!is.null(chkey)) {
         qry  <- paste0("SELECT * FROM chtexturegrp WHERE rvindicator = 'Yes' AND chkey IN ", format_SQL_in_statement(chkey))
  } else qry <-         "SELECT * FROM chtexturegrp WHERE rvindicator = 'Yes'"
  
  chtg <- sf::read_sf(dsn = dsn, query = qry, as_tibble = FALSE, fid_column_name = "chtgkey")
}


## chtexture -----
.get_chtexture_from_GDB <- function(dsn, chtgkey = NULL) {
  
  if (!is.null(chtgkey)) {
         qry  <- paste0("SELECT * FROM chtexture WHERE chtgkey IN ", format_SQL_in_statement(chtgkey))
  } else qry <-         "SELECT * FROM chtexture"
  
  cht <- sf::read_sf(dsn = dsn, query = qry, as_tibble = FALSE, fid_column_name = "chtkey")
}
  


## chfrags -----
.get_chfrags_from_GDB <- function(dsn, chkey = NULL) {
  
  if (!is.null(chkey)) {
           qry  <- paste0("SELECT * FROM chfrags WHERE chkey IN ", format_SQL_in_statement(chkey))
    } else qry <-         "SELECT * FROM chfrags"
  
  chf <- sf::read_sf(dsn = dsn, query = qry, as_tibble = FALSE, fid_column_name = "chfragskey")
  chf <- simplifyFragmentData(chf, id.var = "chkey", vol.var = "fragvol_r", prefix = "frag")
}



#' Get a SoilProfileCollection from a SSURGO file geodatabase
#' 
#' Functions to load and flatten commonly used tables and from SSURGO file
#' geodatabases, and create soil profile collection objects (SPC).
#' 
#' These functions return data from SSURGO file geodatabases with the use of a
#' simple text string that formatted as an SQL WHERE clause (e.g. \code{WHERE =
#' "areasymbol = 'IN001'"}. Any columns within the target table can be
#' specified (except for fetchGDB() which currently can only target one table 
#' (e.g. legend, mapunit or component) at a time with the WHERE clause).
#' 
#' @aliases fetchGDB get_legend_from_GDB get_mapunit_from_GDB
#' get_component_from_GDB
#' @param dsn data source name (interpretation varies by driver - for some
#' drivers, dsn is a file name, but may also be a folder, or contain the name
#' and access credentials of a database); in case of GeoJSON, dsn may be the
#' character string holding the geojson data. It can also be an open database
#' connection.
#' @param WHERE text string formatted as an SQL WHERE clause (default: FALSE)
#' @param childs logical; if FALSE parent material and geomorphic child tables
#' are not flattened and appended
#' @param droplevels logical: indicating whether to drop unused levels in
#' classifying factors. This is useful when a class has large number of unused
#' classes, which can waste space in tables and figures.
#' @return A \code{data.frame} or \code{SoilProfileCollection} object.
#' @author Stephen Roecker
#' @keywords manip
#' @examplesIf requireNamespace("aqp", quietly = TRUE)
#' 
#' \donttest{
#' 
#' ## replace `dsn` with path to your own geodatabase (SSURGO OR gNATSGO)
#' ##
#' ##  download CONUS gNATSGO from here:
#' ##    https://nrcs.app.box.com/v/soils/folder/191790828371
#' ##
#' # dsn <- "D:/geodata/soils/gNATSGO_CONUS.gdb"
#' # le <- get_legend_from_GDB(dsn = dsn, WHERE = "areasymbol LIKE '%'")
#' # mu <- get_mapunit_from_GDB(dsn = dsn, WHERE = "muname LIKE 'Miami%'")
#' # co <- get_component_from_GDB(dsn, WHERE = "compname = 'Miami'
#' #                              AND majcompflag = 'Yes'", childs = FALSE)
#' # f_in_GDB <- fetchGDB(WHERE = "areasymbol LIKE 'IN%'")
#' 
#' }
#'
#' @export fetchGDB
fetchGDB <- function(dsn = "gNATSGO_CONUS.gdb",
                     WHERE = NULL,
                     childs = FALSE,
                     droplevels = TRUE) {
  
  if (!requireNamespace("aqp")) {
    stop("package 'aqp' is required", call. = FALSE)
  }
  
  # checks
  vars_le <- vars_le <- sf::read_sf(dsn = dsn, query = "SELECT * FROM legend LIMIT 0", as_tibble = FALSE, fid_column_name = "lkey") |> names() |> paste(collapse = "|")
  
  vars_mu <- vars_mu <- sf::read_sf(dsn = dsn, query = "SELECT * FROM mapunit LIMIT 0", as_tibble = FALSE, fid_column_name = "mukey") |> names() 
  vars_mu <- vars_mu[! vars_mu %in% "lkey"] |> paste(collapse = "|")
  
  vars_co <- vars_co <- sf::read_sf(dsn = dsn, query = "SELECT * FROM component LIMIT 0", as_tibble = FALSE, fid_column_name = "cokey") |> names()
  vars_co <- vars_co[! vars_co %in% "mukey"] |> paste(collapse = "|")

  if (!is.null(WHERE)) {
    idx_le <- grepl(vars_le, WHERE, ignore.case = TRUE)
    idx_mu <- grepl(vars_mu, WHERE, ignore.case = TRUE)
    idx_co <- grepl(vars_co, WHERE, ignore.case = TRUE)
    
    if (sum(c(idx_le, idx_mu, idx_co)) > 1) {
      stop("WHERE can only target 1 table at a time")
    }
  } else {
    idx_le <- FALSE
    idx_mu <- FALSE
    idx_co <- FALSE
  }
  

  # target legend table ----
  if (idx_le) {
    le <- get_legend_from_GDB(dsn = dsn,
                              WHERE = WHERE,
                              stats = FALSE)
    
    qry <- paste0("lkey IN ('", paste(le$lkey, collapse = "', '"), "')")
    mu <- suppressMessages(get_mapunit_from_GDB(dsn = dsn, WHERE = qry))
    mu <- mu[order(mu$areasymbol), ]
  }
  
  # target mapunit table ----
  if (idx_mu) {
    mu <- suppressMessages(get_mapunit_from_GDB(dsn = dsn, WHERE = WHERE))
    mu <- mu[order(mu$areasymbol), ]
  }

  # get components
  if (idx_le || idx_mu) {
    
    if (idx_le) {
      mu$idx <- mu$areasymbol
    }
    
    if (idx_mu) {
      mu$idx <- "1"
    }
    
    temp <- by(mu, mu$idx, function(x) {
      if (idx_le) {
        message("getting components and horizons from areasymbol = '", unique(x$idx), "'")
      } else{
        message("getting components and horizons from ", WHERE)
      }
      
      # components
      # idx <- c(0, rep(375, 15) * 1:15)
      idx <- seq(0, 2e8, 3000)
      x$idx <- as.character(cut(seq_len(nrow(x)), breaks = idx))
      co <- by(x, x$idx, function(x2) {
        qry <- paste0("mukey IN ('", paste(x2$mukey, collapse = "', '"), "')")
        tryCatch({
          co  <- get_component_from_GDB(
            dsn        = dsn,
            WHERE      = qry,
            childs     = childs,
            droplevels = droplevels
          )
        }, error = function(err) {
          print(paste("Error occured: ", err))
          return(NULL)
        })
      })
      co <- do.call("rbind", co)
      co$idx <- NULL
      
      # horizons
      tryCatch({
        h   <- .get_chorizon_from_GDB(dsn = dsn, cokey = co$cokey)
      }, error = function(err) {
        print(paste("Error occured: ", err))
        return(NULL)
      })
      
      return(list(co = co, h = h))
    })
    
    co <- as.data.frame(data.table::rbindlist(lapply(temp, function(x) x$co)))
    h  <- as.data.frame(data.table::rbindlist(lapply(temp, function(x) x$h)))
  } 
  
  if (idx_co || is.null(WHERE)) {

    message("getting components and horizons from ", WHERE)

    # target component table ----
    co <- get_component_from_GDB(
      dsn        = dsn,
      WHERE      = WHERE,
      childs     = childs,
      droplevels = droplevels
    )
  
    # horizons
    if (is.null(WHERE)) {
      cokey <- NULL
    } else {
      cokey <- co$cokey
    }
    
    h <- .get_chorizon_from_GDB(dsn = dsn,
                                cokey = cokey,
                                childs = childs)
  }
  
  le$lkey.1  <- NULL
  mu$mukey.1 <- NULL
  co$cokey.1 <- NULL
  h$chkey.1  <- NULL
  
  if (nrow(co) > 0) {
    f <- merge(co["cokey"], h, by = "cokey", all.x = TRUE, sort = FALSE)
    f <- f[order(f$cokey), ]
    aqp::depths(f) <- cokey ~ hzdept_r + hzdepb_r
    aqp::site(f) <- co
  } else {
    f <- aqp::SoilProfileCollection(idcol = "cokey", 
                                    site = data.frame(cokey = character()),
                                    horizons =  data.frame(cokey = character(), 
                                                           hzdept_r = integer(), 
                                                           hzdepb_r = integer(),
                                                           hzID = character()),
                                    depthcols = c("hzdept_r", "hzdepb_r"))
    
  }
  
  return(f)
}

Try the soilDB package in your browser

Any scripts or data that you put into this service are public.

soilDB documentation built on April 3, 2026, 9:07 a.m.