R/io_read.R

Defines functions ddbs_read_meta ddbs_read_vector ddbs_read_table

Documented in ddbs_read_meta ddbs_read_table ddbs_read_vector

#' Reads a vectorial table from DuckDB into R
#'
#' Retrieves the data from a DuckDB table, view, or Arrow view with a geometry
#' column, and converts it to an R \code{sf} object. This function works with
#' both persistent tables created by \code{ddbs_write_table} and temporary
#' Arrow views created by \code{ddbs_register_table}.
#'
#' @template conn
#' @template name
#' @param clauses character, additional SQL code to modify the query from the
#' table (e.g. "WHERE ...", "ORDER BY...")
#' @template quiet
#'
#' @returns an sf object
#' @export
#'
#' @examples
#' \dontrun{
#' ## load packages
#' library(duckspatial)
#' library(sf)
#'
#' # create a duckdb database in memory (with spatial extension)
#' conn <- ddbs_create_conn(dbdir = "memory")
#'
#' ## create random points
#' random_points <- data.frame(
#'   id = 1:5,
#'   x = runif(5, min = -180, max = 180),
#'   y = runif(5, min = -90, max = 90)
#' )
#'
#' ## convert to sf
#' sf_points <- st_as_sf(random_points, coords = c("x", "y"), crs = 4326)
#'
#' ## Example 1: Write and read persistent table
#' ddbs_write_vector(conn, sf_points, "points")
#' ddbs_read_table(conn, "points")
#'
#' ## Example 2: Register and read Arrow view (faster, temporary)
#' ddbs_register_vector(conn, sf_points, "points_view")
#' ddbs_read_table(conn, "points_view")
#'
#' ## disconnect from db
#' ddbs_stop_conn(conn)
#' }
ddbs_read_table <- function(
    conn,
    name,
    clauses = NULL,
    quiet = FALSE) {
    
    # 0. Handle errors
    dbConnCheck(conn)
    assert_name(name)
    assert_name(clauses, "clauses")
    assert_logic(quiet, "quiet")
    

    # 1. Checks
    ## convenient names of table and/or schema.table
    name_list <- get_query_name(name)

    ## Check if table/view name exists in regular tables or Arrow views
    table_exists <- name_list$table_name %in% DBI::dbListTables(conn)
    object_type <- NULL

    if (table_exists) {
        # Determine if it's a table or view
        tables_df <- ddbs_list_tables(conn)
        db_tables <- paste0(tables_df$table_schema, ".", tables_df$table_name) |>
            sub(pattern = "^main\\.", replacement = "")
        match_idx <- which(db_tables == name_list$query_name)[1]
        if (!is.na(match_idx)) {
            table_type <- tables_df$table_type[match_idx]
            object_type <- if (!is.na(table_type) && identical(table_type, "VIEW")) {
                "view"
            } else {
                "table"
            }
        } else {
            object_type <- "table"
        }
    } else {
        # Check if it exists as an Arrow view
        arrow_views <- try(
            duckdb::duckdb_list_arrow(conn),
            silent = TRUE
        )
        arrow_exists <- if (inherits(arrow_views, "try-error")) {
            FALSE
        } else {
            name_list$query_name %in% arrow_views
        }

        if (!arrow_exists) {
            cli::cli_abort("The provided name is not present in the database as a table, view, or Arrow view.")
        } else {
            object_type <- "Arrow view"
        }
    }

    ## get column names and prepare SQL
    if (object_type == "Arrow view") {
        # For Arrow views, PRAGMA table_info doesn't work, so we need to get columns differently
        all_cols <- DBI::dbListFields(conn, name_list$query_name)

        # Dynamically identify geometry column (heuristic: look for standard names)
        candidates <- c("geometry", "geom", "shape", "wkb_geometry")
        geom_name <- intersect(all_cols, candidates)[1]

        # Fallback if standard name not found: find column that's not crs_duckspatial
        # The geometry column is added before crs_duckspatial, so it should be the
        # last column before crs_duckspatial (or last column if excluding crs_duckspatial)
        if (is.na(geom_name)) {
            if (length(all_cols) > 0) {
                # Take the LAST non-CRS column (geometry is added last during registration)
                geom_name <- all_cols[length(all_cols)]
            }
        }

        if (is.na(geom_name) || !geom_name %in% all_cols) {
            cli::cli_abort("Geometry column wasn't found in Arrow view <{name_list$query_name}>.")
        }

        no_geom_cols <- setdiff(all_cols, geom_name) 
        no_geom_cols <-  if (length(no_geom_cols) > 0) paste0('"', no_geom_cols, '",', collapse = ' ') else ""

        # For Arrow views: Try ST_AsWKB directly first (geoarrow may already be recognized as GEOMETRY)
        # If that fails, ST_GeomFromWKB will be needed, but geoarrow registration makes it GEOMETRY type
        select_geom_sql <- glue::glue("ST_AsWKB({geom_name}) AS {geom_name}")
    } else {
        # For regular tables and views, use get_geom_name
        geom_name    <- get_geom_name(conn, name_list$query_name)
        no_geom_cols <- get_geom_name(conn, name_list$query_name, rest = TRUE, collapse = TRUE)
        if (length(geom_name) == 0) cli::cli_abort("Geometry column wasn't found in table <{name_list$query_name}>.")

        # For regular tables: already GEOMETRY type
        select_geom_sql <- glue::glue("ST_AsWKB({geom_name}) AS {geom_name}")
    }

    # 2. Retrieve data
    ## Retrieve data as data frame
    tmp.query <- glue::glue(
            "SELECT
            {no_geom_cols}
            {select_geom_sql}
            FROM {name_list$query_name}"
    )
    tmp.query <- paste(tmp.query, clauses)
    data_tbl <- DBI::dbGetQuery(conn, tmp.query)
  
    ## Get the CRS
    crs <- get_table_crs(
        conn = conn,
        geom_name = geom_name,
        table_name = name_list$query_name
    )

    ## 5. convert to SF
    data_sf <- convert_to_sf_wkb(
        data       = data_tbl,
        crs        = crs,
        x_geom     = geom_name
    )

    ## return result
    if (isFALSE(quiet)) {
        cli::cli_alert_success("{object_type} {name} successfully imported.")
    }
    return(data_sf)

}





#' Load spatial vector data from DuckDB into R
#'
#' @description
#' `r lifecycle::badge("deprecated")`
#'
#' `ddbs_read_vector()` was renamed to \code{\link{ddbs_read_table}}.
#'
#' @inheritParams ddbs_read_table
#' @returns an sf object
#' @export
#' @keywords internal
ddbs_read_vector <- function(
    conn,
    name,
    clauses = NULL,
    quiet = FALSE) {
    
    lifecycle::deprecate_soft(
        when    = "1.0.0",
        what    = "ddbs_read_vector()",
        with    = "ddbs_read_table()"
    )
    
    ddbs_read_table(
        conn       = conn,
        name       = name,
        clauses    = clauses,
        quiet      = quiet
    )
}




#' Read metadata from a spatial file
#'
#' Retrieves file-level metadata from a spatial vector file (e.g. GeoPackage,
#' Shapefile, GeoJSON) using DuckDB's \code{ST_Read_Meta()} function. Returns
#' information about the file's driver and its layers as a tibble.
#'
#' @param path character, path to the spatial file to inspect.
#' @template conn
#'
#' @returns A \code{tibble} with one row per file and the
#'   following columns:
#'   \describe{
#'     \item{file_name}{Path to the file.}
#'     \item{driver_short_name}{Short name of the GDAL driver (e.g.
#'       \code{"GPKG"}).}
#'     \item{driver_long_name}{Full name of the GDAL driver (e.g.
#'       \code{"GeoPackage"}).}
#'     \item{layers}{A list-column of data frames, one per file, each
#'       describing the layers contained in the file. Unnest with
#'       \code{\link[tidyr]{unnest}} to access individual layer attributes
#'       such as name, geometry type, and feature count.}
#'   }
#' @export
#'
#' @examples
#' \dontrun{
#' ## Read metadata from a GeoPackage
#' meta <- ddbs_read_meta(
#'   system.file("spatial/rivers.geojson",
#'   package = "duckspatial")
#' )
#' 
#' ## View file-level metadata
#' meta
#'
#' ## Inspect layer details
#' tidyr::unnest(meta, layers)
#' }
ddbs_read_meta <- function(path, conn = NULL) {

  if (is.null(conn)) {
    target_conn <- ddbs_default_conn()
  } else {
    target_conn <- conn
  }

  DBI::dbGetQuery(target_conn, glue::glue(
    "SELECT * FROM ST_Read_Meta('{path}')"
  )) |> 
    tibble::as_tibble()

}

Try the duckspatial package in your browser

Any scripts or data that you put into this service are public.

duckspatial documentation built on June 22, 2026, 9:08 a.m.