R/io_write.R

Defines functions ddbs_write_vector ddbs_write_table

Documented in ddbs_write_table ddbs_write_vector

#' Write an SF Object to a DuckDB Database
#'
#' This function writes a Simple Features (SF) object into a DuckDB database as a new table.
#' The table is created in the specified schema of the DuckDB database.
#' 
#' **CRS Persistence:** \code{duckspatial} ensures CRS metadata is retained across 
#' sessions using two strategies: \strong{Native Spatial Storage} (for DuckDB 1.5.0+ 
#' databases) and \strong{Legacy Compatibility} (using column comments for older 
#' database versions). For file-based spatial data interchange, 
#' [ddbs_write_dataset()] to GeoParquet (\code{.parquet}) is recommended.
#'
#' @template conn
#' @param data A \code{sf} object to write to the DuckDB database, or the path to
#' a local file that can be read with `ST_READ`
#' @template name
#' @template overwrite
#' @param temp_view If `TRUE`, registers the `sf` object as a temporary Arrow-backed database 'view' 
#' using [ddbs_register_table] instead of creating a persistent table. This is much faster but the view 
#' will not persist. Defaults to `FALSE`.
#' @template quiet
#'
#' @returns TRUE (invisibly) for successful import
#' @export
#'
#' @examples
#' \dontrun{
#' ## load packages
#' library(duckspatial)
#' library(sf)
#'
#' # create a duckdb database in memory (with spatial extension)
#' conn <- ddbs_create_conn(dbdir = "memory")
#'
#' ## create random points
#' random_points <- data.frame(
#'   id = 1:5,
#'   x = runif(5, min = -180, max = 180),  # Random longitude values
#'   y = runif(5, min = -90, max = 90)     # Random latitude values
#' )
#'
#' ## convert to sf
#' sf_points <- st_as_sf(random_points, coords = c("x", "y"), crs = 4326)
#'
#' ## insert data into the database
#' ddbs_write_table(conn, sf_points, "points")
#'
#' ## read data back into R
#' ddbs_read_table(conn, "points")
#'
#' ## disconnect from db
#' ddbs_stop_conn(conn)
#' }
ddbs_write_table <- function(
    conn,
    data,
    name,
    overwrite = FALSE,
    temp_view = FALSE,
    quiet = FALSE
) {
    # 1. Checks
    ## Check if connection is correct
    dbConnCheck(conn)
    input_crs <- ddbs_input_crs_for_write(data, conn)

    ## Handle temp_view
    if (temp_view) {
        return(ddbs_register_table(conn, data, name, overwrite, quiet))
    }

    # Handle duckspatial_df/tbl_lazy - try cross-connection import first
    if (inherits(data, "duckspatial_df") || inherits(data, "tbl_lazy")) {
        source_conn <- get_conn_from_input(data)
        
        # Check if same connection (no import needed, just materialize)
        same_conn <- !is.null(source_conn) && identical(source_conn, conn)
        
        if (!same_conn && !is.null(source_conn)) {
            # Cross-connection: try efficient import strategies first
            # Use a temp name for the import view, we'll convert to table after
            temp_import_name <- paste0("temp_import_", gsub("-", "_", uuid::UUIDgenerate()))
            
            import_result <- tryCatch({
                import_view_to_connection(conn, source_conn, data, temp_import_name)
            }, error = function(e) NULL)
            
            if (!is.null(import_result)) {
                # Successful import - convert temp view to permanent table
                ## convenient names of table and/or schema.table
                name_list <- get_query_name(name)
                ## get schema.table available in the database
                tables_df <- ddbs_list_tables(conn)
                db_tables <- paste0(tables_df$table_schema, ".", tables_df$table_name) |>
                    sub(pattern = "^main\\.", replacement = "")
                ## Check if table name already exists
                if (name_list$query_name %in% db_tables & !overwrite)
                    cli::cli_abort("The provided name is already present in the database. Please, use `overwrite = TRUE` or choose a different name.")
                
                # Handle overwrite
                overwrite_table(name_list$query_name, conn, quiet, overwrite)
                
                # Create permanent table from the imported view
                DBI::dbExecute(conn, glue::glue(
                    "CREATE TABLE {name_list$query_name} AS SELECT * FROM {import_result$name}"
                ))
                
                # Cleanup temp view
                tryCatch(
                    DBI::dbExecute(conn, glue::glue("DROP VIEW IF EXISTS {import_result$name}")),
                    error = function(e) NULL
                )
                import_result$cleanup()
                
                if (isFALSE(quiet)) {
                    cli::cli_alert_success("Table {name_list$query_name} imported via {import_result$method}")
                }
                geom_col <- tryCatch(get_geom_name(conn, name_list$query_name), error = function(e) NULL)
                ddbs_write_legacy_crs_comment_if_needed(
                    conn,
                    name_list$query_name,
                    geom_col = geom_col,
                    crs = input_crs
                )
                return(invisible(TRUE))
            }
        }
        
        # Fallback: collect to sf and re-upload (slow but always works)
        if (inherits(data, "duckspatial_df")) {
             data <- ddbs_collect(data, as = "sf")
        } else {
             data <- dplyr::collect(data) |> sf::st_as_sf()
        }
    }

    ## convenient names of table and/or schema.table
    name_list <- get_query_name(name)
    ## get schema.table available in the database
    tables_df <- ddbs_list_tables(conn)
    db_tables <- paste0(tables_df$table_schema, ".", tables_df$table_name) |>
        sub(pattern = "^main\\.", replacement = "")
    ## Check if table name already exists
    if (name_list$query_name %in% db_tables & !overwrite)
        cli::cli_abort("The provided name is already present in the database. Please, use `overwrite = TRUE` or choose a different name.")

    # 2. Handle overwrite
    overwrite_table(name_list$query_name, conn, quiet, overwrite)

    ## 3. insert data
    if (inherits(data, "sf")) {

        # 3. Handle unsupported geometries (TOO SLOW)
        # unsupported_types <- c("GEOMETRYCOLLECTION")
        # geom_types <- unique(sf::st_geometry_type(data))
        # if (any(geom_types %in% unsupported_types)) {
        #     cli::cli_abort("Unsupported geometry types found: {paste(geom_types[geom_types %in% unsupported_types], collapse = ', ')}")
        # }

        # 4. Prepare data for writing - import as data frame with geom as binary
        ## Get geometry column name
        geom_name <- setdiff(names(data), names(sf::st_drop_geometry(data)))
        ## Extract geometry as binary and append to data frame
        wkb_data <- sf::st_as_binary(sf::st_geometry(data), EWKB = TRUE)
        data_df <- as.data.frame(data)
        data_df[[geom_name]] <- wkb_data  # Ensure raw data is preserved

        ## Get the CRS, and define the geometry type for duckdb
        geom_field <- get_geometry_type_duckdb(data, conn)

        ## Warn if no CRS was found in the input data
        if (geom_field == "GEOMETRY") {
            cli::cli_alert_warning("No CRS found in the input data. The table will be created without CRS information.")
        }

        ## Write data into DuckDB
        # duckdb::duckdb_register(conn, "temp_view", data_df, experimental = TRUE) # check later
        duckdb::dbWriteTable(
            conn, 
            DBI::Id(schema = name_list$schema_name, table = name_list$table_name), 
            data_df, 
            field.types = c(geom_name = "BLOB")
        )
 
        ## Convert to spatial with CRS
        DBI::dbExecute(conn, glue::glue("
            ALTER TABLE {name_list$query_name}
            ALTER COLUMN {geom_name} SET DATA TYPE {geom_field} USING ST_GeomFromWKB({geom_name});
        "))
        ddbs_write_legacy_crs_comment_if_needed(
            conn,
            name_list$query_name,
            geom_col = geom_name,
            crs = input_crs
        )
        # duckdb::duckdb_unregister(conn, "temp_view") |> on.exit()
        

    } else if (!is.character(data) || length(data) != 1) {
        cli::cli_abort("{.arg data} must be an {.cls sf} object, a {.cls duckspatial_df}, or a file path string.")
    } else {
        ## check file extension
        file_ext <- tools::file_ext(data)
        if (file_ext == "parquet") {
            cli::cli_abort("Direct parquet file writting is not currently supported.")
            ## Current state v1.5.0 - we can write parquet files, but the geometry is stored as STRUCT, and
            ## we cannot convert it to GEOMETRY type in duckdb
            
            ## insert data
            # DBI::dbExecute(
            #     conn,
            #     glue::glue("CREATE TABLE {name_list$query_name} AS SELECT * FROM read_parquet('{data}')")
            # )
            # ## Get the CRS
            # get_parquet_crs(data, conn)
            # ## specify geometry column
            # ## - try to get geom column name
            # metadata_df <- DBI::dbGetQuery(conn, glue::glue("DESCRIBE {name_list$query_name}"))
            # geom_name <- metadata_df$column_name[grepl("STRUCT", metadata_df$column_type)]
            # DBI::dbExecute(conn, glue::glue("
            #     ALTER TABLE {name_list$query_name}
            #     ALTER COLUMN {geom_name} SET DATA TYPE GEOMETRY USING ST_GeomFromWKB({geom_name});
            # "))
        
        } else {
            ## insert files (in duckdb v1.5, ST_Read() already manages CRS)
            DBI::dbExecute(
                conn,
                glue::glue("CREATE TABLE {name_list$query_name} AS SELECT * FROM ST_Read('{data}')")
            )
            ddbs_write_legacy_crs_comment_if_needed(
                conn,
                name_list$query_name,
                crs = input_crs
            )
        }
    }


    # 6. User feedback
    if (isFALSE(quiet)) {
        cli::cli_alert_success("Table {name_list$query_name} successfully imported")
        }

    return(invisible(TRUE))

}




#' Write an SF Object to a DuckDB Database
#'
#' @description
#' `r lifecycle::badge("deprecated")`
#'
#' `ddbs_write_vector()` was renamed to \code{\link{ddbs_write_table}}.
#'
#' @inheritParams ddbs_write_table
#' @returns TRUE (invisibly) for successful import
#' @export
#' @keywords internal
ddbs_write_vector <- function(
    conn,
    data,
    name,
    overwrite = FALSE,
    temp_view = FALSE,
    quiet = FALSE) {
    
    lifecycle::deprecate_soft(
        when    = "1.0.0",
        what    = "ddbs_write_vector()",
        with    = "ddbs_write_table()"
    )
    
    ddbs_write_table(
        conn = conn,
        data = data,
        name = name,
        overwrite = overwrite,
        temp_view = temp_view,
        quiet = quiet
    )
}

Try the duckspatial package in your browser

Any scripts or data that you put into this service are public.

duckspatial documentation built on June 22, 2026, 9:08 a.m.