context("data interface")
sc <- testthat_spark_connection()
test_rdd_with_non_spatial_attrs <- invoke_new(
sc,
"org.apache.sedona.core.spatialRDD.PointRDD",
java_context(sc),
test_data("arealm-small.csv"),
1L, # offset
sc$state$enums$delimiter$csv,
TRUE,
1L, # numPartitions
sc$state$object_cache$storage_levels$memory_only
) %>%
sparklyr.sedona:::new_spatial_rdd("point")
expect_result_matches_original <- function(pt_rdd) {
expect_equal(
pt_rdd %>% invoke("%>%", list("rawSpatialRDD"), list("count")),
test_rdd_with_non_spatial_attrs$.jobj %>%
invoke("%>%", list("rawSpatialRDD"), list("count"))
)
expect_geom_equal(
sc,
test_rdd_with_non_spatial_attrs$.jobj %>%
invoke("%>%", list("rawSpatialRDD"), list("takeOrdered", 5L), list("toArray")),
pt_rdd %>%
invoke("%>%", list("rawSpatialRDD"), list("takeOrdered", 5L), list("toArray"))
)
}
test_that("sedona_read_dsv_to_typed_rdd() creates PointRDD correctly", {
pt_rdd <- sedona_read_dsv_to_typed_rdd(
sc,
location = test_data("arealm-small.csv"),
delimiter = ",",
type = "point",
first_spatial_col_index = 1,
has_non_spatial_attrs = TRUE
)
expect_equal(class(pt_rdd), c("point_rdd", "spatial_rdd"))
expect_equal(pt_rdd$.jobj %>% invoke("approximateTotalCount"), 3000)
expect_boundary_envelope(pt_rdd, c(-173.120769, -84.965961, 30.244859, 71.355134))
for (idx in 0:8) {
expect_equal(
pt_rdd$.jobj %>%
invoke(
"%>%",
list("rawSpatialRDD"),
list("take", 9L),
list("get", idx),
list("getUserData")
),
"testattribute0\ttestattribute1\ttestattribute2"
)
}
})
test_that("sedona_read_dsv_to_typed_rdd() creates PolygonRDD correctly", {
polygon_rdd <- sedona_read_dsv_to_typed_rdd(
sc,
location = test_data("primaryroads-polygon.csv"),
delimiter = ",",
type = "polygon",
first_spatial_col_index = 0,
has_non_spatial_attrs = FALSE
)
expect_equal(class(polygon_rdd), c("polygon_rdd", "spatial_rdd"))
expect_equal(polygon_rdd$.jobj %>% invoke("approximateTotalCount"), 3000)
expect_boundary_envelope(polygon_rdd, c(-158.104182, -66.03575, 17.986328, 48.645133))
})
test_that("sedona_read_dsv_to_typed_rdd() creates LineStringRDD correctly", {
linestring_rdd <- sedona_read_dsv_to_typed_rdd(
sc,
location = test_data("primaryroads-linestring.csv"),
delimiter = ",",
type = "linestring",
first_spatial_col_index = 0,
has_non_spatial_attrs = FALSE
)
expect_equal(class(linestring_rdd), c("linestring_rdd", "spatial_rdd"))
expect_equal(linestring_rdd$.jobj %>% invoke("approximateTotalCount"), 3000)
expect_boundary_envelope(linestring_rdd, c(-123.393766, -65.648659, 17.982169, 49.002374))
})
test_that("sedona_read_geojson_to_typed_rdd() creates PointRDD correctly", {
pt_rdd <- sedona_read_geojson_to_typed_rdd(
sc,
location = test_data("arealm-small.json"),
type = "point",
has_non_spatial_attrs = TRUE
)
expect_equal(class(pt_rdd), c("point_rdd", "spatial_rdd"))
expect_equal(pt_rdd$.jobj %>% invoke("approximateTotalCount"), 3000)
expect_boundary_envelope(pt_rdd, c(-173.1208, -84.9660, 30.2449, 71.3551))
for (idx in 0:8) {
expect_equal(
pt_rdd$.jobj %>%
invoke(
"%>%",
list("rawSpatialRDD"),
list("take", 9L),
list("get", idx),
list("getUserData")
),
"testattribute0\ttestattribute1\ttestattribute2"
)
}
})
test_that("sedona_read_geojson_to_typed_rdd() creates PolygonRDD correctly", {
polygon_rdd <- sedona_read_geojson_to_typed_rdd(
sc,
location = test_data("polygon.json"),
type = "polygon",
has_non_spatial_attrs = TRUE
)
expect_equal(class(polygon_rdd), c("polygon_rdd", "spatial_rdd"))
expect_equal(polygon_rdd$.jobj %>% invoke("approximateTotalCount"), 1001)
expect_false(is.null(polygon_rdd$.jobj %>% invoke("boundaryEnvelope")))
first_2 <- polygon_rdd$.jobj %>%
invoke("%>%", list("rawSpatialRDD"), list("take", 2L))
expected_data <- c(
"01\t077\t011501\t5\t1500000US010770115015\t010770115015\t5\tBG\t6844991\t32636",
"01\t045\t021102\t4\t1500000US010450211024\t010450211024\t4\tBG\t11360854\t0"
)
for (i in seq_along(expected_data)) {
expect_equal(
first_2[[i]] %>% invoke("getUserData"),
expected_data[[i]]
)
}
expect_equal(
polygon_rdd$.jobj %>%
invoke("%>%", list("fieldNames"), list("toArray")) %>%
unlist(),
c("STATEFP", "COUNTYFP", "TRACTCE", "BLKGRPCE", "AFFGEOID", "GEOID", "NAME", "LSAD", "ALAND", "AWATER")
)
})
test_that("sedona_read_geojson() works as expected on geojson input with 'type' and 'geometry' properties", {
geojson_rdd <- sedona_read_geojson(sc, test_data("polygon.json"))
expect_equal(
geojson_rdd$.jobj %>% invoke("%>%", list("rawSpatialRDD"), list("count")), 1001
)
})
test_that("sedona_read_geojson() works as expected on geojson input without 'type' or 'geometry' properties", {
geojson_rdd <- sedona_read_geojson(sc, test_data("polygon-without-properties.json"))
expect_equal(
geojson_rdd$.jobj %>% invoke("%>%", list("rawSpatialRDD"), list("count")), 10
)
})
test_that("sedona_read_geojson() works as expected on geojson input with null property value", {
geojson_rdd <- sedona_read_geojson(sc, test_data("polygon-with-null-property-value.json"))
expect_equal(
geojson_rdd$.jobj %>% invoke("%>%", list("rawSpatialRDD"), list("count")), 3
)
})
test_that("sedona_read_geojson() can skip invalid geometries correctly", {
geojson_rdd <- sedona_read_geojson(
sc,
test_data("polygon-with-invalid-geometries.json"),
allow_invalid_geometries = TRUE,
skip_syntactically_invalid_geometries = FALSE
)
expect_equal(
geojson_rdd$.jobj %>% invoke("%>%", list("rawSpatialRDD"), list("count")), 3
)
geojson_rdd <- sedona_read_geojson(
sc,
test_data("polygon-with-invalid-geometries.json"),
allow_invalid_geometries = FALSE,
skip_syntactically_invalid_geometries = FALSE
)
expect_equal(
geojson_rdd$.jobj %>% invoke("%>%", list("rawSpatialRDD"), list("count")), 2
)
})
test_that("sedona_read_wkb() works as expected", {
wkb_rdd <- sedona_read_wkb(sc, test_data("county_small_wkb.tsv"))
expect_equal(
wkb_rdd$.jobj %>% invoke("%>%", list("rawSpatialRDD"), list("count")), 103
)
})
test_that("sedona_read_shapefile_to_typed_rdd() creates PointRDD correctly", {
pt_rdd <- sedona_read_shapefile_to_typed_rdd(
sc,
location = test_data("point"),
type = "point"
)
expect_equal(class(pt_rdd), c("point_rdd", "spatial_rdd"))
expect_equal(
pt_rdd$.jobj %>% invoke("%>%", list("rawSpatialRDD"), list("count")),
100000
)
})
test_that("sedona_read_shapefile_to_typed_rdd() creates PolygonRDD correctly", {
polygon_rdd <- sedona_read_shapefile_to_typed_rdd(
sc,
location = test_data("polygon"),
type = "polygon"
)
expect_equal(class(polygon_rdd), c("polygon_rdd", "spatial_rdd"))
expect_equal(
polygon_rdd$.jobj %>% invoke("%>%", list("rawSpatialRDD"), list("count")),
20069
)
})
test_that("sedona_read_shapefile_to_typed_rdd() creates LineStringRDD correctly", {
linestring_rdd <- sedona_read_shapefile_to_typed_rdd(
sc,
location = test_data("polyline"),
type = "linestring"
)
expect_equal(class(linestring_rdd), c("linestring_rdd", "spatial_rdd"))
expect_equal(
linestring_rdd$.jobj %>% invoke("%>%", list("rawSpatialRDD"), list("count")),
15137
)
})
test_that("sedona_read_shapefile() works as expected", {
wkb_rdd <- sedona_read_shapefile(sc, test_data("polygon"))
expect_equal(
wkb_rdd$.jobj %>% invoke("%>%", list("rawSpatialRDD"), list("count")), 10000
)
})
test_that("sedona_write_wkb() works as expected", {
output_location <- tempfile()
sedona_write_wkb(test_rdd_with_non_spatial_attrs, output_location)
pt_rdd <- invoke_new(
sc,
"org.apache.sedona.core.spatialRDD.PointRDD",
java_context(sc),
output_location,
0L, # offset
sc$state$enums$delimiter$wkb,
TRUE,
1L, # numPartitions
sc$state$object_cache$storage_levels$memory_only
)
expect_result_matches_original(pt_rdd)
})
test_that("sedona_write_wkt() works as expected", {
output_location <- tempfile()
sedona_write_wkt(test_rdd_with_non_spatial_attrs, output_location)
pt_rdd <- invoke_new(
sc,
"org.apache.sedona.core.spatialRDD.PointRDD",
java_context(sc),
output_location,
0L, # offset
sc$state$enums$delimiter$wkt,
TRUE,
1L, # numPartitions
sc$state$object_cache$storage_levels$memory_only
)
expect_result_matches_original(pt_rdd)
})
test_that("sedona_write_geojson() works as expected", {
output_location <- tempfile()
sedona_write_geojson(test_rdd_with_non_spatial_attrs, output_location)
pt_rdd <- invoke_new(
sc,
"org.apache.sedona.core.spatialRDD.PointRDD",
java_context(sc),
output_location,
0L, # offset
sc$state$enums$delimiter$geojson,
TRUE,
1L, # numPartitions
sc$state$object_cache$storage_levels$memory_only
)
expect_result_matches_original(pt_rdd)
})
test_that("sedona_save_spatial_rdd() works as expected", {
for (fmt in c("wkb", "wkt", "geojson")) {
location <- tempfile(pattern = "pt_", fileext = paste0(".", fmt))
copy_to(
sc, tibble::tibble(id = 1, name = "a point", type = "point")
) %>%
dplyr::mutate(
pt = dplyr::sql(
"ST_Point(CAST(123 AS Decimal(24, 20)), CAST(456 AS Decimal(24, 20)))"
)
) %>%
sedona_save_spatial_rdd(
spatial_col = "pt", output_location = location, output_format = fmt
)
sdf <- do.call(paste0("sedona_read_", fmt), list(sc, location))
expect_equal(
sdf$.jobj %>% invoke("%>%", list("rawSpatialRDD"), list("count")), 1
)
pts <- sdf$.jobj %>%
invoke("%>%", list("rawSpatialRDD"), list("takeOrdered", 1L))
expect_equal(pts[[1]] %>% invoke("getX"), 123)
expect_equal(pts[[1]] %>% invoke("getY"), 456)
expect_equal(pts[[1]] %>% invoke("getUserData"), "1.0\ta point\tpoint")
}
})
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.