library(duckdbfs)
library(dplyr)
library(sf)
6: duckdb
# SQL
<- open_dataset("https://data.source.coop/cboettig/pad-us-3/pad-us3-combined.parquet")
pad
<- duckdbfs::st_read_meta("https://data.source.coop/cboettig/pad-us-3/pad-us3-combined.fgb", tblname = "pad_meta")
pad_meta pad_meta
# A tibble: 1 × 7
feature_count geom_column_name geom_type name code wkt proj4
<dbl> <chr> <chr> <chr> <chr> <chr> <chr>
1 440107 geom Unknown (any) ESRI 102039 "PROJCS[\"USA… +pro…
|>
pad filter(State_Nm == "CA") |>
group_by(FeatClass) |>
summarise(total_area = sum(SHAPE_Area),
n = n()) |>
collect()
Warning: Missing values are always removed in SQL aggregation functions.
Use `na.rm = TRUE` to silence this warning
This warning is displayed once every 8 hours.
# A tibble: 5 × 3
FeatClass total_area n
<chr> <dbl> <dbl>
1 Marine 36793465608. 214
2 Proclamation 173843820097. 324
3 Easement 7275244842. 11337
4 Fee 194547665576. 17873
5 Designation 129887995064. 1293
::load_spatial() duckdbfs
Reading in as a normal tibble, and then converting to a spatial object:
<- pad |>
ca_fee filter(State_Nm == "CA", FeatClass == "Fee") |>
collect()
|> st_as_sf(sf_column_name = "geometry", crs = pad_meta$wkt) ca_fee
Simple feature collection with 17873 features and 34 fields
Geometry type: GEOMETRY
Dimension: XY
Bounding box: xmin: -2349024 ymin: 1242361 xmax: -1646723 ymax: 2452203
Projected CRS: USA_Contiguous_Albers_Equal_Area_Conic_USGS_version
# A tibble: 17,873 × 35
FeatClass Category Own_Type Own_Name Loc_Own Mang_Type Mang_Name Loc_Mang
* <chr> <chr> <chr> <chr> <chr> <chr> <chr> <chr>
1 Fee Fee LOC CITY San Diego,… LOC CITY San Die…
2 Fee Fee LOC CITY San Diego,… LOC CITY San Die…
3 Fee Fee STAT OTHS University… STAT OTHS Univers…
4 Fee Fee STAT OTHS University… STAT OTHS Univers…
5 Fee Fee LOC CITY San Diego,… LOC CITY San Die…
6 Fee Fee LOC CITY San Diego,… LOC CITY San Die…
7 Fee Fee LOC CITY City of Sa… LOC CITY City of…
8 Fee Fee LOC CITY San Diego,… LOC CITY San Die…
9 Fee Fee LOC CITY San Diego,… LOC CITY San Die…
10 Fee Fee LOC CITY San Diego,… LOC CITY San Die…
# ℹ 17,863 more rows
# ℹ 27 more variables: Des_Tp <chr>, Loc_Ds <chr>, Unit_Nm <chr>, Loc_Nm <chr>,
# State_Nm <chr>, Agg_Src <chr>, GIS_Src <chr>, Src_Date <chr>,
# GIS_Acres <int>, Source_PAID <chr>, WDPA_Cd <int>, Pub_Access <chr>,
# Access_Src <chr>, Access_Dt <chr>, GAP_Sts <chr>, GAPCdSrc <chr>,
# GAPCdDt <chr>, IUCN_Cat <chr>, IUCNCtSrc <chr>, IUCNCtDt <chr>,
# Date_Est <chr>, Comments <chr>, EsmtHldr <chr>, EHoldTyp <chr>, …
Similarly, any normal data frame can be coerced to a spatial sf
object, e.g.:
data.frame(lon = c(1,2), lat=c(0,0)) |> st_as_sf(coords = c("lon", "lat"), crs=4326)
Simple feature collection with 2 features and 0 fields
Geometry type: POINT
Dimension: XY
Bounding box: xmin: 1 ymin: 0 xmax: 2 ymax: 0
Geodetic CRS: WGS 84
geometry
1 POINT (1 0)
2 POINT (2 0)
<- paste0("https://raw.githubusercontent.com/cboettig/duckdbfs/",
spatial_ex "main/inst/extdata/spatial-test.csv") |>
open_dataset(format = "csv")
|>
spatial_ex mutate(geometry = st_point(longitude, latitude)) |>
to_sf(crs = 4326)
Simple feature collection with 10 features and 3 fields
Geometry type: POINT
Dimension: XY
Bounding box: xmin: 1 ymin: 1 xmax: 10 ymax: 10
Geodetic CRS: WGS 84
site latitude longitude geom
1 a 1 1 POINT (1 1)
2 b 2 2 POINT (2 2)
3 c 3 3 POINT (3 3)
4 d 4 4 POINT (4 4)
5 e 5 5 POINT (5 5)
6 f 6 6 POINT (6 6)
7 g 7 7 POINT (7 7)
8 h 8 8 POINT (8 8)
9 i 9 9 POINT (9 9)
10 j 10 10 POINT (10 10)
<- pad |>
ca_fee filter(State_Nm == "CA", FeatClass == "Fee") |>
group_by(Own_Type) |>
summarise(total = sum(SHAPE_Area)) |> head(1000) |> collect()