6: duckdb

Author

Carl Boettiger

Published

March 6, 2024

library(duckdbfs)
library(dplyr)
library(sf)
# SQL

pad <- open_dataset("https://data.source.coop/cboettig/pad-us-3/pad-us3-combined.parquet")

pad_meta <- duckdbfs::st_read_meta("https://data.source.coop/cboettig/pad-us-3/pad-us3-combined.fgb", tblname = "pad_meta")
pad_meta
# A tibble: 1 × 7
  feature_count geom_column_name geom_type     name  code   wkt            proj4
          <dbl> <chr>            <chr>         <chr> <chr>  <chr>          <chr>
1        440107 geom             Unknown (any) ESRI  102039 "PROJCS[\"USA… +pro…
pad |> 
  filter(State_Nm == "CA") |> 
  group_by(FeatClass) |> 
  summarise(total_area = sum(SHAPE_Area),
            n = n()) |>
  collect()
Warning: Missing values are always removed in SQL aggregation functions.
Use `na.rm = TRUE` to silence this warning
This warning is displayed once every 8 hours.
# A tibble: 5 × 3
  FeatClass       total_area     n
  <chr>                <dbl> <dbl>
1 Marine        36793465608.   214
2 Proclamation 173843820097.   324
3 Easement       7275244842. 11337
4 Fee          194547665576. 17873
5 Designation  129887995064.  1293
duckdbfs::load_spatial()

Reading in as a normal tibble, and then converting to a spatial object:

ca_fee <- pad |> 
  filter(State_Nm == "CA", FeatClass == "Fee") |> 
  collect()

ca_fee |> st_as_sf(sf_column_name = "geometry", crs = pad_meta$wkt)
Simple feature collection with 17873 features and 34 fields
Geometry type: GEOMETRY
Dimension:     XY
Bounding box:  xmin: -2349024 ymin: 1242361 xmax: -1646723 ymax: 2452203
Projected CRS: USA_Contiguous_Albers_Equal_Area_Conic_USGS_version
# A tibble: 17,873 × 35
   FeatClass Category Own_Type Own_Name Loc_Own     Mang_Type Mang_Name Loc_Mang
 * <chr>     <chr>    <chr>    <chr>    <chr>       <chr>     <chr>     <chr>   
 1 Fee       Fee      LOC      CITY     San Diego,… LOC       CITY      San Die…
 2 Fee       Fee      LOC      CITY     San Diego,… LOC       CITY      San Die…
 3 Fee       Fee      STAT     OTHS     University… STAT      OTHS      Univers…
 4 Fee       Fee      STAT     OTHS     University… STAT      OTHS      Univers…
 5 Fee       Fee      LOC      CITY     San Diego,… LOC       CITY      San Die…
 6 Fee       Fee      LOC      CITY     San Diego,… LOC       CITY      San Die…
 7 Fee       Fee      LOC      CITY     City of Sa… LOC       CITY      City of…
 8 Fee       Fee      LOC      CITY     San Diego,… LOC       CITY      San Die…
 9 Fee       Fee      LOC      CITY     San Diego,… LOC       CITY      San Die…
10 Fee       Fee      LOC      CITY     San Diego,… LOC       CITY      San Die…
# ℹ 17,863 more rows
# ℹ 27 more variables: Des_Tp <chr>, Loc_Ds <chr>, Unit_Nm <chr>, Loc_Nm <chr>,
#   State_Nm <chr>, Agg_Src <chr>, GIS_Src <chr>, Src_Date <chr>,
#   GIS_Acres <int>, Source_PAID <chr>, WDPA_Cd <int>, Pub_Access <chr>,
#   Access_Src <chr>, Access_Dt <chr>, GAP_Sts <chr>, GAPCdSrc <chr>,
#   GAPCdDt <chr>, IUCN_Cat <chr>, IUCNCtSrc <chr>, IUCNCtDt <chr>,
#   Date_Est <chr>, Comments <chr>, EsmtHldr <chr>, EHoldTyp <chr>, …

Similarly, any normal data frame can be coerced to a spatial sf object, e.g.:

data.frame(lon = c(1,2), lat=c(0,0)) |> st_as_sf(coords = c("lon", "lat"), crs=4326)
Simple feature collection with 2 features and 0 fields
Geometry type: POINT
Dimension:     XY
Bounding box:  xmin: 1 ymin: 0 xmax: 2 ymax: 0
Geodetic CRS:  WGS 84
     geometry
1 POINT (1 0)
2 POINT (2 0)
spatial_ex <- paste0("https://raw.githubusercontent.com/cboettig/duckdbfs/",
                     "main/inst/extdata/spatial-test.csv") |>
  open_dataset(format = "csv") 

spatial_ex |>
  mutate(geometry = st_point(longitude, latitude)) |>
  to_sf(crs = 4326)
Simple feature collection with 10 features and 3 fields
Geometry type: POINT
Dimension:     XY
Bounding box:  xmin: 1 ymin: 1 xmax: 10 ymax: 10
Geodetic CRS:  WGS 84
   site latitude longitude          geom
1     a        1         1   POINT (1 1)
2     b        2         2   POINT (2 2)
3     c        3         3   POINT (3 3)
4     d        4         4   POINT (4 4)
5     e        5         5   POINT (5 5)
6     f        6         6   POINT (6 6)
7     g        7         7   POINT (7 7)
8     h        8         8   POINT (8 8)
9     i        9         9   POINT (9 9)
10    j       10        10 POINT (10 10)
ca_fee <- pad |> 
  filter(State_Nm == "CA", FeatClass == "Fee") |> 
  group_by(Own_Type) |>
  summarise(total = sum(SHAPE_Area)) |> head(1000) |> collect()