Quickstart
cozip in R.
Write archives, read them back, query them straight from DuckDB. Copy, paste, run.
install.packages('cozip', repos = c( 'https://asterisk-labs.r-universe.dev', 'https://cloud.r-project.org' ))
Write
cozip::create takes an arrow::Table with name and path columns. Anything else rides along as user-defined manifest columns.
# Create a scratch directory to hold the example payload files.
tmp <- tempfile()
dir.create(tmp)

# Build every path in one vectorized call (file_0000.bin .. file_0002.bin)
# instead of growing a character vector inside the loop.
paths <- file.path(tmp, sprintf("file_%04d.bin", 0:2))

# The payload is identical for every file, so build it once:
# the bytes of "hello cozip\n" repeated 1000 times.
payload <- rep(charToRaw("hello cozip\n"), 1000)
for (p in paths) {
  writeBin(payload, p)
}

# Manifest table with the `name` and `path` columns passed to cozip::create.
tbl <- arrow::arrow_table(
  name = basename(paths),
  path = paths
)

# Write the archive to dataset.zip in the current working directory.
cozip::create("dataset.zip", tbl)
Read
cozip::read works on a local path or a remote URL with the same call. You get a data.frame plus an injected cozip:gdal_vsi column ready for terra or sf.
# Read a local archive.
df <- cozip::read("dataset.zip")

# Or read a remote one with the same call: no full download,
# two range requests under the hood.
remote_url <- paste0(
  "https://huggingface.co/datasets/Major-TOM/Core-VIIRS-Nighttime-Light/",
  "resolve/main/2024/MAJORTOM-VIIRS-NTL_2024_median_000.zip"
)
df <- cozip::read(remote_url)

# Pull out the injected cozip:gdal_vsi column and hand its first
# path straight to terra.
vsi_paths <- df[["cozip:gdal_vsi"]]
r <- terra::rast(vsi_paths[1])
Query with DuckDB
The cozip community extension reads the same archive over SQL. Pair it with the duckdb R package, or run it straight from the DuckDB CLI.
-- Install the community extension (one-time), then load it.
INSTALL cozip FROM community;
LOAD cozip;

-- Hello world: first 10 entries of the manifest.
SELECT *
FROM read_cozip('https://huggingface.co/datasets/Major-TOM/Core-VIIRS-Nighttime-Light/resolve/main/2024/MAJORTOM-VIIRS-NTL_2024_median_000.zip')
LIMIT 10;

-- Raw manifest, without the injected /vsisubfile/ column.
SELECT *
FROM read_cozip(
  'https://huggingface.co/datasets/Major-TOM/Core-VIIRS-Nighttime-Light/resolve/main/2024/MAJORTOM-VIIRS-NTL_2024_median_000.zip',
  gdal_vsi := false
)
LIMIT 10;

-- Filter the manifest, keeping the /vsisubfile/ paths for the biggest tifs.
SELECT
  name,
  "cozip:gdal_vsi",
  size
FROM read_cozip('https://huggingface.co/datasets/Major-TOM/Core-VIIRS-Nighttime-Light/resolve/main/2024/MAJORTOM-VIIRS-NTL_2024_median_000.zip')
WHERE name LIKE '%.tif'
ORDER BY size DESC
LIMIT 5;