Read

read(url) returns the manifest as an array of plain row objects, your custom columns included. Filter however you like, then range-fetch the matching files using the offset and size the writer added.

// run anywhere: from esm.sh in the browser, or "@asterisk-labs/cozip" after npm install
import { read } from "https://esm.sh/@asterisk-labs/cozip";

const url = "https://data.source.coop/asterisk-labs/cozip/dataset.zip";

// read() resolves to the manifest: an array of plain row objects.
const manifest = await read(url);
console.log(manifest);

// filter on your own columns, then range-fetch the matching files
const trains = manifest.filter((row) => row.split === "train");
for (const row of trains) {
  // An empty file has no valid byte range (last byte would precede the first),
  // so skip the request instead of sending a malformed Range header.
  if (row.size === 0) {
    console.log(row.name, 0, "bytes");
    continue;
  }

  // HTTP byte ranges are inclusive on both ends, hence the -1.
  const lastByte = row.offset + row.size - 1;
  const res = await fetch(url, {
    headers: { Range: `bytes=${row.offset}-${lastByte}` },
  });

  // Only 206 Partial Content carries the slice we asked for. A 200 means the
  // server ignored Range and is sending the whole archive; 4xx/5xx bodies are
  // error pages. Fail loudly rather than treat either as the file's bytes.
  if (res.status !== 206) {
    throw new Error(
      `Range request for ${row.name} failed: expected HTTP 206, got ${res.status}`,
    );
  }

  const bytes = new Uint8Array(await res.arrayBuffer());
  console.log(row.name, bytes.length, "bytes");
}