# Source code for zarr_vectors.rechunk

"""N-dimensional rechunking for zarr vectors stores.

Rechunking reorganises data so that objects sharing a common
dimension value (group, attribute bin, object ID range) are
physically contiguous on disk. This turns O(N) manifest scans
into O(1) prefix-based reads for that dimension.

Usage::

    from zarr_vectors.rechunk import rechunk, RechunkSpec

    rechunk("tracts.zarrvectors", RechunkSpec(by="group"))

    # Categorical attribute-based rechunking (one chunk per unique value):
    from zarr_vectors.rechunk import rechunk_by_attribute
    rechunk_by_attribute("cells.zarrvectors", "gene")
"""

from __future__ import annotations

from typing import Any

from zarr_vectors.rechunk.engine import rechunk
from zarr_vectors.rechunk.rebin import rebin_level
from zarr_vectors.rechunk.spec import RechunkSpec

# Names exported by ``from zarr_vectors.rechunk import *``.
__all__ = ["RechunkSpec", "rebin_level", "rechunk", "rechunk_by_attribute"]


def rechunk_by_attribute(
    store_path: str,
    attribute_name: str,
    *,
    output: str | None = None,
    spatial_chunk_shape: tuple[float, ...] | None = None,
) -> dict[str, Any]:
    """Rechunk a store into one chunk per value of a categorical attribute.

    Convenience wrapper around :func:`rechunk`: it builds a categorical
    :class:`RechunkSpec` keyed on ``attribute:<attribute_name>`` and
    forwards it together with ``output``.

    Categorical only — each unique value of the named per-object
    attribute gets its own bin, however many unique values exist. The
    resulting chunk keys gain a leading dimension:
    ``(attr_bin, z, y, x)``.

    Args:
        store_path: Source store path or URL.
        attribute_name: Name of a per-object attribute already present
            on the source store (under ``object_attributes/<name>``).
            Also used as the name of the new leading (prefix) dimension.
        output: Output path; if ``None``, rechunks in place.
        spatial_chunk_shape: Optional new spatial chunk shape for the
            output store.

    Returns:
        The summary dict produced by :func:`rechunk`.
    """
    return rechunk(
        store_path,
        RechunkSpec(
            by=f"attribute:{attribute_name}",
            categorical=True,
            spatial_chunk_shape=spatial_chunk_shape,
            prefix_dim_name=attribute_name,
        ),
        output=output,
    )