Source code for zarr_vectors.core.backends

"""Pluggable storage backends.

zarr-vectors has **two** distinct backend layers, both of which can be
selected via the ``backend=`` kwarg on
:func:`zarr_vectors.core.store.create_store` /
:func:`zarr_vectors.core.store.open_store`:

1. **Byte-level KV backends** (this package): implement the
   :class:`StorageBackend` protocol.  Built-ins are ``local``,
   ``obstore``, and ``fsspec``.  Resolution and instantiation live in
   :func:`resolve_backend_name` and :func:`make_backend`.

2. **Zarr-Store-level backends**: return a ``zarr.abc.store.Store``
   directly.  Currently just ``icechunk`` (transactional, commit-based
   versioning on top of any object store).  Wired in
   :func:`zarr_vectors.core.store._make_zarr_store_with_session`.

Both kinds share the same public ``backend=`` kwarg, so callers don't
need to know which layer they're talking to.  Use ``backend="icechunk"``
for transactional cloud or local storage; use ``backend="obstore"`` /
``backend="fsspec"`` for direct byte-level cloud I/O without versioning.

Resolution order for the byte-level layer (icechunk is always explicit,
never auto-detected):

1. Explicit ``backend=`` kwarg on the public API.
2. ``ZARR_VECTORS_BACKEND`` environment variable.
3. URL-scheme auto-detect:
   - no scheme or ``file://`` → ``local``
   - cloud schemes (``s3``, ``gs``, ``gcs``, ``az``, ``azure``, ``abfs``,
     ``http``, ``https``) → ``obstore`` if installed, else ``fsspec``,
     else a :class:`~zarr_vectors.exceptions.StoreError` with an install
     hint.
   - any other (unrecognised) scheme → ``local``; the backend
     constructor is left to report anything it cannot handle.
"""

from __future__ import annotations

import os
from pathlib import Path
from typing import Any
from urllib.parse import urlparse

from zarr_vectors.exceptions import StoreError

from zarr_vectors.core.backends.async_base import AsyncStorageBackend
from zarr_vectors.core.backends.base import StorageBackend
from zarr_vectors.core.backends.local import LocalBackend

SCHEMES_LOCAL = frozenset({"", "file"})
SCHEMES_OBJECT_STORE = frozenset(
    {"s3", "gs", "gcs", "az", "azure", "abfs", "http", "https"}
)

_ENV_VAR = "ZARR_VECTORS_BACKEND"

__all__ = [
    "StorageBackend",
    "AsyncStorageBackend",
    "LocalBackend",
    "SCHEMES_LOCAL",
    "SCHEMES_OBJECT_STORE",
    "detect_scheme",
    "resolve_backend_name",
    "make_backend",
    "make_async_backend",
]


def detect_scheme(url: str | Path) -> str:
    """Extract the lowercased URL scheme from ``url``.

    Anything that is not a plain string (``Path`` objects included) is
    considered schemeless.  A Windows drive prefix such as ``C:\\foo`` is
    likewise reported as schemeless (``""``) rather than as scheme ``c``.

    Args:
        url: Store URL or filesystem path.

    Returns:
        The scheme in lower case, or ``""`` when there is none.
    """
    # Path instances are not ``str`` subclasses, so this single guard
    # covers both Path objects and any other non-string input.
    if not isinstance(url, str):
        return ""

    scheme = urlparse(url).scheme.lower()

    # urlparse reports ``C:\foo`` as scheme 'c'.  No real scheme is a
    # single character, so anything that short is treated as "no scheme".
    return scheme if len(scheme) > 1 else ""
def resolve_backend_name(
    url: str | Path,
    explicit: str | None = None,
    *,
    env_override: str | None = None,
) -> str:
    """Pick the backend name to use for ``url``.

    Precedence is: the explicit name, then the environment variable (or
    its test override), then auto-detection from the URL scheme.

    Args:
        url: The store URL or path.
        explicit: User-supplied ``backend=`` kwarg.  Returned lowercased
            whenever it is non-empty.
        env_override: Stand-in for the ``ZARR_VECTORS_BACKEND`` env var
            (for testing).  Pass ``""`` to ignore the env var entirely.

    Returns:
        The lowercased explicit / environment value when one is set;
        otherwise one of ``"local"``, ``"obstore"``, ``"fsspec"``.

    Raises:
        StoreError: If a cloud scheme is given but no compatible
            backend is installed.
    """
    if explicit:
        return explicit.lower()

    # ``None`` means "consult the real environment"; any string,
    # including the empty one, replaces the environment lookup.
    if env_override is None:
        from_env = os.environ.get(_ENV_VAR)
    else:
        from_env = env_override
    if from_env:
        return from_env.lower()

    scheme = detect_scheme(url)

    # Local schemes and unknown schemes both go to the local backend; if
    # an unknown scheme is unusable, the backend constructor will raise
    # something more specific.
    if scheme in SCHEMES_LOCAL or scheme not in SCHEMES_OBJECT_STORE:
        return "local"

    # Cloud scheme: prefer obstore, fall back to fsspec.
    for candidate in ("obstore", "fsspec"):
        if _have(candidate):
            return candidate

    raise StoreError(
        f"URL {url!r} has scheme {scheme!r} which requires a cloud "
        f"backend, but neither 'obstore' nor 'fsspec' is installed. "
        f"Install with: pip install zarr-vectors[obstore]"
    )


def make_backend(
    url: str | Path,
    backend: str | None = None,
    *,
    env_override: str | None = None,
    **kwargs: Any,
) -> StorageBackend:
    """Resolve the backend for ``url`` and instantiate it.

    Args:
        url: Store URL or path.
        backend: Explicit backend name (``"local"`` / ``"obstore"`` /
            ``"fsspec"``).  ``None`` means auto-detect.
        env_override: Test hook — see :func:`resolve_backend_name`.
        **kwargs: Forwarded to the backend constructor.

    Returns:
        The constructed backend instance.

    Raises:
        StoreError: If the resolved name is not a known byte-level
            backend, or if resolution itself fails.
    """
    name = resolve_backend_name(url, backend, env_override=env_override)

    # The optional backends are imported lazily so that their
    # dependencies are only needed when actually selected.
    if name == "local":
        backend_cls = LocalBackend
    elif name == "obstore":
        from zarr_vectors.core.backends.obstore_backend import (
            ObstoreBackend as backend_cls,
        )
    elif name == "fsspec":
        from zarr_vectors.core.backends.fsspec_backend import (
            FsspecBackend as backend_cls,
        )
    else:
        raise StoreError(f"Unknown backend: {name!r}")

    return backend_cls(url, **kwargs)


def make_async_backend(
    url: str | Path,
    backend: str | None = None,
    *,
    env_override: str | None = None,
    **kwargs: Any,
) -> AsyncStorageBackend:
    """Resolve and construct an async-capable backend for ``url``.

    This delegates entirely to :func:`make_backend`.  Each concrete
    backend implements both :class:`StorageBackend` and
    :class:`AsyncStorageBackend`, so the returned object can also be used
    synchronously; this entry point exists to give callers a
    statically-typed handle when they intend to use the async surface.

    Args:
        url: Store URL or path.
        backend: Explicit backend name.  ``None`` means auto-detect.
        env_override: Test hook — see :func:`resolve_backend_name`.
        **kwargs: Forwarded to the backend constructor.

    Returns:
        The same object :func:`make_backend` would return.
    """
    instance = make_backend(
        url,
        backend,
        env_override=env_override,
        **kwargs,
    )
    return instance


def _have(module: str) -> bool:
    """Report whether ``module`` can be imported.

    An entry in ``sys.modules`` takes precedence over a spec lookup so
    that test overrides are honoured: a ``None`` entry means "not
    available", while any stub module object (with or without a real
    ``__spec__``) means "available".
    """
    import importlib.util
    import sys

    absent = object()
    entry = sys.modules.get(module, absent)
    if entry is not absent:
        return entry is not None

    try:
        spec = importlib.util.find_spec(module)
    except (ImportError, ValueError):
        return False
    return spec is not None