Source code for scitex_container.apptainer._store

#!/usr/bin/env python3
# Timestamp: "2026-05-24"
# File: src/scitex_container/apptainer/_store.py
"""Timestamped (sif, lock) artifact store for reproducible builds.

Artifact layout (per the operator-approved design)::

    <root>/containers/
    ├── <layer>.sif  ->  <layer>/<layer>-<ts>.sif        (latest symlink)
    └── <layer>/
        ├── <layer>-<ts>.sif        # canonical artifact (one per <ts>)
        ├── <layer>-<ts>.lock        # paired lock (full version set)
        ├── <layer>-<ts>.def         # locked def (used to reproduce)
        ├── <layer>-<ts>.verified    # OR .unverified (round-trip result)
        ├── <layer>-<ts>.keep        # optional: prune-protect dotfile
        ├── <layer>-<ts>.build.log
        └── ... older <ts> sets (pruned to retain N)

The ``<ts>`` correspondence between the SIF and its lock is the
invariant: a build's lock reproduces exactly that build's SIF.

This module owns the on-disk shape only (paths, symlink, markers,
retention). The round-trip orchestration lives in ``_reproducible.py``;
lock capture + locked-def generation live in ``_lockgen.py``.
"""

from __future__ import annotations

import datetime as _dt
import logging
import re
from dataclasses import dataclass
from pathlib import Path

from scitex_container._compat import supports_return_as

# Module-level logger; this module logs symlink updates and pruned files.
logger = logging.getLogger(__name__)

# Regex *fragment* for a build timestamp: ``YYYY-MMDD-HHMMSS`` (4 digits,
# dash, 4 digits, dash, 6 digits), i.e. the shape produced by
# ``strftime("%Y-%m%d-%H%M%S")``, which mirrors the existing build-log
# timestamp. It carries no anchors and no capture group; users embed it as
# ``({_TS_RE})`` inside an anchored ``<layer>-<ts>.sif`` pattern.
_TS_RE = r"\d{4}-\d{4}-\d{6}"


def timestamp() -> str:
    """Generate the ``<ts>`` tag for a new build.

    Returns
    -------
    str
        The current local time rendered as ``YYYY-MMDD-HHMMSS``
        (for example ``2026-0524-134501``).
    """
    # A non-empty format spec on a datetime is rendered via strftime.
    return f"{_dt.datetime.now():%Y-%m%d-%H%M%S}"


@dataclass(frozen=True)
class ArtifactPaths:
    """Path calculator for a single ``(layer, ts)`` build.

    Holds only the three identifying values; every file location is
    derived on access and nothing touches the filesystem. Per-build files
    live in ``<root>/<layer>/`` and are named ``<layer>-<ts><suffix>``;
    the layer-level ``latest`` symlink lives directly in ``<root>``.
    """

    root: Path  # the containers/ directory
    layer: str  # layer name (also the per-layer subdirectory name)
    ts: str  # build timestamp tag

    def _member(self, suffix: str) -> Path:
        """Return ``<layer_dir>/<layer>-<ts><suffix>``."""
        return self.layer_dir / f"{self.layer}-{self.ts}{suffix}"

    @property
    def layer_dir(self) -> Path:
        """Directory holding all timestamped builds of this layer."""
        return self.root / self.layer

    @property
    def latest_symlink(self) -> Path:
        """Layer-level ``<root>/<layer>.sif`` symlink path."""
        return self.root / f"{self.layer}.sif"

    @property
    def sif(self) -> Path:
        """The SIF image of this build."""
        return self._member(".sif")

    @property
    def lock(self) -> Path:
        """The lock file paired with this build's SIF."""
        return self._member(".lock")

    @property
    def locked_def(self) -> Path:
        """The locked definition file of this build."""
        return self._member(".def")

    @property
    def verified_marker(self) -> Path:
        """Marker file recording a successful round-trip check."""
        return self._member(".verified")

    @property
    def unverified_marker(self) -> Path:
        """Marker file recording an unverified round-trip result."""
        return self._member(".unverified")

    @property
    def keep_marker(self) -> Path:
        """Marker file that protects this build from pruning."""
        return self._member(".keep")

    @property
    def build_log(self) -> Path:
        """The build log of this build."""
        return self._member(".build.log")


def artifact_paths(
    root: str | Path,
    layer: str,
    ts: str,
) -> ArtifactPaths:
    """Create the ``ArtifactPaths`` describing build ``ts`` of ``layer``.

    ``root`` (the ``containers/`` directory) may be given as a string or a
    ``Path``; it is normalised to a ``Path`` before being stored.
    """
    containers_dir = Path(root)
    return ArtifactPaths(root=containers_dir, layer=layer, ts=ts)


@supports_return_as
def list_builds(root: str | Path, layer: str) -> list[dict]:
    """Enumerate the timestamped builds stored for ``layer``.

    Parameters
    ----------
    root : str or Path
        The ``containers/`` directory.
    layer : str
        Layer name (e.g. ``sac-base``).

    Returns
    -------
    list[dict]
        One dict per ``<layer>-<ts>.sif`` found in ``<root>/<layer>/``,
        newest ``ts`` first. Keys: ``ts``, ``sif``, ``lock`` and ``def``
        (path strings; ``lock``/``def`` are ``None`` when the file is
        absent), ``verified`` (``True``/``False``/``None`` — ``None`` when
        neither marker exists), ``keep`` (bool) and ``active`` (bool —
        whether the ``latest`` symlink targets this build). An empty list
        is returned when the layer directory does not exist.
    """
    containers = Path(root)
    layer_dir = containers / layer
    if not layer_dir.is_dir():
        return []

    sif_name = re.compile(rf"^{re.escape(layer)}-({_TS_RE})\.sif$")
    current_ts = _active_ts(containers, layer)

    entries: list[dict] = []
    for candidate in layer_dir.glob(f"{layer}-*.sif"):
        hit = sif_name.match(candidate.name)
        if hit is None:
            # Glob is looser than the timestamp pattern; skip look-alikes.
            continue
        stamp = hit.group(1)
        paths = artifact_paths(containers, layer, stamp)

        # .verified takes precedence over .unverified; no marker -> None.
        state = (
            True
            if paths.verified_marker.exists()
            else (False if paths.unverified_marker.exists() else None)
        )
        lock_path = str(paths.lock) if paths.lock.exists() else None
        def_path = str(paths.locked_def) if paths.locked_def.exists() else None

        entries.append(
            {
                "ts": stamp,
                "sif": str(paths.sif),
                "lock": lock_path,
                "def": def_path,
                "verified": state,
                "keep": paths.keep_marker.exists(),
                "active": stamp == current_ts,
            }
        )

    # Fixed-width timestamps sort chronologically as plain strings.
    return sorted(entries, key=lambda entry: entry["ts"], reverse=True)
def _active_ts(root: Path, layer: str) -> str | None:
    """Extract the ``<ts>`` of the build the ``latest`` symlink targets.

    Returns ``None`` when ``<root>/<layer>.sif`` is not a symlink or when
    its target does not look like a ``<layer>-<ts>.sif`` file. The link is
    only read, not followed, so the target need not exist.
    """
    latest = root / f"{layer}.sif"
    if not latest.is_symlink():
        return None

    raw_target = str(latest.readlink())
    name = re.escape(layer)
    # Try the canonical relative form first; otherwise fall back to the
    # final path component so absolute or oddly-shaped targets still match.
    attempts = (
        (rf"^{name}/{name}-({_TS_RE})\.sif$", raw_target),
        (rf"^{name}-({_TS_RE})\.sif$", Path(raw_target).name),
    )
    for pattern, text in attempts:
        found = re.match(pattern, text)
        if found:
            return found.group(1)
    return None
@supports_return_as
def point_latest(root: str | Path, layer: str, ts: str) -> Path:
    """Point the layer ``latest`` symlink at the ``(layer, ts)`` build.

    Writes ``<root>/<layer>.sif -> <layer>/<layer>-<ts>.sif`` (a relative
    target so the store stays relocatable). The new link is created under
    a temporary name in ``root`` and then renamed over the final path with
    ``os.replace``, so an existing symlink/file is swapped in one step.

    Parameters
    ----------
    root : str or Path
        The ``containers/`` directory.
    layer : str
        Layer name.
    ts : str
        Timestamp of the build to make active.

    Returns
    -------
    Path
        The symlink path.

    Raises
    ------
    FileNotFoundError
        If the SIF for ``(layer, ts)`` does not exist.
    OSError
        If the temporary symlink cannot be created or renamed into place.
        When the rename fails, the temporary symlink is removed before the
        error propagates so no stray dotfile is left in ``root``.
    """
    import os as _os

    root = Path(root)
    ap = artifact_paths(root, layer, ts)
    if not ap.sif.exists():
        raise FileNotFoundError(f"Build artifact not found: {ap.sif}")

    link = ap.latest_symlink
    rel_target = Path(layer) / f"{layer}-{ts}.sif"
    tmp = root / f".{layer}.sif.tmp.{ts}"
    # Clear a leftover temp link; is_symlink() also catches a dangling
    # one, for which exists() is False.
    if tmp.is_symlink() or tmp.exists():
        tmp.unlink()
    tmp.symlink_to(rel_target)
    try:
        # os.replace on a symlink path renames the link itself.
        _os.replace(tmp, link)
    except OSError:
        # The rename did not happen, so the temp link is still there.
        tmp.unlink(missing_ok=True)
        raise
    logger.info("Pointed %s.sif -> %s", layer, rel_target)
    return link
@supports_return_as
def prune(root: str | Path, layer: str, retain: int) -> list[str]:
    """Delete old builds of ``layer`` beyond the ``retain`` newest ones.

    Protected builds are skipped entirely and do not count toward
    ``retain``:

    - the active build (current ``latest`` symlink target),
    - any build carrying a ``.keep`` marker.

    Of the remaining builds, the ``retain`` newest (by ``<ts>``) are kept
    and all older ones are removed.

    Parameters
    ----------
    root : str or Path
        The ``containers/`` directory.
    layer : str
        Layer name.
    retain : int
        Number of most-recent unprotected builds to keep.

    Returns
    -------
    list[str]
        The ``<ts>`` values that were pruned, newest first.
    """
    containers = Path(root)
    newest_first = list_builds(containers, layer)
    current_ts = _active_ts(containers, layer)

    # Builds that are neither active nor .keep-protected, newest first.
    unprotected = [
        entry["ts"]
        for entry in newest_first
        if entry["ts"] != current_ts and not entry["keep"]
    ]

    removed: list[str] = []
    for position, stamp in enumerate(unprotected):
        if position < retain:
            continue
        _remove_build(containers, layer, stamp)
        removed.append(stamp)
    return removed
def _remove_build(root: Path, layer: str, ts: str) -> None:
    """Remove all per-build files belonging to ``(layer, ts)``.

    Covers the SIF, lock, locked def, both verification markers, the keep
    marker and the build log. Paths that are neither a regular file nor a
    symlink are left alone; each deletion is logged at INFO level.
    """
    paths = artifact_paths(root, layer, ts)
    members = (
        "sif",
        "lock",
        "locked_def",
        "verified_marker",
        "unverified_marker",
        "keep_marker",
        "build_log",
    )
    for attr in members:
        target = getattr(paths, attr)
        if not (target.is_file() or target.is_symlink()):
            continue
        logger.info("Pruning %s", target.name)
        target.unlink()
@supports_return_as
def mark_verified(root: str | Path, layer: str, ts: str) -> Path:
    """Record a successful round-trip for build ``(layer, ts)``.

    Removes an existing ``.unverified`` marker, then writes the
    ``.verified`` marker containing a one-line note with the current
    local time in ISO format.

    Returns
    -------
    Path
        The ``.verified`` marker path.
    """
    paths = artifact_paths(Path(root), layer, ts)

    stale = paths.unverified_marker
    if stale.exists():
        stale.unlink()

    marker = paths.verified_marker
    note = f"round-trip verified at {_dt.datetime.now().isoformat()}\n"
    marker.write_text(note)
    return marker
@supports_return_as
def mark_unverified(root: str | Path, layer: str, ts: str, reason: str = "") -> Path:
    """Record that build ``(layer, ts)`` could not be verified.

    Removes an existing ``.verified`` marker, then writes the
    ``.unverified`` marker. Its first line is a note with the current
    local time in ISO format; when ``reason`` is non-empty it follows,
    terminated by exactly one newline, so a later check can report *what*
    drifted rather than only *that* something drifted.

    Returns
    -------
    Path
        The ``.unverified`` marker path.
    """
    paths = artifact_paths(Path(root), layer, ts)

    stale = paths.verified_marker
    if stale.exists():
        stale.unlink()

    parts = [f"reproducibility unverified at {_dt.datetime.now().isoformat()}\n"]
    if reason:
        # Normalise the tail so the file always ends with a single newline.
        parts.append(reason.rstrip("\n") + "\n")

    marker = paths.unverified_marker
    marker.write_text("".join(parts))
    return marker
@supports_return_as
def protect(root: str | Path, layer: str, ts: str) -> Path:
    """Shield build ``(layer, ts)`` from pruning.

    Creates (or overwrites) the build's ``.keep`` marker file.

    Returns
    -------
    Path
        The ``.keep`` marker path.
    """
    marker = artifact_paths(Path(root), layer, ts).keep_marker
    marker.write_text("prune-protected\n")
    return marker
# EOF