Source code for scitex_container.apptainer._reproducible

#!/usr/bin/env python3
# Timestamp: "2026-05-24"
# File: src/scitex_container/apptainer/_reproducible.py
"""Self-verifying reproducible-build round-trip + use-time verify gate.

The round-trip (operator-approved core):

1. **rough build** → ``<layer>-<ts>.sif`` (from the loose ``.def``).
2. **freeze lock** → ``<layer>-<ts>.lock`` (the *actually installed*
   versions — only knowable post-build).
3. **generate locked def** → ``<layer>-<ts>.def`` (every pip version
   pinned from the lock).
4. **rebuild from the locked def** → a throwaway verify SIF.
5. **round-trip verify** — capture the rebuild's lock, compare the two
   version sets:
   - identical → mark ``.verified``.
   - mismatch → **fail loud**: mark ``.unverified`` with the drift diff;
     NOT a build failure (the rough SIF stays usable). Never a silent
     pass.
   The verify SIF is auto-deleted after the compare; the canonical kept
   artifact is the rough SIF + its lock + its locked def + the marker.

Byte-identical (``SOURCE_DATE_EPOCH``) is an OPTIONAL stretch, deliberately
NOT the default gate — version-set identity is the meaningful guarantee
for the paper's reproducibility claim.

The use-time gate (``check_verified``) is what consumers call on *every*
image use: ``.unverified`` → WARN by default, ERROR under
``require_verified``.

scitex-container takes the output ``root`` as an argument (path
injection); it never reads a consumer's config location.
"""

from __future__ import annotations

import logging
import os
from dataclasses import dataclass
from pathlib import Path

from scitex_container._compat import supports_return_as

from . import _store
from ._build import build as _build
from ._config import ImageConfig, load_config
from ._lockgen import (
    LockDiff,
    capture_lock,
    compare_locks,
    generate_locked_def,
    read_lock,
)

logger = logging.getLogger(__name__)


class VerifyError(RuntimeError):
    """Signal that an image failed the use-time verify gate in strict mode.

    ``check_verified`` raises this when strict mode (``require_verified``)
    is in effect and the image either carries an ``.unverified`` marker or
    has no round-trip marker at all.
    """
@dataclass
class RoundTripResult:
    """Artifact paths and verify outcome of one reproducible round-trip build.

    The instance is intentionally mutable: ``build_reproducible`` creates it
    with ``verified=None`` and fills in ``verified`` / ``diff`` once the
    verify step has run.
    """

    layer: str
    ts: str
    sif: Path  # the kept (rough) artifact
    lock: Path
    locked_def: Path
    verified: bool | None  # None when verify was skipped (e.g. background pending)
    diff: LockDiff | None = None

    @property
    def marker(self) -> Path:
        """Return the marker path that matches the current ``verified`` state.

        The store root is taken to be two levels above the SIF (the SIF's
        grandparent directory). A truthy ``verified`` selects the verified
        marker; both ``False`` and ``None`` (verify skipped) select the
        unverified marker path.
        """
        store_root = self.sif.parent.parent
        paths = _store.artifact_paths(store_root, self.layer, self.ts)
        if self.verified:
            return paths.verified_marker
        return paths.unverified_marker
@supports_return_as
def build_reproducible(
    layer: str,
    root: str | Path,
    *,
    def_path: str | Path | None = None,
    def_name: str | None = None,
    verify: bool = True,
    keep: bool = False,
    config: ImageConfig | None = None,
    force: bool = False,
) -> RoundTripResult:
    """Run the reproducible round-trip and manage the artifact store.

    Steps 1-3 (rough build, freeze lock, generate locked def) always run
    synchronously — the rough SIF + lock + locked def are the kept,
    immediately-usable artifacts.

    Steps 4-5 (verify rebuild + compare) run inline when ``verify=True``.
    The operator design specifies steps 4-5 run BACKGROUND-by-default in the
    CLI; this function exposes the synchronous primitive that a caller
    (CLI/MCP) backgrounds. ``verify=False`` skips them entirely (leaving the
    build unmarked) so a caller can schedule the verify rebuild as a
    detached job and call ``verify_roundtrip`` later.

    Parameters
    ----------
    layer : str
        Layer name (artifact stem, e.g. ``sac-base``).
    root : str or Path
        The ``containers/`` directory (path injection).
    def_path : str or Path, optional
        Explicit path to the rough ``.def``. Either this or ``def_name``.
    def_name : str, optional
        Name of the ``.def`` to look up via ``find_containers_dir``.
    verify : bool
        Run steps 4-5 inline. False = skip (build stays unmarked).
    keep : bool
        Write the ``.keep`` prune-protect marker on the build.
    config : ImageConfig, optional
        Resolved config (retain). Loaded from ``root`` when None.
    force : bool
        Force the rough rebuild even when the recipe hash is unchanged.

    Returns
    -------
    RoundTripResult
        The kept artifact paths + verify outcome.

    Raises
    ------
    ValueError
        If neither ``def_path`` nor ``def_name`` is provided.
    FileNotFoundError
        If ``def_path`` is given but does not exist.
    """
    root = Path(root)
    cfg = config or load_config(root)

    # Resolve (and validate) the rough def BEFORE the expensive build so a
    # bad/missing recipe argument fails immediately instead of after the
    # rough SIF has already been built and relocated.
    resolved_rough_def = _resolve_def(def_path, def_name)

    ts = _store.timestamp()
    ap = _store.artifact_paths(root, layer, ts)
    ap.layer_dir.mkdir(parents=True, exist_ok=True)

    # --- Step 1: rough build ------------------------------------------
    # _rough_build builds into a scratch dir-per-image and then moves the
    # SIF (and its build log) into the timestamped slot under
    # <root>/<layer>/, keeping the store flat.
    _rough_build(
        layer=layer,
        ts=ts,
        root=root,
        canonical_sif=ap.sif,
        build_log=ap.build_log,
        def_path=def_path,
        def_name=def_name,
        force=force,
    )

    # Snapshot the rough def alongside (the recipe that produced this build).
    # Copy bytes, not text: the snapshot must be an exact copy and must not
    # depend on the locale encoding or newline translation.
    rough_def_snapshot = ap.layer_dir / f"{layer}-{ts}.rough.def"
    rough_def_snapshot.write_bytes(resolved_rough_def.read_bytes())

    # --- Step 2: freeze lock ------------------------------------------
    rough_lock = capture_lock(ap.sif, ap.lock)

    # --- Step 3: generate locked def ----------------------------------
    generate_locked_def(resolved_rough_def, rough_lock, ap.locked_def)

    # Point the latest symlink at the freshly-built rough artifact.
    _store.point_latest(root, layer, ts)

    if keep:
        _store.protect(root, layer, ts)

    # Prune older builds per retain.
    _store.prune(root, layer, cfg.retain)

    result = RoundTripResult(
        layer=layer,
        ts=ts,
        sif=ap.sif,
        lock=ap.lock,
        locked_def=ap.locked_def,
        verified=None,
    )

    if not verify:
        logger.info("Skipping round-trip verify (verify=False); build is unmarked")
        return result

    # --- Steps 4-5: verify rebuild + compare --------------------------
    diff = verify_roundtrip(layer, root, ts)
    result.verified = diff.identical
    result.diff = diff
    return result
def _resolve_def(
    def_path: str | Path | None,
    def_name: str | None,
) -> Path:
    """Resolve the rough ``.def`` path from ``def_path`` or ``def_name``.

    ``def_path`` wins when both are given; a relative ``def_path`` is
    resolved against the current working directory. ``def_name`` is looked
    up as ``<find_containers_dir()>/<def_name>.def``.

    Raises
    ------
    ValueError
        If both arguments are None.
    FileNotFoundError
        If the resolved definition file does not exist (checked for both
        the ``def_path`` and the ``def_name`` form).
    """
    if def_path is not None:
        p = Path(def_path)
        if not p.is_absolute():
            p = Path.cwd() / p
    elif def_name is not None:
        from ._utils import find_containers_dir

        p = find_containers_dir() / f"{def_name}.def"
    else:
        raise ValueError("Provide either def_path or def_name")

    # Same existence check for both lookup forms, so a missing recipe is
    # reported with one clear message regardless of how it was named.
    if not p.exists():
        raise FileNotFoundError(f"Definition file not found: {p}")
    return p


def _rough_build(
    *,
    layer: str,
    ts: str,
    root: Path,
    canonical_sif: Path,
    build_log: Path,
    def_path: str | Path | None,
    def_name: str | None,
    force: bool,
) -> Path:
    """Run the loose (rough) build, relocate into the timestamped slot.

    Builds via ``_build.build`` with ``image_name=<layer>-<ts>`` so the
    artifact lands in a scratch dir-per-image at
    ``<root>/<layer>-<ts>/<layer>-<ts>.sif``, then moves the SIF into the
    canonical ``<root>/<layer>/<layer>-<ts>.sif`` slot and removes the
    scratch dir + the stray top-level ``<root>/<layer>-<ts>.sif`` symlink
    that ``_build`` writes for cross-layer lookups. The scratch auto-freeze
    locks are discarded — the reproducible store captures its own combined
    ``.lock`` (step 2).

    The rough build's log (which ``_build`` writes into the scratch dir as
    ``<scratch>/<scratch>.build-<inner-ts>.log``, where ``<inner-ts>`` is
    _build's own timestamp) is relocated into the canonical ``build_log``
    slot before the scratch dir is removed — otherwise the log is lost on
    ``rmtree``.

    Returns
    -------
    Path
        The canonical SIF path (``canonical_sif``).
    """
    import shutil

    scratch_name = f"{layer}-{ts}"
    scratch_sif = Path(
        _build(
            def_name=def_name or scratch_name,
            output_dir=root,
            force=force,
            sandbox=False,
            def_path=def_path,
            image_name=scratch_name,
        )
    )

    canonical_sif.parent.mkdir(parents=True, exist_ok=True)
    # os.replace overwrites an existing destination file by itself, so no
    # prior unlink is needed (and none is done, to avoid a window in which
    # the canonical SIF is missing).
    os.replace(scratch_sif, canonical_sif)

    scratch_dir = root / scratch_name

    # Preserve the rough build log into the canonical slot before the
    # scratch dir is removed (otherwise rmtree loses it). _build names
    # its log ``<scratch>.build-<inner-ts>.log`` with its own timestamp,
    # so glob for it rather than guessing the inner ts.
    _preserve_build_log(scratch_dir, scratch_name, build_log)

    # Clean the scratch dir-per-image and the stray top-level symlink.
    if scratch_dir.is_dir():
        shutil.rmtree(scratch_dir, ignore_errors=True)
    stray_link = root / f"{scratch_name}.sif"
    if stray_link.is_symlink() or stray_link.exists():
        try:
            stray_link.unlink()
        except OSError:
            # Deliberate best-effort: a leftover link must not fail the build.
            pass

    # _build auto-freezes into ``output_dir`` (= root) WITHOUT host
    # isolation, leaving host-bleed lock files we never use (step 2
    # captures our own isolated combined lock). Discard them.
    _discard_stray_locks(root)

    return canonical_sif


def _preserve_build_log(scratch_dir: Path, scratch_name: str, build_log: Path) -> None:
    """Relocate _build's rough log out of the scratch dir into ``build_log``.

    _build writes ``<scratch_dir>/<scratch_name>.build-<inner-ts>.log`` with
    its own timestamp; the match that sorts last by path is treated as the
    newest and moved to the canonical ``build_log`` path (replacing any file
    already there) so it survives the scratch ``rmtree``. Silently no-ops if
    the scratch dir or a matching log is missing (e.g. an up-to-date
    skip-rebuild).
    """
    if not scratch_dir.is_dir():
        return
    logs = sorted(scratch_dir.glob(f"{scratch_name}.build-*.log"))
    if not logs:
        return
    newest = logs[-1]
    build_log.parent.mkdir(parents=True, exist_ok=True)
    # os.replace overwrites an existing ``build_log`` in one step.
    os.replace(newest, build_log)


def _discard_stray_locks(root: Path) -> None:
    """Remove the root-level lock files _build's auto-freeze leaves behind."""
    for name in ("requirements-lock.txt", "dpkg-lock.txt", "node-lock.txt"):
        stray = root / name
        # Only regular files are removed; anything else at that name is left.
        if stray.is_file():
            stray.unlink()
@supports_return_as
def verify_roundtrip(
    layer: str,
    root: str | Path,
    ts: str,
) -> LockDiff:
    """Rebuild from the locked def, compare version sets, mark the build.

    Steps 4-5 of the round-trip, split out so a caller can run them in the
    background (the operator default) after the rough build returns:

    1. rebuild from ``<layer>-<ts>.def`` into a throwaway verify SIF,
    2. capture the rebuild's lock (a throwaway ``.verify.lock``),
    3. compare it against the rough lock,
    4. mark ``.verified`` (identical) or ``.unverified`` (drift, loud),
    5. delete the throwaway verify SIF + its scratch dir + the
       ``.verify.lock``.

    Parameters
    ----------
    layer : str
        Layer name.
    root : str or Path
        The ``containers/`` directory.
    ts : str
        Timestamp of the rough build to verify.

    Returns
    -------
    LockDiff
        The round-trip comparison. ``identical`` is the gate.

    Raises
    ------
    FileNotFoundError
        If the locked def or the rough lock for ``<layer>-<ts>`` is missing.
    """
    root = Path(root)
    ap = _store.artifact_paths(root, layer, ts)

    # Both kept inputs must exist before a rebuild is attempted.
    if not ap.locked_def.exists():
        raise FileNotFoundError(f"Locked def not found: {ap.locked_def}")
    if not ap.lock.exists():
        raise FileNotFoundError(f"Rough lock not found: {ap.lock}")

    baseline_lock = read_lock(ap.lock)

    verify_name = f"{layer}-{ts}-verify"
    verify_scratch = root / verify_name
    # Throwaway lock of the rebuild: needed only for the comparison and
    # removed in the ``finally`` so it never lingers beside the kept
    # artifacts.
    verify_lock_path = ap.layer_dir / f"{layer}-{ts}.verify.lock"

    try:
        verify_sif = _build(
            def_name=verify_name,
            output_dir=root,
            force=True,
            sandbox=False,
            def_path=ap.locked_def,
            image_name=verify_name,
        )
        rebuild_lock = capture_lock(verify_sif, verify_lock_path)
        diff = compare_locks(baseline_lock, rebuild_lock)
    finally:
        # Always drop the verify SIF, its scratch dir and symlink, then the
        # throwaway rebuild lock — whether or not the rebuild succeeded.
        _cleanup_verify(root, verify_name, verify_scratch)
        if verify_lock_path.is_file():
            verify_lock_path.unlink()

    if not diff.identical:
        reason = diff.summary()
        _store.mark_unverified(root, layer, ts, reason=reason)
        # Fail loud — but NOT a build failure: the rough SIF stays usable.
        logger.error(
            "Round-trip MISMATCH for %s-%s: %s. Marked .unverified "
            "(rough SIF stays usable; reproducibility unproven).",
            layer,
            ts,
            reason,
        )
        return diff

    _store.mark_verified(root, layer, ts)
    logger.info("Round-trip VERIFIED for %s-%s", layer, ts)
    return diff
def _cleanup_verify(root: Path, verify_name: str, verify_scratch: Path) -> None:
    """Remove everything the throwaway verify rebuild left under ``root``.

    That is the top-level ``<verify_name>.sif`` entry (symlink or file), the
    ``verify_scratch`` directory, and the root-level auto-freeze lock files.
    Removal of the link and the scratch dir is best-effort: failures there
    are ignored.
    """
    import shutil

    top_level_link = root / f"{verify_name}.sif"
    link_present = top_level_link.is_symlink() or top_level_link.exists()
    if link_present:
        try:
            top_level_link.unlink()
        except OSError:
            # Best-effort cleanup; a leftover link is not an error here.
            pass

    if verify_scratch.is_dir():
        shutil.rmtree(verify_scratch, ignore_errors=True)

    # Discard _build's host-bleed auto-freeze locks (see _rough_build).
    _discard_stray_locks(root)


# ---------------------------------------------------------------------------
# Use-time verify gate (scitex-container owns; consumers call it)
# ---------------------------------------------------------------------------
@dataclass(frozen=True)
class VerifyStatus:
    """Immutable outcome of one use-time verify check on an image."""

    state: str  # "verified" | "unverified" | "unknown"
    sif: Path
    detail: str = ""

    @property
    def is_verified(self) -> bool:
        """True only when ``state`` is exactly ``"verified"``."""
        return self.state == "verified"
@supports_return_as
def check_verified(
    sif_path: str | Path,
    *,
    require_verified: bool | None = None,
    root: str | Path | None = None,
    config: ImageConfig | None = None,
) -> VerifyStatus:
    """Check a built image's reproducibility marker — NOISY on every use.

    This is the use-time gate consumers call on every image use. It looks
    beside the SIF (after resolving a ``latest`` symlink) for the
    ``.verified`` / ``.unverified`` marker:

    - ``.verified`` present → ``state="verified"`` (silent OK). It is
      checked first, so it wins if both markers exist.
    - ``.unverified`` present → WARN by default ("reproducibility
      unverified: <drift>"); under ``require_verified`` → raise
      ``VerifyError``.
    - no marker → ``state="unknown"`` → WARN it's unverified; under
      ``require_verified`` → raise.

    Parameters
    ----------
    sif_path : str or Path
        Path to the image being used (may be the ``latest`` symlink).
    require_verified : bool, optional
        Strict mode. When None, resolved from ``config`` /
        ``load_config(root)`` (``images.require_verified``).
    root : str or Path, optional
        Output root for config resolution (when ``require_verified`` and
        ``config`` are both None).
    config : ImageConfig, optional
        Pre-resolved config.

    Returns
    -------
    VerifyStatus
        The marker state + detail.

    Raises
    ------
    VerifyError
        When the image is not verified and strict mode is on.
    """
    sif_path = Path(sif_path)
    # Only a symlink is resolved; a plain path is used exactly as given.
    resolved = sif_path.resolve() if sif_path.is_symlink() else sif_path

    if require_verified is None:
        cfg = config or load_config(root)
        require_verified = cfg.require_verified

    # Markers share the SIF's stem: <stem>.verified / <stem>.unverified.
    verified_marker = resolved.with_suffix(".verified")
    unverified_marker = resolved.with_suffix(".unverified")

    if verified_marker.exists():
        return VerifyStatus(
            state="verified", sif=resolved, detail="round-trip verified"
        )

    if unverified_marker.exists():
        # Flatten the recorded drift reason onto a single line.
        state = "unverified"
        detail = unverified_marker.read_text().strip().replace("\n", "; ")
        msg = f"reproducibility unverified: {detail}"
    else:
        state = "unknown"
        detail = "no marker"
        msg = "reproducibility unverified: no round-trip marker found"

    # Not verified: strict mode raises, otherwise warn and report the state.
    if require_verified:
        raise VerifyError(f"{resolved.name}: {msg}")
    logger.warning("%s: %s", resolved.name, msg)
    return VerifyStatus(state=state, sif=resolved, detail=detail)
# EOF