Source code for lcmd_db._assembly

"""Assembly template execution via ``uv run --script``."""

from __future__ import annotations

import functools
import json
import shutil
import subprocess
from collections.abc import Mapping, Sequence
from pathlib import Path

from .exceptions import AssemblyError
from .types import AssemblyResult, AssemblyTemplateInfo, SlotInfo


@functools.lru_cache(maxsize=1)
def _find_uv() -> str:
    """Return the path of the ``uv`` executable found on ``PATH``.

    A successful lookup is memoised, so ``PATH`` is searched at most once
    per process after ``uv`` has been found.

    Raises:
        AssemblyError: If no ``uv`` executable is on ``PATH``; the message
            includes installation instructions.
    """
    uv_path = shutil.which("uv")
    if uv_path is None:
        message = (
            "Assembly requires 'uv' but it was not found on PATH.\n\n"
            "Install with:\n"
            "  curl -LsSf https://astral.sh/uv/install.sh | sh\n\n"
            "Or: pip install uv\n\n"
            "Docs: https://docs.astral.sh/uv/getting-started/installation/"
        )
        raise AssemblyError(message)
    return uv_path


class AssemblyTemplate:
    """Execute a PEP 723 assembly script via ``uv run --script``.

    Wraps a self-contained Python script that combines fragment SMILES into a
    molecule. The script declares its own dependencies (e.g. rdkit) via
    PEP 723 inline metadata; ``uv`` resolves them automatically.

    The script receives the resolved fragments as JSON on stdin and is
    expected to print JSON on stdout: one object for a single assembly, or a
    list of objects (one per input) for a batch. Each object carries a truthy
    ``"success"`` plus ``"smiles"``, or an ``"error"`` message.
    """

    __slots__ = ("_info", "_script_path")

    # Upper bound, in seconds, on a single ``uv run`` invocation.
    _TIMEOUT_SECONDS = 120

    def __init__(self, info: AssemblyTemplateInfo, script_path: Path) -> None:
        """Bind template metadata *info* to the script at *script_path*."""
        self._info = info
        self._script_path = script_path

    @property
    def slug(self) -> str:
        """Slug of the template, as given by its metadata."""
        return self._info.slug

    @property
    def name(self) -> str:
        """Name of the template, as given by its metadata."""
        return self._info.name

    @property
    def description(self) -> str:
        """Description of the template, as given by its metadata."""
        return self._info.description

    @property
    def slots(self) -> list[SlotInfo]:
        """Slot definitions of the template (the metadata's own list)."""
        return self._info.slots

    def assemble(self, **fragments: str | None) -> str:
        """Assemble a single molecule from fragment SMILES.

        Keyword arguments must match slot IDs defined in the template.
        Required slots must be provided; optional slots use their defaults.

        Returns:
            The resulting SMILES string.

        Raises:
            AssemblyError: On invalid slots, if the script cannot be run,
                if it reports a failure (the script's error message is
                used), or if its output does not have the expected shape.
        """
        resolved = self._resolve_fragments(fragments)
        raw = self._execute(resolved)
        # Guard against a script that prints valid JSON of the wrong shape,
        # which would otherwise surface as AttributeError / KeyError.
        if not isinstance(raw, dict):
            raise AssemblyError(
                "Assembly script returned unexpected output: expected a "
                f"JSON object, got {type(raw).__name__}"
            )
        if not raw.get("success"):
            raise AssemblyError(str(raw.get("error", "Assembly failed")))
        if "smiles" not in raw:
            raise AssemblyError(
                "Assembly script reported success but returned no 'smiles'"
            )
        return str(raw["smiles"])

    def assemble_batch(
        self, fragments_list: Sequence[Mapping[str, str | None]]
    ) -> list[AssemblyResult]:
        """Assemble multiple molecules in a single subprocess call.

        Returns one ``AssemblyResult`` per input. Assemblies that the script
        reports as failed have ``result.error`` set instead of raising, so
        one bad input doesn't abort the whole batch.

        Slot validation happens before the script runs: an unknown slot or a
        missing required slot in any input still raises ``AssemblyError``.
        An empty *fragments_list* returns ``[]`` without starting ``uv``.

        Raises:
            AssemblyError: On invalid slots, if the script cannot be run, or
                if its output is not a list with one object per input.
        """
        resolved = [self._resolve_fragments(f) for f in fragments_list]
        if not resolved:
            # Nothing to assemble; skip the (slow) subprocess round trip.
            return []
        raw = self._execute(resolved)
        if isinstance(raw, dict):
            # A lone object instead of a list: treat it as a batch-wide
            # failure and surface the script's message when it has one.
            raise AssemblyError(
                str(
                    raw.get(
                        "error",
                        "Assembly script returned a single object for a batch",
                    )
                )
            )
        if not isinstance(raw, list) or not all(isinstance(r, dict) for r in raw):
            raise AssemblyError(
                "Assembly script returned unexpected output: expected a "
                "JSON list of objects"
            )
        if len(raw) != len(resolved):
            # Results are matched to inputs by position, so a count mismatch
            # would silently misattribute molecules.
            raise AssemblyError(
                f"Assembly script returned {len(raw)} result(s) "
                f"for {len(resolved)} input(s)"
            )
        return [_to_result(r) for r in raw]

    def save_script(self, path: str | Path) -> Path:
        """Copy the standalone assembly script to *path*.

        The saved script can be executed directly with ``uv run --script``
        on any machine (e.g. HPC clusters) without the lcmd-db client.

        Returns:
            The destination as a ``Path``.
        """
        dest = Path(path)
        dest.write_text(self._script_path.read_text(encoding="utf-8"), encoding="utf-8")
        return dest

    def _resolve_fragments(
        self, fragments: Mapping[str, str | None]
    ) -> dict[str, str | None]:
        """Validate *fragments* against the template slots and fill defaults.

        For each slot, in template order: a value present in *fragments* is
        used as given (including an explicit ``None``); otherwise an optional
        slot with a non-``None`` default gets that default; an optional slot
        without a default is left out of the result.

        Raises:
            AssemblyError: If a required slot is missing, or if *fragments*
                contains a key that is not a slot ID. Missing required slots
                are reported before unknown keys.
        """
        resolved: dict[str, str | None] = {}
        for slot in self._info.slots:
            if slot.id in fragments:
                resolved[slot.id] = fragments[slot.id]
            elif slot.required:
                raise AssemblyError(
                    f"assemble() missing required slot '{slot.id}' "
                    f"(fragment_type: {slot.fragment_type})"
                )
            elif slot.default is not None:
                resolved[slot.id] = slot.default

        unknown = set(fragments) - {slot.id for slot in self._info.slots}
        if unknown:
            raise AssemblyError(
                f"assemble() got unknown slot(s): {', '.join(sorted(unknown))}"
            )
        return resolved

    def _execute(
        self,
        data: dict[str, str | None] | list[dict[str, str | None]],
    ) -> dict | list[dict]:
        """Run the script with *data* as JSON on stdin and parse its stdout.

        Returns:
            The decoded JSON value printed by the script. Its shape is not
            checked here; callers validate it.

        Raises:
            AssemblyError: If ``uv`` is not available or cannot be started,
                the run exceeds the timeout, the process exits non-zero
                (stderr is included), or stdout is not valid JSON.
        """
        uv = _find_uv()
        command = [uv, "run", "--script", str(self._script_path)]
        try:
            result = subprocess.run(
                command,
                input=json.dumps(data),
                capture_output=True,
                text=True,
                timeout=self._TIMEOUT_SECONDS,
            )
        except subprocess.TimeoutExpired as exc:
            raise AssemblyError(
                f"Assembly script timed out after {self._TIMEOUT_SECONDS} seconds"
            ) from exc
        except OSError as exc:
            # E.g. the cached uv path was removed or is not executable; keep
            # the module's single-exception contract instead of leaking OSError.
            raise AssemblyError(f"Could not launch assembly script: {exc}") from exc

        if result.returncode != 0:
            raise AssemblyError(f"Assembly script failed:\n{result.stderr.strip()}")
        try:
            return json.loads(result.stdout)
        except json.JSONDecodeError as exc:
            # Only the first 500 characters are echoed to keep the message short.
            raise AssemblyError(
                f"Assembly script returned invalid JSON:\n{result.stdout[:500]}"
            ) from exc

    def __repr__(self) -> str:
        return f"AssemblyTemplate({self.slug!r}, slots={[s.id for s in self.slots]})"
def _to_result(raw: dict) -> AssemblyResult:
    """Convert one raw result object from the script into an ``AssemblyResult``.

    A truthy ``"success"`` yields a result carrying ``raw["smiles"]`` as a
    string; anything else yields a result whose ``error`` is the script's
    ``"error"`` message, or ``"Unknown"`` when none was given.
    """
    if not raw.get("success"):
        failure_reason = raw.get("error", "Unknown")
        return AssemblyResult(error=str(failure_reason))
    return AssemblyResult(smiles=str(raw["smiles"]))