# Source code for lcmd_db._assembly
"""Assembly template execution via ``uv run --script``."""
from __future__ import annotations
import functools
import json
import shutil
import subprocess
from collections.abc import Mapping, Sequence
from pathlib import Path
from .exceptions import AssemblyError
from .types import AssemblyResult, AssemblyTemplateInfo, SlotInfo
@functools.lru_cache(1)
def _find_uv() -> str:
    """Return the location of the ``uv`` executable found on ``PATH``.

    The lookup goes through ``functools.lru_cache``, so once ``uv`` has
    been located the same path is handed back on later calls.

    Raises:
        AssemblyError: ``uv`` is not on ``PATH``; the message carries
            installation instructions.
    """
    uv_path = shutil.which("uv")
    if uv_path is None:
        raise AssemblyError(
            "Assembly requires 'uv' but it was not found on PATH.\n\n"
            "Install with:\n"
            " curl -LsSf https://astral.sh/uv/install.sh | sh\n\n"
            "Or: pip install uv\n\n"
            "Docs: https://docs.astral.sh/uv/getting-started/installation/"
        )
    return uv_path
# [docs]
class AssemblyTemplate:
    """Execute a PEP 723 assembly script via ``uv run --script``.

    Wraps a self-contained Python script that combines fragment SMILES
    into a molecule. The script declares its own dependencies (e.g. rdkit)
    via PEP 723 inline metadata; ``uv`` resolves them automatically.

    The script receives the fragments as JSON on stdin (one object for a
    single assembly, a list of objects for a batch) and must print JSON
    on stdout.
    """

    __slots__ = ("_info", "_script_path")

    # Wall-clock limit, in seconds, for one ``uv run`` invocation.
    _TIMEOUT_SECONDS = 120

    def __init__(self, info: AssemblyTemplateInfo, script_path: Path) -> None:
        """Bind template metadata *info* to the script stored at *script_path*."""
        self._info = info
        self._script_path = script_path

    @property
    def slug(self) -> str:
        """Slug taken from the template metadata."""
        return self._info.slug

    @property
    def name(self) -> str:
        """Name taken from the template metadata."""
        return self._info.name

    @property
    def description(self) -> str:
        """Description taken from the template metadata."""
        return self._info.description

    @property
    def slots(self) -> list[SlotInfo]:
        """Slot definitions of the template (the metadata's own list, not a copy)."""
        return self._info.slots

    def assemble(self, **fragments: str | None) -> str:
        """Assemble a single molecule from fragment SMILES.

        Returns the resulting SMILES string. Raises ``AssemblyError``
        on failure with the error message from the script, and also when
        the script's output is not a JSON object or reports success
        without a SMILES string.

        Keyword arguments must match slot IDs defined in the template.
        Required slots must be provided; optional slots use their defaults.
        """
        resolved = self._resolve_fragments(fragments)
        raw = self._execute(resolved)
        # A single assembly must answer with one JSON object; anything else
        # would otherwise surface as an AttributeError/KeyError below.
        if not isinstance(raw, dict):
            raise AssemblyError(
                "Assembly script returned unexpected output: expected a JSON "
                f"object, got {type(raw).__name__}"
            )
        if not raw.get("success"):
            raise AssemblyError(str(raw.get("error", "Assembly failed")))
        smiles = raw.get("smiles")
        if smiles is None:
            # Without this guard a null/missing value would come back as "None".
            raise AssemblyError(
                "Assembly script reported success but returned no SMILES"
            )
        return str(smiles)

    def assemble_batch(
        self, fragments_list: Sequence[Mapping[str, str | None]]
    ) -> list[AssemblyResult]:
        """Assemble multiple molecules in a single subprocess call.

        Returns one ``AssemblyResult`` per input. Failed assemblies
        have ``result.error`` set instead of raising, so one bad input
        doesn't abort the whole batch.

        An empty *fragments_list* returns ``[]`` without starting a
        subprocess. ``AssemblyError`` is raised when slot validation fails
        for any input, when the script itself cannot be run, or when its
        output is not a list with exactly one entry per input.
        """
        resolved = [self._resolve_fragments(f) for f in fragments_list]
        if not resolved:
            # Nothing to assemble: skip the (comparatively slow) uv launch.
            return []
        raw = self._execute(resolved)
        if not isinstance(raw, list):
            raise AssemblyError(
                "Assembly script returned unexpected output: expected a JSON "
                f"list, got {type(raw).__name__}"
            )
        if len(raw) != len(resolved):
            # Results are matched to inputs by position, so a count mismatch
            # would silently pair results with the wrong inputs.
            raise AssemblyError(
                f"Assembly script returned {len(raw)} result(s) "
                f"for {len(resolved)} input(s)"
            )
        return [_to_result(r) for r in raw]

    def save_script(self, path: str | Path) -> Path:
        """Copy the standalone assembly script to *path*.

        The saved script can be executed directly with ``uv run --script``
        on any machine (e.g. HPC clusters) without the lcmd-db client.

        Returns *path* as a ``Path``.
        """
        dest = Path(path)
        dest.write_text(self._script_path.read_text(encoding="utf-8"), encoding="utf-8")
        return dest

    def _resolve_fragments(
        self, fragments: Mapping[str, str | None]
    ) -> dict[str, str | None]:
        """Validate *fragments* against the template slots and fill defaults.

        Explicitly supplied values (including ``None``) are passed through
        unchanged. An omitted optional slot is filled with its default when
        that default is not ``None`` and is left out otherwise.

        Raises:
            AssemblyError: a required slot is missing, or *fragments*
                contains a key that is not a slot ID of this template.
        """
        known_ids = {slot.id for slot in self._info.slots}
        resolved: dict[str, str | None] = {}
        for slot in self._info.slots:
            if slot.id in fragments:
                resolved[slot.id] = fragments[slot.id]
            elif slot.required:
                raise AssemblyError(
                    f"assemble() missing required slot '{slot.id}' "
                    f"(fragment_type: {slot.fragment_type})"
                )
            elif slot.default is not None:
                resolved[slot.id] = slot.default
        unknown = set(fragments) - known_ids
        if unknown:
            raise AssemblyError(
                f"assemble() got unknown slot(s): {', '.join(sorted(unknown))}"
            )
        return resolved

    def _execute(
        self,
        data: dict[str, str | None] | list[dict[str, str | None]],
    ) -> dict | list[dict]:
        """Run the script with *data* as JSON on stdin and parse its stdout.

        Raises:
            AssemblyError: ``uv`` is not installed, the process cannot be
                started, it exceeds the timeout, it exits with a non-zero
                status, or its stdout is not valid JSON.
        """
        uv = _find_uv()
        try:
            result = subprocess.run(
                [uv, "run", "--script", str(self._script_path)],
                input=json.dumps(data),
                capture_output=True,
                text=True,
                # Undecodable bytes in the script's output must not escape
                # as a UnicodeDecodeError.
                errors="replace",
                timeout=self._TIMEOUT_SECONDS,
            )
        except subprocess.TimeoutExpired as exc:
            raise AssemblyError(
                f"Assembly script timed out after {self._TIMEOUT_SECONDS} seconds"
            ) from exc
        except OSError as exc:
            # E.g. the cached uv path is no longer present or not executable.
            raise AssemblyError(f"Could not run assembly script: {exc}") from exc
        if result.returncode != 0:
            raise AssemblyError(f"Assembly script failed:\n{result.stderr.strip()}")
        try:
            return json.loads(result.stdout)
        except json.JSONDecodeError as exc:
            raise AssemblyError(
                f"Assembly script returned invalid JSON:\n{result.stdout[:500]}"
            ) from exc

    def __repr__(self) -> str:
        return f"AssemblyTemplate({self.slug!r}, slots={[s.id for s in self.slots]})"
def _to_result(raw: dict) -> AssemblyResult:
    """Convert one raw result entry from the assembly script into an ``AssemblyResult``.

    A truthy ``"success"`` together with a ``"smiles"`` value yields a
    result carrying that SMILES string. Every other case yields a result
    with ``error`` set: the script's own ``"error"`` text (or ``"Unknown"``)
    for a reported failure, or a description of the malformed entry.
    Nothing is raised, so one bad entry cannot abort a whole batch.
    """
    if not isinstance(raw, dict):
        return AssemblyResult(error=f"Malformed result entry: {raw!r}")
    if not raw.get("success"):
        return AssemblyResult(error=str(raw.get("error", "Unknown")))
    smiles = raw.get("smiles")
    if smiles is None:
        # A missing key used to raise KeyError; a null value became "None".
        return AssemblyResult(
            error="Assembly script reported success but returned no SMILES"
        )
    return AssemblyResult(smiles=str(smiles))