From 959fc48806ad5e008135014d33acee76dc0e3468 Mon Sep 17 00:00:00 2001 From: goodboy Date: Fri, 17 Apr 2026 18:01:08 -0400 Subject: [PATCH] Impl min-viable `subint` spawn backend (B.2) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Replace the B.1 scaffold stub w/ a working spawn flow driving PEP 734 sub-interpreters on dedicated OS threads. Deats, - use private `_interpreters` C mod (not the public `concurrent.interpreters` API) to get `'legacy'` subint config — avoids PEP 684 C-ext compat issues w/ `msgspec` and other deps missing the `Py_mod_multiple_interpreters` slot - bootstrap subint via code-string calling new `_actor_child_main()` from `_child.py` (shared entry for both CLI and subint backends) - drive subint lifetime on an OS thread using `trio.to_thread.run_sync(_interpreters.exec, ..)` - full supervision lifecycle mirrors `trio_proc`: `ipc_server.wait_for_peer()` → send `SpawnSpec` → yield `Portal` via `task_status.started()` - graceful shutdown awaits the subint's inner `trio.run()` completing; cancel path sends `portal.cancel_actor()` then waits for thread join before `_interpreters.destroy()` Also, - extract `_actor_child_main()` from `_child.py` `__main__` block as callable entry shape bc the subint needs it for code-string bootstrap - add `"subint"` to the `_runtime.py` spawn-method check so child accepts `SpawnSpec` over IPC Prompt-IO: ai/prompt-io/claude/20260417T124437Z_5cd6df5_prompt_io.md (this patch was generated in some part by [`claude-code`][claude-code-gh]) [claude-code-gh]: https://github.com/anthropics/claude-code --- .../20260417T124437Z_5cd6df5_prompt_io.md | 97 ++++++++ .../20260417T124437Z_5cd6df5_prompt_io.raw.md | 168 ++++++++++++++ tractor/_child.py | 52 ++++- tractor/runtime/_runtime.py | 2 +- tractor/spawn/_subint.py | 211 ++++++++++++++++-- 5 files changed, 502 insertions(+), 28 deletions(-) create mode 100644 ai/prompt-io/claude/20260417T124437Z_5cd6df5_prompt_io.md create mode 100644 ai/prompt-io/claude/20260417T124437Z_5cd6df5_prompt_io.raw.md diff --git a/ai/prompt-io/claude/20260417T124437Z_5cd6df5_prompt_io.md b/ai/prompt-io/claude/20260417T124437Z_5cd6df5_prompt_io.md new file mode 100644 index 00000000..42cdb780 --- /dev/null +++ b/ai/prompt-io/claude/20260417T124437Z_5cd6df5_prompt_io.md @@ -0,0 +1,97 @@ +--- +model: claude-opus-4-7[1m] +service: claude +session: subints-spawner-phase-b1-impl +timestamp: 2026-04-17T12:44:37Z +git_ref: 5cd6df5 +scope: code +substantive: true +raw_file: 20260417T124437Z_5cd6df5_prompt_io.raw.md +--- + +## Prompt + +Continuing the Phase B work from the design sessions +logged in `20260417T034918Z_9703210_prompt_io.md` and +the Phase A impl in `20260417T035800Z_61a73ba_...`. + +User direction at this step: +- "ok now let's continue with the subints part of the + plan ya?" → kick off Phase B. +- After I proposed a 4-sub-phase breakdown (B.1 + scaffolding, B.2 min-viable spawn, B.3 lifecycle, + B.4 test enablement), user: "ok can we continue + with B" → approving the phasing and design + defaults (thread-per-subint, UDS default, + code-string bootstrap via `interp.exec()`). +- User switched the worktree to + `subint_spawner_backend` (instead of opening a + fresh worktree as I'd proposed): "i already + switched branches in the wkt, you should work off + `subint_spawner_backend` now". + +Scope of this turn: **B.1 scaffolding only** — +wire the `'subint'` name through the spawn-method +registry + harness, add a feature-detected stub +module, bump pyproject. No actual spawn flow. + +## Response summary + +Produced the Phase B.1 scaffolding patch, landed as +commit `5cd6df58` (subject: `Add `'subint'` spawn +backend scaffold (#379)`). Four files changed, ++124 / -2 lines. + +Key pieces (all generated by claude, reviewed by +human before commit): +- `tractor/spawn/_subint.py` — **new**; feature- + detects `concurrent.interpreters`; `subint_proc()` + stub raises `RuntimeError` on py<3.14 or + `NotImplementedError` with issue-#379 URL on + py≥3.14. Signature mirrors `trio_proc`/`mp_proc` + so B.2 can drop the impl in without touching + `_methods`. +- `tractor/spawn/_spawn.py` — adds `'subint'` to + `SpawnMethodKey`, grows a `case 'subint'` arm in + `try_set_start_method()` with feature-gate, re- + imports `sys` for the gate-error msg, adds late + `from ._subint import subint_proc` import and + `_methods` entry. +- `tractor/_testing/pytest.py` — converts the + gate-error into `pytest.UsageError` via a + `try/except` around `try_set_start_method()` so + `--spawn-backend=subint` on py<3.14 prints a + clean banner instead of a traceback. +- `pyproject.toml` — pin `requires-python` `<3.14` + → `<3.15`, add `3.14` trove classifier. + +## Files changed + +See `git diff 5cd6df5~1..5cd6df5 --stat`: + +``` + pyproject.toml | 3 +- + tractor/_testing/pytest.py | 8 +++- + tractor/spawn/_spawn.py | 15 ++++++ + tractor/spawn/_subint.py | 100 +++++++++++++++++++++++++ + 4 files changed, 124 insertions(+), 2 deletions(-) +``` + +Validation on py3.13: +- registry/gate probe — OK; `_has_subints = False`, + `subint_proc()` raises `RuntimeError` as expected. +- `try_set_start_method('subint')` — raises cleanly. +- `pytest --spawn-backend=subint` — exits with + `pytest.UsageError` banner. +- Spawn-relevant test subset — 69 passed, 1 skipped. +- Full suite on py3.13 deferred (scaffolding is + no-op on <3.14). +- Full-suite on py3.14 pending user setting up the + venv. + +## Human edits + +None — committed as generated by claude. `uv.lock` +was intentionally left unstaged by the user (pin- +bump regenerated cp314 wheel entries but they were +deferred to a follow-up). diff --git a/ai/prompt-io/claude/20260417T124437Z_5cd6df5_prompt_io.raw.md b/ai/prompt-io/claude/20260417T124437Z_5cd6df5_prompt_io.raw.md new file mode 100644 index 00000000..c5978559 --- /dev/null +++ b/ai/prompt-io/claude/20260417T124437Z_5cd6df5_prompt_io.raw.md @@ -0,0 +1,168 @@ +--- +model: claude-opus-4-7[1m] +service: claude +timestamp: 2026-04-17T12:44:37Z +git_ref: 5cd6df5 +diff_cmd: git diff 5cd6df5~1..5cd6df5 +--- + +Code generated in this turn was committed verbatim as +`5cd6df58` ("Add `'subint'` spawn backend scaffold +(#379)"). Per diff-ref mode, per-file code is captured +via the pointers below, each followed by a prose +summary. Non-code output (sanity-check results, +design rationale) is included verbatim. + +## Per-file generated content + +### `tractor/spawn/_subint.py` (new, 100 lines) + +> `git diff 5cd6df5~1..5cd6df5 -- tractor/spawn/_subint.py` + +New scaffolding module for the PEP 734 subinterpreter +backend. Contents: +- AGPL header + module docstring (describes backend + intent, 3.14+ availability gate, and explicit + "SCAFFOLDING STUB" status pointing at issue #379). +- Top-level `try/except ImportError` wrapping + `from concurrent import interpreters as + _interpreters` → sets module-global + `_has_subints: bool`. This lets the registry stay + introspectable on py<3.14 while spawn-time still + fails cleanly. +- `subint_proc()` coroutine with signature matching + `trio_proc`/`mp_proc` exactly (same param names, + defaults, and `TaskStatus[Portal]` typing) — + intentional so Phase B.2 can drop the impl in + without touching `_methods` or changing call-site + binding. +- Body raises `RuntimeError` on py<3.14 (with + `sys.version` printed) or `NotImplementedError` + with issue-#379 URL on py≥3.14. + +### `tractor/spawn/_spawn.py` (modified, +15 LOC) + +> `git diff 5cd6df5~1..5cd6df5 -- tractor/spawn/_spawn.py` + +- `import sys` re-added (pruned during Phase A, now + needed again for the py-version string in the + `'subint'` gate-error). +- `SpawnMethodKey = Literal[...]` grows `'subint'` as + the 4th member, with inline comment `# py3.14+ via + `concurrent.interpreters` (PEP 734)`. +- `try_set_start_method()` match-block grows a new + `case 'subint':` arm that imports + `from ._subint import _has_subints` lazily and + raises `RuntimeError` with a multi-line gate msg + if unavailable. +- Bottom-of-module late-import section grows + `from ._subint import subint_proc` alongside the + existing `_trio` / `_mp` imports. +- `_methods` dict grows `'subint': subint_proc`. + +### `tractor/_testing/pytest.py` (modified, +8 LOC) + +> `git diff 5cd6df5~1..5cd6df5 -- tractor/_testing/pytest.py` + +`pytest_configure()` wraps the +`try_set_start_method(backend)` call in a +`try/except RuntimeError` that re-raises as +`pytest.UsageError(str(err))`. Rationale: the gate +error on py<3.14 is legitimately a configuration +problem, not a test failure, so pytest's UsageError +path (exit code 4) gives a clean single-line banner +instead of a traceback. + +### `pyproject.toml` (modified, +2 / -1) + +> `git diff 5cd6df5~1..5cd6df5 -- pyproject.toml` + +- `requires-python` pin relaxed `>=3.12, <3.14` → + `>=3.12, <3.15` to admit 3.14 as a valid target. +- Added `"Programming Language :: Python :: 3.14"` + to the trove classifiers. + +## Non-code output (verbatim) + +### Design choices captured in the prior turn + +(Previously confirmed by the user on April 17 in this +session — relevant excerpts captured here for +provenance since they drove the shape of the impl): + +1. **Python pin**: `<3.15` + feature-gate the subint + backend at spawn time — user answered option (a) + from the design triage. +2. **Deferred `fork()`-via-subint hack** from issue + #379's "Our own thoughts" section. +3. **Phase B phasing**: user approved the B.1 / B.2 / + B.3 / B.4 breakdown — this commit is strictly B.1 + (scaffolding only, no spawn-flow impl). +4. **Option (B) worktree strategy**: new worktree + branched from `wkt/spawn_modularize`. *(Amended by + user at runtime: user switched the existing + `spawn_modularize` worktree to the + `subint_spawner_backend` branch instead.)* + +### Sanity-check output (verbatim terminal excerpts) + +Registry / feature-gate verification on py3.13: +``` +SpawnMethodKey values: ('trio', 'mp_spawn', + 'mp_forkserver', 'subint') +_methods keys: ['trio', 'mp_spawn', + 'mp_forkserver', 'subint'] +_has_subints: False (py version: (3, 13) ) +[expected] RuntimeError: The 'subint' spawn backend +requires Python 3.14+ (stdlib +`concurrent.interpreters`, PEP 734). +``` + +`try_set_start_method('subint')` gate on py3.13: +``` +[expected] RuntimeError: Spawn method 'subint' +requires Python 3.14+ (stdlib +`concurrent.interpreters`, PEP 734). +``` + +Pytest `--spawn-backend=subint` on py3.13 (the new +UsageError wrapper kicking in): +``` +ERROR: Spawn method 'subint' requires Python 3.14+ +(stdlib `concurrent.interpreters`, PEP 734). +Current runtime: 3.13.11 (main, Dec 5 2025, +16:06:33) [GCC 15.2.0] +``` + +Collection probe: `404 tests collected in 0.18s` +(no import errors from the new module). + +Spawn-relevant test subset (`tests/test_local.py +test_rpc.py test_spawning.py test_multi_program.py +tests/discovery/`): +``` +69 passed, 1 skipped, 10 warnings in 61.38s +``` + +Full suite was **not** run on py3.13 for this commit +— the scaffolding is no-op on <3.14 and full-suite +validation under py3.14 is pending that venv being +set up by the user. + +### Commit message + +Also AI-drafted (via `/commit-msg`, with the prose +rewrapped through `/home/goodboy/.claude/skills/pr-msg/ +scripts/rewrap.py --width 67`) — the 33-line message +on commit `5cd6df58` itself. Not reproduced here; see +`git log -1 5cd6df58`. + +### Known follow-ups flagged to user + +- **`uv.lock` deferred**: pin-bump regenerated cp314 + wheel entries in `uv.lock`, but the user chose to + not stage `uv.lock` for this commit. Warned + explicitly. +- **Phase B.2 needs py3.14 venv** — running the + actual subint impl requires it; user said they'd + set it up separately. diff --git a/tractor/_child.py b/tractor/_child.py index c61cdec3..727a5054 100644 --- a/tractor/_child.py +++ b/tractor/_child.py @@ -15,16 +15,23 @@ # along with this program. If not, see . """ -This is the "bootloader" for actors started using the native trio backend. +The "bootloader" for sub-actors spawned via the native `trio` +backend (the default `python -m tractor._child` CLI entry) and +the in-process `subint` backend (`tractor.spawn._subint`). """ +from __future__ import annotations import argparse - from ast import literal_eval +from typing import TYPE_CHECKING from .runtime._runtime import Actor from .spawn._entry import _trio_main +if TYPE_CHECKING: + from .discovery._addr import UnwrappedAddress + from .spawn._spawn import SpawnMethodKey + def parse_uid(arg): name, uuid = literal_eval(arg) # ensure 2 elements @@ -39,6 +46,36 @@ def parse_ipaddr(arg): return arg +def _actor_child_main( + uid: tuple[str, str], + loglevel: str | None, + parent_addr: UnwrappedAddress | None, + infect_asyncio: bool, + spawn_method: SpawnMethodKey = 'trio', + +) -> None: + ''' + Construct the child `Actor` and dispatch to `_trio_main()`. + + Shared entry shape used by both the `python -m tractor._child` + CLI (trio/mp subproc backends) and the `subint` backend, which + invokes this from inside a fresh `concurrent.interpreters` + sub-interpreter via `Interpreter.call()`. + + ''' + subactor = Actor( + name=uid[0], + uuid=uid[1], + loglevel=loglevel, + spawn_method=spawn_method, + ) + _trio_main( + subactor, + parent_addr=parent_addr, + infect_asyncio=infect_asyncio, + ) + + if __name__ == "__main__": __tracebackhide__: bool = True @@ -49,15 +86,10 @@ if __name__ == "__main__": parser.add_argument("--asyncio", action='store_true') args = parser.parse_args() - subactor = Actor( - name=args.uid[0], - uuid=args.uid[1], + _actor_child_main( + uid=args.uid, loglevel=args.loglevel, - spawn_method="trio" - ) - - _trio_main( - subactor, parent_addr=args.parent_addr, infect_asyncio=args.asyncio, + spawn_method='trio', ) diff --git a/tractor/runtime/_runtime.py b/tractor/runtime/_runtime.py index bee9e20d..0c25b926 100644 --- a/tractor/runtime/_runtime.py +++ b/tractor/runtime/_runtime.py @@ -870,7 +870,7 @@ class Actor: accept_addrs: list[UnwrappedAddress]|None = None - if self._spawn_method == "trio": + if self._spawn_method in ("trio", "subint"): # Receive post-spawn runtime state from our parent. spawnspec: msgtypes.SpawnSpec = await chan.recv() diff --git a/tractor/spawn/_subint.py b/tractor/spawn/_subint.py index 09793496..169657b2 100644 --- a/tractor/spawn/_subint.py +++ b/tractor/spawn/_subint.py @@ -18,9 +18,10 @@ Sub-interpreter (`subint`) actor spawning backend. Spawns each sub-actor as a CPython PEP 734 sub-interpreter -(`concurrent.interpreters.Interpreter`) — same-process state -isolation with faster start-up than an OS subproc, while -preserving tractor's IPC-based actor boundaries. +(`concurrent.interpreters.Interpreter`) driven on its own OS +thread — same-process state isolation with faster start-up +than an OS subproc, while preserving tractor's existing +IPC-based actor boundary. Availability ------------ @@ -28,14 +29,10 @@ Requires Python 3.14+ for the stdlib `concurrent.interpreters` module. On older runtimes the module still imports (so the registry stays introspectable) but `subint_proc()` raises. -Status ------- -SCAFFOLDING STUB — `subint_proc()` is **not yet implemented**. -The real impl lands in Phase B.2 (see issue #379). - ''' from __future__ import annotations import sys +from functools import partial from typing import ( Any, TYPE_CHECKING, @@ -45,21 +42,50 @@ import trio from trio import TaskStatus +# NOTE: we reach into the *private* `_interpreters` C module +# rather than using the nice `concurrent.interpreters` public +# API because the latter only exposes the `'isolated'` subint +# config (PEP 684, per-interp GIL). Under that config, any C +# extension lacking the `Py_mod_multiple_interpreters` slot +# refuses to import — which includes `msgspec` (used all over +# tractor's IPC layer) as of 0.19.x. Dropping to the `'legacy'` +# config keeps the main GIL + lets existing C extensions load +# normally while preserving the state-isolation we actually +# need for the actor model (separate `sys.modules`, `__main__`, +# globals). Once msgspec (and similar deps) opt-in to PEP 684 +# we can migrate to the public `interpreters.create()` API and +# pick up per-interp-GIL parallelism for free. try: - from concurrent import interpreters as _interpreters # type: ignore + import _interpreters # type: ignore _has_subints: bool = True except ImportError: _interpreters = None # type: ignore _has_subints: bool = False +from tractor.log import get_logger +from tractor.msg import ( + types as msgtypes, + pretty_struct, +) +from tractor.runtime._state import current_actor +from tractor.runtime._portal import Portal +from ._spawn import cancel_on_completion + + if TYPE_CHECKING: from tractor.discovery._addr import UnwrappedAddress - from tractor.runtime._portal import Portal + from tractor.ipc import ( + _server, + Channel, + ) from tractor.runtime._runtime import Actor from tractor.runtime._supervise import ActorNursery +log = get_logger('tractor') + + async def subint_proc( name: str, actor_nursery: ActorNursery, @@ -78,12 +104,21 @@ async def subint_proc( ) -> None: ''' Create a new sub-actor hosted inside a PEP 734 - sub-interpreter running in a dedicated OS thread, + sub-interpreter running on a dedicated OS thread, reusing tractor's existing UDS/TCP IPC handshake for parent<->child channel setup. - NOT YET IMPLEMENTED — placeholder stub pending the - Phase B.2 impl. + Supervision model mirrors `trio_proc()`: + - parent awaits `ipc_server.wait_for_peer()` for the + child to connect back; on success yields a `Portal` + via `task_status.started()` + - on graceful shutdown we await the sub-interpreter's + `trio.run()` completing naturally (driven by the + child's actor runtime) + - on cancellation we send `Portal.cancel_actor()` and + then wait for the subint's trio loop to exit cleanly + — unblocking the worker thread so the `Interpreter` + can be closed ''' if not _has_subints: @@ -93,8 +128,150 @@ async def subint_proc( f'Current runtime: {sys.version}' ) - raise NotImplementedError( - 'The `subint` spawn backend scaffolding is in place but ' - 'the spawn-flow itself is not yet implemented.\n' - 'Tracking: https://github.com/goodboy/tractor/issues/379' + interp_id: int = _interpreters.create('legacy') + log.runtime( + f'Created sub-interpreter (legacy cfg) for sub-actor\n' + f'(>\n' + f' |_interp_id={interp_id}\n' ) + + uid: tuple[str, str] = subactor.aid.uid + loglevel: str | None = subactor.loglevel + + # Build a bootstrap code string driven via `_interpreters.exec()`. + # All of `uid` (`tuple[str, str]`), `loglevel` (`str|None`), + # `parent_addr` (`tuple[str, int|str]` — see `UnwrappedAddress`) + # and `infect_asyncio` (`bool`) `repr()` to valid Python + # literals, so we can embed them directly. + bootstrap: str = ( + 'from tractor._child import _actor_child_main\n' + '_actor_child_main(\n' + f' uid={uid!r},\n' + f' loglevel={loglevel!r},\n' + f' parent_addr={parent_addr!r},\n' + f' infect_asyncio={infect_asyncio!r},\n' + f' spawn_method={"subint"!r},\n' + ')\n' + ) + + cancelled_during_spawn: bool = False + subint_exited = trio.Event() + ipc_server: _server.Server = actor_nursery._actor.ipc_server + + async def _drive_subint() -> None: + ''' + Block a worker OS-thread on `_interpreters.exec()` for + the lifetime of the sub-actor. When the subint's inner + `trio.run()` exits, `exec()` returns and the thread + naturally joins. + + ''' + try: + await trio.to_thread.run_sync( + _interpreters.exec, + interp_id, + bootstrap, + abandon_on_cancel=False, + ) + finally: + subint_exited.set() + + try: + try: + async with trio.open_nursery() as thread_n: + thread_n.start_soon(_drive_subint) + + try: + event, chan = await ipc_server.wait_for_peer(uid) + except trio.Cancelled: + cancelled_during_spawn = True + raise + + portal = Portal(chan) + actor_nursery._children[uid] = ( + subactor, + interp_id, # proxy for the normal `proc` slot + portal, + ) + + sspec = msgtypes.SpawnSpec( + _parent_main_data=subactor._parent_main_data, + enable_modules=subactor.enable_modules, + reg_addrs=subactor.reg_addrs, + bind_addrs=bind_addrs, + _runtime_vars=_runtime_vars, + ) + log.runtime( + f'Sending spawn spec to subint child\n' + f'{{}}=> {chan.aid.reprol()!r}\n' + f'\n' + f'{pretty_struct.pformat(sspec)}\n' + ) + await chan.send(sspec) + + curr_actor: Actor = current_actor() + curr_actor._actoruid2nursery[uid] = actor_nursery + + task_status.started(portal) + + with trio.CancelScope(shield=True): + await actor_nursery._join_procs.wait() + + async with trio.open_nursery() as lifecycle_n: + if portal in actor_nursery._cancel_after_result_on_exit: + lifecycle_n.start_soon( + cancel_on_completion, + portal, + subactor, + errors, + ) + + # Soft-kill analog: wait for the subint to exit + # naturally; on cancel, send a graceful cancel + # via the IPC portal and then wait for the + # driver thread to finish so `interp.close()` + # won't race with a running interpreter. + try: + await subint_exited.wait() + except trio.Cancelled: + with trio.CancelScope(shield=True): + log.cancel( + f'Soft-killing subint sub-actor\n' + f'c)=> {chan.aid.reprol()}\n' + f' |_interp_id={interp_id}\n' + ) + try: + await portal.cancel_actor() + except ( + trio.BrokenResourceError, + trio.ClosedResourceError, + ): + # channel already down — subint will + # exit on its own timeline + pass + await subint_exited.wait() + raise + finally: + lifecycle_n.cancel_scope.cancel() + + finally: + # The driver thread has exited (either natural subint + # completion or post-cancel teardown) so the subint is + # no longer running — safe to destroy. + with trio.CancelScope(shield=True): + try: + _interpreters.destroy(interp_id) + log.runtime( + f'Destroyed sub-interpreter\n' + f')>\n' + f' |_interp_id={interp_id}\n' + ) + except _interpreters.InterpreterError as e: + log.warning( + f'Could not destroy sub-interpreter ' + f'{interp_id}: {e}' + ) + + finally: + if not cancelled_during_spawn: + actor_nursery._children.pop(uid, None)