From 1d4867e51c7bf0a09434086ff4c771aba974e6a1 Mon Sep 17 00:00:00 2001 From: goodboy Date: Wed, 22 Apr 2026 18:00:06 -0400 Subject: [PATCH] Add trio-parent tests for `_subint_forkserver` MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit New pytest module `tests/spawn/test_subint_forkserver.py` drives the forkserver primitives from inside a real `trio.run()` in the parent — the runtime shape tractor will actually use when we wire up a `subint_forkserver` spawn backend proper. Complements the standalone no-trio-in-parent `ai/conc-anal/subint_fork_from_main_thread_smoketest.py`. Deats, - new test pkg `tests/spawn/` (+ empty `__init__.py`) - two tests, both `@pytest.mark.timeout(30, method='thread')` for the GIL-hostage safety reason doc'd in `ai/conc-anal/subint_sigint_starvation_issue.md`: - `test_fork_from_worker_thread_via_trio` — parent-side plumbing baseline. `trio.run()` off-loads forkserver prims via `trio.to_thread.run_sync()` + asserts the child reaps cleanly - `test_fork_and_run_trio_in_child` — end-to-end: forked child calls `run_subint_in_worker_thread()` with a bootstrap str that does `trio.run()` in a fresh subint - both tests wrap the inner `trio.run()` in a `dump_on_hang()` for post-mortem if the outer `pytest-timeout` fires - intentionally NOT using `--spawn-backend` — the tests drive the primitives directly rather than going through tractor's spawn-method registry (which the forkserver isn't plugged into yet) Also, rename `run_trio_in_subint()` → `run_subint_in_worker_thread()` for naming consistency with the sibling `fork_from_worker_thread()`. The action is really "host a subint on a worker thread", not specifically "run trio" — trio just happens to be the typical payload. Propagate the rename to the smoketest. Further, add a "TODO — cleanup gated on msgspec PEP 684 support" section to the `_subint_forkserver` module docstring: flags the dedicated-`threading.Thread` design as potentially-revisable once isolated-mode subints are viable in tractor. Cross-refs `msgspec#563` + `tractor#379` and points at an audit-plan conc-anal doc we'll add next. (this patch was generated in some part by [`claude-code`][claude-code-gh]) [claude-code-gh]: https://github.com/anthropics/claude-code --- .../subint_fork_from_main_thread_smoketest.py | 8 +- tests/spawn/__init__.py | 0 tests/spawn/test_subint_forkserver.py | 214 ++++++++++++++++++ tractor/spawn/_subint_forkserver.py | 41 +++- 4 files changed, 251 insertions(+), 12 deletions(-) create mode 100644 tests/spawn/__init__.py create mode 100644 tests/spawn/test_subint_forkserver.py diff --git a/ai/conc-anal/subint_fork_from_main_thread_smoketest.py b/ai/conc-anal/subint_fork_from_main_thread_smoketest.py index a03d5e26..84d11d49 100644 --- a/ai/conc-anal/subint_fork_from_main_thread_smoketest.py +++ b/ai/conc-anal/subint_fork_from_main_thread_smoketest.py @@ -91,7 +91,7 @@ except ImportError: # primitives have moved into tractor proper.) from tractor.spawn._subint_forkserver import ( fork_from_worker_thread, - run_trio_in_subint, + run_subint_in_worker_thread, wait_child, ) @@ -305,18 +305,18 @@ def _child_trio_in_subint() -> int: ''' CHILD-side `child_target`: drive a trivial `trio.run()` inside a fresh legacy-config subint on a worker thread, - using the `tractor.spawn._subint_forkserver.run_trio_in_subint` + using the `tractor.spawn._subint_forkserver.run_subint_in_worker_thread` primitive. Returns 0 on success. ''' try: - run_trio_in_subint( + run_subint_in_worker_thread( _CHILD_TRIO_BOOTSTRAP, thread_name='child-subint-trio-thread', ) except RuntimeError as err: print( - f' CHILD: run_trio_in_subint timed out / thread ' + f' CHILD: run_subint_in_worker_thread timed out / thread ' f'never returned: {err}', flush=True, ) diff --git a/tests/spawn/__init__.py b/tests/spawn/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/tests/spawn/test_subint_forkserver.py b/tests/spawn/test_subint_forkserver.py new file mode 100644 index 00000000..87c497b6 --- /dev/null +++ b/tests/spawn/test_subint_forkserver.py @@ -0,0 +1,214 @@ +''' +Integration exercises for the `tractor.spawn._subint_forkserver` +primitives (`fork_from_worker_thread()` + `run_subint_in_worker_thread()`) +driven from inside a real `trio.run()` in the parent process — +the runtime shape tractor will need when we move toward wiring +up a `subint_forkserver` spawn backend proper. + +Background +---------- +`ai/conc-anal/subint_fork_blocked_by_cpython_post_fork_issue.md` +establishes that `os.fork()` from a non-main sub-interpreter +aborts the child at the CPython level. The sibling +`subint_fork_from_main_thread_smoketest.py` proves the escape +hatch: fork from a main-interp *worker thread* (one that has +never entered a subint) works, and the forked child can then +host its own `trio.run()` inside a fresh subint. + +Those smoke-test scenarios are standalone — no trio runtime +in the *parent*. These tests exercise the same primitives +from inside `trio.run()` in the parent, proving out the +piece actually needed for a working spawn backend. + +Gating +------ +- py3.14+ (via `concurrent.interpreters` presence) +- no backend restriction (these tests don't use + `--spawn-backend` — they drive the forkserver primitives + directly rather than going through tractor's spawn-method + registry). + +''' +from __future__ import annotations +from functools import partial +import os + +import pytest +import trio + +from tractor.devx import dump_on_hang + + +# Gate: subint forkserver primitives require py3.14+. Check +# the public stdlib wrapper's presence (added in 3.14) rather +# than `_interpreters` directly — see +# `tractor.spawn._subint` for why. +pytest.importorskip('concurrent.interpreters') + +from tractor.spawn._subint_forkserver import ( # noqa: E402 + fork_from_worker_thread, + run_subint_in_worker_thread, + wait_child, +) + + +# ---------------------------------------------------------------- +# child-side callables (passed via `child_target=` across fork) +# ---------------------------------------------------------------- + + +_CHILD_TRIO_BOOTSTRAP: str = ( + 'import trio\n' + 'async def _main():\n' + ' await trio.sleep(0.05)\n' + ' return 42\n' + 'result = trio.run(_main)\n' + 'assert result == 42, f"trio.run returned {result}"\n' +) + + +def _child_trio_in_subint() -> int: + ''' + `child_target` for the trio-in-child scenario: drive a + trivial `trio.run()` inside a fresh legacy-config subint + on a worker thread. + + Returns an exit code suitable for `os._exit()`: + - 0: subint-hosted `trio.run()` succeeded + - 3: driver thread hang (timeout inside `run_subint_in_worker_thread`) + - 4: subint bootstrap raised some other exception + + ''' + try: + run_subint_in_worker_thread( + _CHILD_TRIO_BOOTSTRAP, + thread_name='child-subint-trio-thread', + ) + except RuntimeError: + # timeout / thread-never-returned + return 3 + except BaseException: + return 4 + return 0 + + +# ---------------------------------------------------------------- +# parent-side harnesses (run inside `trio.run()`) +# ---------------------------------------------------------------- + + +async def run_fork_in_non_trio_thread( + deadline: float, + *, + child_target=None, +) -> int: + ''' + From inside a parent `trio.run()`, off-load the + forkserver primitive to a main-interp worker thread via + `trio.to_thread.run_sync()` and return the forked child's + pid. + + Then `wait_child()` on that pid (also off-loaded so we + don't block trio's event loop on `waitpid()`) and assert + the child exited cleanly. + + ''' + with trio.fail_after(deadline): + # NOTE: `fork_from_worker_thread` internally spawns its + # own dedicated `threading.Thread` (not from trio's + # cache) and joins it before returning — so we can + # safely off-load via `to_thread.run_sync` without + # worrying about the trio-thread-cache recycling the + # runner. Pass `abandon_on_cancel=False` for the + # same "bounded + clean" rationale we use in + # `_subint.subint_proc`. + pid: int = await trio.to_thread.run_sync( + partial( + fork_from_worker_thread, + child_target, + thread_name='test-subint-forkserver', + ), + abandon_on_cancel=False, + ) + assert pid > 0 + + ok, status_str = await trio.to_thread.run_sync( + partial( + wait_child, + pid, + expect_exit_ok=True, + ), + abandon_on_cancel=False, + ) + assert ok, ( + f'forked child did not exit cleanly: ' + f'{status_str}' + ) + return pid + + +# ---------------------------------------------------------------- +# tests +# ---------------------------------------------------------------- + + +# Bounded wall-clock via `pytest-timeout` (`method='thread'`) +# for the usual GIL-hostage safety reason documented in the +# sibling `test_subint_cancellation.py` / the class-A +# `subint_sigint_starvation_issue.md`. Each test also has an +# inner `trio.fail_after()` so assertion failures fire fast +# under normal conditions. +@pytest.mark.timeout(30, method='thread') +def test_fork_from_worker_thread_via_trio() -> None: + ''' + Baseline: inside `trio.run()`, call + `fork_from_worker_thread()` via `trio.to_thread.run_sync()`, + get a child pid back, reap the child cleanly. + + No trio-in-child. If this regresses we know the parent- + side trio↔worker-thread plumbing is broken independent + of any child-side subint machinery. + + ''' + deadline: float = 10.0 + with dump_on_hang( + seconds=deadline, + path='/tmp/subint_forkserver_baseline.dump', + ): + pid: int = trio.run( + partial(run_fork_in_non_trio_thread, deadline), + ) + # parent-side sanity — we got a real pid back. + assert isinstance(pid, int) and pid > 0 + # by now the child has been waited on; it shouldn't be + # reap-able again. + with pytest.raises((ChildProcessError, OSError)): + os.waitpid(pid, os.WNOHANG) + + +@pytest.mark.timeout(30, method='thread') +def test_fork_and_run_trio_in_child() -> None: + ''' + End-to-end: inside the parent's `trio.run()`, off-load + `fork_from_worker_thread()` to a worker thread, have the + forked child then create a fresh subint and run + `trio.run()` inside it on yet another worker thread. + + This is the full "forkserver + trio-in-subint-in-child" + pattern the proposed `subint_forkserver` spawn backend + would rest on. + + ''' + deadline: float = 15.0 + with dump_on_hang( + seconds=deadline, + path='/tmp/subint_forkserver_trio_in_child.dump', + ): + pid: int = trio.run( + partial( + run_fork_in_non_trio_thread, + deadline, + child_target=_child_trio_in_subint, + ), + ) + assert isinstance(pid, int) and pid > 0 diff --git a/tractor/spawn/_subint_forkserver.py b/tractor/spawn/_subint_forkserver.py index e0d5cab3..49d1a294 100644 --- a/tractor/spawn/_subint_forkserver.py +++ b/tractor/spawn/_subint_forkserver.py @@ -59,6 +59,24 @@ to drive these from a parent-side `trio.run()` and hook the returned child pid into tractor's normal actor-nursery/IPC machinery. +TODO — cleanup gated on msgspec PEP 684 support +----------------------------------------------- +Both primitives below allocate a dedicated +`threading.Thread` rather than using +`trio.to_thread.run_sync()`. That's a cautious design +rooted in three distinct-but-entangled issues (GIL +starvation from legacy-config subints, tstate-recycling +destroy race on trio cache threads, fork-from-main-tstate +invariant). Some of those dissolve under PEP 684 +isolated-mode subints; one requires empirical re-testing +to know. + +Full analysis + audit plan for when we can revisit is in +`ai/conc-anal/subint_forkserver_thread_constraints_on_pep684_issue.md`. +Intent: file a follow-up GH issue linked to #379 once +[jcrist/msgspec#563](https://github.com/jcrist/msgspec/issues/563) +unblocks isolated-mode subints in tractor. + See also -------- - `tractor.spawn._subint_fork` — the stub for the @@ -268,7 +286,7 @@ def fork_from_worker_thread( return pid -def run_trio_in_subint( +def run_subint_in_worker_thread( bootstrap: str, *, thread_name: str = 'subint-trio', @@ -276,14 +294,21 @@ def run_trio_in_subint( ) -> None: ''' - Helper for use inside a forked child: create a fresh - legacy-config sub-interpreter and drive the given - `bootstrap` code string through `_interpreters.exec()` - on a dedicated worker thread. + Create a fresh legacy-config sub-interpreter and drive + the given `bootstrap` code string through + `_interpreters.exec()` on a dedicated worker thread. - Typical `bootstrap` content imports `trio`, defines an - async entry, calls `trio.run()`. See - `tractor.spawn._subint.subint_proc` for the matching + Naming mirrors `fork_from_worker_thread()`: + "_in_worker_thread" — the action here is "run a + subint", not "run trio" per se. Typical `bootstrap` + content does import `trio` + call `trio.run()`, but + nothing about this primitive requires trio; it's a + generic "host a subint on a worker thread" helper. + Intended mainly for use inside a fork-child (see + `tractor.spawn._subint_forkserver` module docstring) but + works anywhere. + + See `tractor.spawn._subint.subint_proc` for the matching pattern tractor uses at the sub-actor level. Destroys the subint after the thread joins.